1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 1990 Mentat Inc.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/stream.h>
  28 #include <sys/dlpi.h>
  29 #include <sys/stropts.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/strsun.h>
  32 #include <sys/strlog.h>
  33 #include <sys/strsubr.h>
  34 #define _SUN_TPI_VERSION        2
  35 #include <sys/tihdr.h>
  36 #include <sys/ddi.h>
  37 #include <sys/sunddi.h>
  38 #include <sys/cmn_err.h>
  39 #include <sys/debug.h>
  40 #include <sys/sdt.h>
  41 #include <sys/kobj.h>
  42 #include <sys/zone.h>
  43 #include <sys/neti.h>
  44 #include <sys/hook.h>
  45 
  46 #include <sys/kmem.h>
  47 #include <sys/systm.h>
  48 #include <sys/param.h>
  49 #include <sys/socket.h>
  50 #include <sys/vtrace.h>
  51 #include <sys/isa_defs.h>
  52 #include <sys/atomic.h>
  53 #include <sys/policy.h>
  54 #include <sys/mac.h>
  55 #include <net/if.h>
  56 #include <net/if_types.h>
  57 #include <net/route.h>
  58 #include <net/if_dl.h>
  59 #include <sys/sockio.h>
  60 #include <netinet/in.h>
  61 #include <netinet/ip6.h>
  62 #include <netinet/icmp6.h>
  63 #include <netinet/sctp.h>
  64 
  65 #include <inet/common.h>
  66 #include <inet/mi.h>
  67 #include <inet/optcom.h>
  68 #include <inet/mib2.h>
  69 #include <inet/nd.h>
  70 #include <inet/arp.h>
  71 
  72 #include <inet/ip.h>
  73 #include <inet/ip_impl.h>
  74 #include <inet/ip6.h>
  75 #include <inet/ip6_asp.h>
  76 #include <inet/tcp.h>
  77 #include <inet/tcp_impl.h>
  78 #include <inet/udp_impl.h>
  79 #include <inet/ipp_common.h>
  80 
  81 #include <inet/ip_multi.h>
  82 #include <inet/ip_if.h>
  83 #include <inet/ip_ire.h>
  84 #include <inet/ip_rts.h>
  85 #include <inet/ip_ndp.h>
  86 #include <net/pfkeyv2.h>
  87 #include <inet/sadb.h>
  88 #include <inet/ipsec_impl.h>
  89 #include <inet/iptun/iptun_impl.h>
  90 #include <inet/sctp_ip.h>
  91 #include <sys/pattr.h>
  92 #include <inet/ipclassifier.h>
  93 #include <inet/ipsecah.h>
  94 #include <inet/rawip_impl.h>
  95 #include <inet/rts_impl.h>
  96 #include <sys/squeue_impl.h>
  97 #include <sys/squeue.h>
  98 
  99 #include <sys/tsol/label.h>
 100 #include <sys/tsol/tnet.h>
 101 
 102 /* Temporary; for CR 6451644 work-around */
 103 #include <sys/ethernet.h>
 104 
 105 /*
 106  * Naming conventions:
 107  *      These rules should be judiciously applied
 108  *      if there is a need to identify something as IPv6 versus IPv4
 109  *      IPv6 functions will end with _v6 in the ip module.
 110  *      IPv6 functions will end with _ipv6 in the transport modules.
 111  *      IPv6 macros:
 112  *              Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
 113  *              Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
 114  *              And then there are ..V4_PART_OF_V6.
 115  *              The intent is that macros in the ip module end with _V6.
 116  *      IPv6 global variables will start with ipv6_
 117  *      IPv6 structures will start with ipv6
 118  *      IPv6 defined constants should start with IPV6_
 119  *              (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
 120  */
 121 
 122 /*
 123  * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems).
 124  * We need to do this because we didn't obtain the IP6OPT_LS (0x0a)
 125  * from IANA. This mechanism will remain in effect until an official
 126  * number is obtained.
 127  */
 128 uchar_t ip6opt_ls;
 129 
 130 const in6_addr_t ipv6_all_ones = {
 131         {{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }}};
 132 const in6_addr_t ipv6_all_zeros = {{{ 0, 0, 0, 0 }}};
 133 
 134 #ifdef  _BIG_ENDIAN
 135 const in6_addr_t ipv6_unspecified_group = {{{ 0xff000000U, 0, 0, 0 }}};
 136 #else   /* _BIG_ENDIAN */
 137 const in6_addr_t ipv6_unspecified_group = {{{ 0x000000ffU, 0, 0, 0 }}};
 138 #endif  /* _BIG_ENDIAN */
 139 
 140 #ifdef  _BIG_ENDIAN
 141 const in6_addr_t ipv6_loopback = {{{ 0, 0, 0, 0x00000001U }}};
 142 #else  /* _BIG_ENDIAN */
 143 const in6_addr_t ipv6_loopback = {{{ 0, 0, 0, 0x01000000U }}};
 144 #endif /* _BIG_ENDIAN */
 145 
 146 #ifdef _BIG_ENDIAN
 147 const in6_addr_t ipv6_all_hosts_mcast = {{{ 0xff020000U, 0, 0, 0x00000001U }}};
 148 #else  /* _BIG_ENDIAN */
 149 const in6_addr_t ipv6_all_hosts_mcast = {{{ 0x000002ffU, 0, 0, 0x01000000U }}};
 150 #endif /* _BIG_ENDIAN */
 151 
 152 #ifdef _BIG_ENDIAN
 153 const in6_addr_t ipv6_all_rtrs_mcast = {{{ 0xff020000U, 0, 0, 0x00000002U }}};
 154 #else  /* _BIG_ENDIAN */
 155 const in6_addr_t ipv6_all_rtrs_mcast = {{{ 0x000002ffU, 0, 0, 0x02000000U }}};
 156 #endif /* _BIG_ENDIAN */
 157 
 158 #ifdef _BIG_ENDIAN
 159 const in6_addr_t ipv6_all_v2rtrs_mcast = {{{ 0xff020000U, 0, 0, 0x00000016U }}};
 160 #else  /* _BIG_ENDIAN */
 161 const in6_addr_t ipv6_all_v2rtrs_mcast = {{{ 0x000002ffU, 0, 0, 0x16000000U }}};
 162 #endif /* _BIG_ENDIAN */
 163 
 164 #ifdef _BIG_ENDIAN
 165 const in6_addr_t ipv6_solicited_node_mcast = {
 166                         {{ 0xff020000U, 0, 0x00000001U, 0xff000000U }}};
 167 #else  /* _BIG_ENDIAN */
 168 const in6_addr_t ipv6_solicited_node_mcast = {
 169                         {{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU }}};
 170 #endif /* _BIG_ENDIAN */
 171 
 172 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
 173 static void     icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
 174 static void     icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
 175     ip_recv_attr_t *);
 176 static void     icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
 177     ip_recv_attr_t *);
 178 static void     icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
 179     in6_addr_t *, ip_recv_attr_t *);
 180 static void     icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
 181     ip_recv_attr_t *);
 182 static boolean_t        ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
 183 
 184 /*
 185  * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
 186  * If the ICMP message is consumed by IP, i.e., it should not be delivered
 187  * to any IPPROTO_ICMP raw sockets, then it returns NULL.
 188  * Likewise, if the ICMP error is misformed (too short, etc), then it
 189  * returns NULL. The caller uses this to determine whether or not to send
 190  * to raw sockets.
 191  *
 192  * All error messages are passed to the matching transport stream.
 193  *
 194  * See comment for icmp_inbound_v4() on how IPsec is handled.
 195  */
 196 mblk_t *
 197 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
 198 {
 199         icmp6_t         *icmp6;
 200         ip6_t           *ip6h;          /* Outer header */
 201         int             ip_hdr_length;  /* Outer header length */
 202         boolean_t       interested;
 203         ill_t           *ill = ira->ira_ill;
 204         ip_stack_t      *ipst = ill->ill_ipst;
 205         mblk_t          *mp_ret = NULL;
 206 
 207         ip6h = (ip6_t *)mp->b_rptr;
 208 
 209         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
 210 
 211         /* Check for Martian packets  */
 212         if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
 213                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
 214                 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
 215                 freemsg(mp);
 216                 return (NULL);
 217         }
 218 
 219         /* Make sure ira_l2src is set for ndp_input */
 220         if (!(ira->ira_flags & IRAF_L2SRC_SET))
 221                 ip_setl2src(mp, ira, ira->ira_rill);
 222 
 223         ip_hdr_length = ira->ira_ip_hdr_length;
 224         if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
 225                 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
 226                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
 227                         ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
 228                         freemsg(mp);
 229                         return (NULL);
 230                 }
 231                 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
 232                 if (ip6h == NULL) {
 233                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
 234                         freemsg(mp);
 235                         return (NULL);
 236                 }
 237         }
 238 
 239         icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
 240         DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
 241         ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
 242             icmp6->icmp6_code));
 243 
 244         /*
 245          * We will set "interested" to "true" if we should pass a copy to
 246          * the transport i.e., if it is an error message.
 247          */
 248         interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
 249 
 250         switch (icmp6->icmp6_type) {
 251         case ICMP6_DST_UNREACH:
 252                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
 253                 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
 254                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
 255                 break;
 256 
 257         case ICMP6_TIME_EXCEEDED:
 258                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
 259                 break;
 260 
 261         case ICMP6_PARAM_PROB:
 262                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
 263                 break;
 264 
 265         case ICMP6_PACKET_TOO_BIG:
 266                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
 267                 break;
 268 
 269         case ICMP6_ECHO_REQUEST:
 270                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
 271                 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
 272                     !ipst->ips_ipv6_resp_echo_mcast)
 273                         break;
 274 
 275                 /*
 276                  * We must have exclusive use of the mblk to convert it to
 277                  * a response.
 278                  * If not, we copy it.
 279                  */
 280                 if (mp->b_datap->db_ref > 1) {
 281                         mblk_t  *mp1;
 282 
 283                         mp1 = copymsg(mp);
 284                         if (mp1 == NULL) {
 285                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 286                                 ip_drop_input("ipIfStatsInDiscards - copymsg",
 287                                     mp, ill);
 288                                 freemsg(mp);
 289                                 return (NULL);
 290                         }
 291                         freemsg(mp);
 292                         mp = mp1;
 293                         ip6h = (ip6_t *)mp->b_rptr;
 294                         icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
 295                 }
 296 
 297                 icmp6->icmp6_type = ICMP6_ECHO_REPLY;
 298                 icmp_send_reply_v6(mp, ip6h, icmp6, ira);
 299                 return (NULL);
 300 
 301         case ICMP6_ECHO_REPLY:
 302                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
 303                 break;
 304 
 305         case ND_ROUTER_SOLICIT:
 306                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
 307                 break;
 308 
 309         case ND_ROUTER_ADVERT:
 310                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
 311                 break;
 312 
 313         case ND_NEIGHBOR_SOLICIT:
 314                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
 315                 ndp_input(mp, ira);
 316                 return (NULL);
 317 
 318         case ND_NEIGHBOR_ADVERT:
 319                 BUMP_MIB(ill->ill_icmp6_mib,
 320                     ipv6IfIcmpInNeighborAdvertisements);
 321                 ndp_input(mp, ira);
 322                 return (NULL);
 323 
 324         case ND_REDIRECT:
 325                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
 326 
 327                 if (ipst->ips_ipv6_ignore_redirect)
 328                         break;
 329 
 330                 /* We now allow a RAW socket to receive this. */
 331                 interested = B_TRUE;
 332                 break;
 333 
 334         /*
 335          * The next three icmp messages will be handled by MLD.
 336          * Pass all valid MLD packets up to any process(es)
 337          * listening on a raw ICMP socket.
 338          */
 339         case MLD_LISTENER_QUERY:
 340         case MLD_LISTENER_REPORT:
 341         case MLD_LISTENER_REDUCTION:
 342                 mp = mld_input(mp, ira);
 343                 return (mp);
 344         default:
 345                 break;
 346         }
 347         /*
 348          * See if there is an ICMP client to avoid an extra copymsg/freemsg
 349          * if there isn't one.
 350          */
 351         if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
 352                 /* If there is an ICMP client and we want one too, copy it. */
 353 
 354                 if (!interested) {
 355                         /* Caller will deliver to RAW sockets */
 356                         return (mp);
 357                 }
 358                 mp_ret = copymsg(mp);
 359                 if (mp_ret == NULL) {
 360                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 361                         ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
 362                 }
 363         } else if (!interested) {
 364                 /* Neither we nor raw sockets are interested. Drop packet now */
 365                 freemsg(mp);
 366                 return (NULL);
 367         }
 368 
 369         /*
 370          * ICMP error or redirect packet. Make sure we have enough of
 371          * the header and that db_ref == 1 since we might end up modifying
 372          * the packet.
 373          */
 374         if (mp->b_cont != NULL) {
 375                 if (ip_pullup(mp, -1, ira) == NULL) {
 376                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 377                         ip_drop_input("ipIfStatsInDiscards - ip_pullup",
 378                             mp, ill);
 379                         freemsg(mp);
 380                         return (mp_ret);
 381                 }
 382         }
 383 
 384         if (mp->b_datap->db_ref > 1) {
 385                 mblk_t  *mp1;
 386 
 387                 mp1 = copymsg(mp);
 388                 if (mp1 == NULL) {
 389                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 390                         ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
 391                         freemsg(mp);
 392                         return (mp_ret);
 393                 }
 394                 freemsg(mp);
 395                 mp = mp1;
 396         }
 397 
 398         /*
 399          * In case mp has changed, verify the message before any further
 400          * processes.
 401          */
 402         ip6h = (ip6_t *)mp->b_rptr;
 403         icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
 404         if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
 405                 freemsg(mp);
 406                 return (mp_ret);
 407         }
 408 
 409         switch (icmp6->icmp6_type) {
 410         case ND_REDIRECT:
 411                 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
 412                 break;
 413         case ICMP6_PACKET_TOO_BIG:
 414                 /* Update DCE and adjust MTU is icmp header if needed */
 415                 icmp_inbound_too_big_v6(icmp6, ira);
 416                 /* FALLTHRU */
 417         default:
 418                 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
 419                 break;
 420         }
 421 
 422         return (mp_ret);
 423 }
 424 
 425 /*
 426  * Send an ICMP echo reply.
 427  * The caller has already updated the payload part of the packet.
 428  * We handle the ICMP checksum, IP source address selection and feed
 429  * the packet into ip_output_simple.
 430  */
 431 static void
 432 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
 433     ip_recv_attr_t *ira)
 434 {
 435         uint_t          ip_hdr_length = ira->ira_ip_hdr_length;
 436         ill_t           *ill = ira->ira_ill;
 437         ip_stack_t      *ipst = ill->ill_ipst;
 438         ip_xmit_attr_t  ixas;
 439         in6_addr_t      origsrc;
 440 
 441         /*
 442          * Remove any extension headers (do not reverse a source route)
 443          * and clear the flow id (keep traffic class for now).
 444          */
 445         if (ip_hdr_length != IPV6_HDR_LEN) {
 446                 int     i;
 447 
 448                 for (i = 0; i < IPV6_HDR_LEN; i++) {
 449                         mp->b_rptr[ip_hdr_length - i - 1] =
 450                             mp->b_rptr[IPV6_HDR_LEN - i - 1];
 451                 }
 452                 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
 453                 ip6h = (ip6_t *)mp->b_rptr;
 454                 ip6h->ip6_nxt = IPPROTO_ICMPV6;
 455                 i = ntohs(ip6h->ip6_plen);
 456                 i -= (ip_hdr_length - IPV6_HDR_LEN);
 457                 ip6h->ip6_plen = htons(i);
 458                 ip_hdr_length = IPV6_HDR_LEN;
 459                 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
 460         }
 461         ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
 462 
 463         /* Reverse the source and destination addresses. */
 464         origsrc = ip6h->ip6_src;
 465         ip6h->ip6_src = ip6h->ip6_dst;
 466         ip6h->ip6_dst = origsrc;
 467 
 468         /* set the hop limit */
 469         ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
 470 
 471         /*
 472          * Prepare for checksum by putting icmp length in the icmp
 473          * checksum field. The checksum is calculated in ip_output
 474          */
 475         icmp6->icmp6_cksum = ip6h->ip6_plen;
 476 
 477         bzero(&ixas, sizeof (ixas));
 478         ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
 479         ixas.ixa_zoneid = ira->ira_zoneid;
 480         ixas.ixa_cred = kcred;
 481         ixas.ixa_cpid = NOPID;
 482         ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */
 483         ixas.ixa_ifindex = 0;
 484         ixas.ixa_ipst = ipst;
 485         ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
 486 
 487         if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
 488                 /*
 489                  * This packet should go out the same way as it
 490                  * came in i.e in clear, independent of the IPsec
 491                  * policy for transmitting packets.
 492                  */
 493                 ixas.ixa_flags |= IXAF_NO_IPSEC;
 494         } else {
 495                 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
 496                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 497                         /* Note: mp already consumed and ip_drop_packet done */
 498                         return;
 499                 }
 500         }
 501 
 502         /* Was the destination (now source) link-local? Send out same group */
 503         if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
 504                 ixas.ixa_flags |= IXAF_SCOPEID_SET;
 505                 if (IS_UNDER_IPMP(ill))
 506                         ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
 507                 else
 508                         ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
 509         }
 510 
 511         if (ira->ira_flags & IRAF_MULTIBROADCAST) {
 512                 /*
 513                  * Not one or our addresses (IRE_LOCALs), thus we let
 514                  * ip_output_simple pick the source.
 515                  */
 516                 ip6h->ip6_src = ipv6_all_zeros;
 517                 ixas.ixa_flags |= IXAF_SET_SOURCE;
 518         }
 519 
 520         /* Should we send using dce_pmtu? */
 521         if (ipst->ips_ipv6_icmp_return_pmtu)
 522                 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
 523 
 524         (void) ip_output_simple(mp, &ixas);
 525         ixa_cleanup(&ixas);
 526 
 527 }
 528 
 529 /*
 530  * Verify the ICMP messages for either for ICMP error or redirect packet.
 531  * The caller should have fully pulled up the message. If it's a redirect
 532  * packet, only basic checks on IP header will be done; otherwise, verify
 533  * the packet by looking at the included ULP header.
 534  *
 535  * Called before icmp_inbound_error_fanout_v6 is called.
 536  */
 537 static boolean_t
 538 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
 539 {
 540         ill_t           *ill = ira->ira_ill;
 541         uint16_t        hdr_length;
 542         uint8_t         *nexthdrp;
 543         uint8_t         nexthdr;
 544         ip_stack_t      *ipst = ill->ill_ipst;
 545         conn_t          *connp;
 546         ip6_t           *ip6h;  /* Inner header */
 547 
 548         ip6h = (ip6_t *)&icmp6[1];
 549         if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
 550                 goto truncated;
 551 
 552         if (icmp6->icmp6_type == ND_REDIRECT) {
 553                 hdr_length = sizeof (nd_redirect_t);
 554         } else {
 555                 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
 556                         goto discard_pkt;
 557                 hdr_length = IPV6_HDR_LEN;
 558         }
 559 
 560         if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
 561                 goto truncated;
 562 
 563         /*
 564          * Stop here for ICMP_REDIRECT.
 565          */
 566         if (icmp6->icmp6_type == ND_REDIRECT)
 567                 return (B_TRUE);
 568 
 569         /*
 570          * ICMP errors only.
 571          */
 572         if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
 573                 goto discard_pkt;
 574         nexthdr = *nexthdrp;
 575 
 576         /* Try to pass the ICMP message to clients who need it */
 577         switch (nexthdr) {
 578         case IPPROTO_UDP:
 579                 /*
 580                  * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
 581                  * transport header.
 582                  */
 583                 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
 584                     mp->b_wptr)
 585                         goto truncated;
 586                 break;
 587         case IPPROTO_TCP: {
 588                 tcpha_t         *tcpha;
 589 
 590                 /*
 591                  * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
 592                  * transport header.
 593                  */
 594                 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
 595                     mp->b_wptr)
 596                         goto truncated;
 597 
 598                 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
 599                 /*
 600                  * With IPMP we need to match across group, which we do
 601                  * since we have the upper ill from ira_ill.
 602                  */
 603                 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
 604                     ill->ill_phyint->phyint_ifindex, ipst);
 605                 if (connp == NULL)
 606                         goto discard_pkt;
 607 
 608                 if ((connp->conn_verifyicmp != NULL) &&
 609                     !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
 610                         CONN_DEC_REF(connp);
 611                         goto discard_pkt;
 612                 }
 613                 CONN_DEC_REF(connp);
 614                 break;
 615         }
 616         case IPPROTO_SCTP:
 617                 /*
 618                  * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
 619                  * transport header.
 620                  */
 621                 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
 622                     mp->b_wptr)
 623                         goto truncated;
 624                 break;
 625         case IPPROTO_ESP:
 626         case IPPROTO_AH:
 627                 break;
 628         case IPPROTO_ENCAP:
 629         case IPPROTO_IPV6: {
 630                 /* Look for self-encapsulated packets that caused an error */
 631                 ip6_t *in_ip6h;
 632 
 633                 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
 634                 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
 635                     sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
 636                         goto truncated;
 637                 break;
 638         }
 639         default:
 640                 break;
 641         }
 642 
 643         return (B_TRUE);
 644 
 645 discard_pkt:
 646         /* Bogus ICMP error. */
 647         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 648         return (B_FALSE);
 649 
 650 truncated:
 651         /* We pulled up everthing already. Must be truncated */
 652         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
 653         return (B_FALSE);
 654 }
 655 
 656 /*
 657  * Process received IPv6 ICMP Packet too big.
 658  * The caller is responsible for validating the packet before passing it in
 659  * and also to fanout the ICMP error to any matching transport conns. Assumes
 660  * the message has been fully pulled up.
 661  *
 662  * Before getting here, the caller has called icmp_inbound_verify_v6()
 663  * that should have verified with ULP to prevent undoing the changes we're
 664  * going to make to DCE. For example, TCP might have verified that the packet
 665  * which generated error is in the send window.
 666  *
 667  * In some cases modified this MTU in the ICMP header packet; the caller
 668  * should pass to the matching ULP after this returns.
 669  */
 670 static void
 671 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
 672 {
 673         uint32_t        mtu;
 674         dce_t           *dce;
 675         ill_t           *ill = ira->ira_ill; /* Upper ill if IPMP */
 676         ip_stack_t      *ipst = ill->ill_ipst;
 677         int             old_max_frag;
 678         in6_addr_t      final_dst;
 679         ip6_t           *ip6h;  /* Inner IP header */
 680 
 681         /* Caller has already pulled up everything. */
 682         ip6h = (ip6_t *)&icmp6[1];
 683         final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
 684 
 685         /*
 686          * For link local destinations matching simply on address is not
 687          * sufficient. Same link local addresses for different ILL's is
 688          * possible.
 689          */
 690         if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
 691                 dce = dce_lookup_and_add_v6(&final_dst,
 692                     ill->ill_phyint->phyint_ifindex, ipst);
 693         } else {
 694                 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
 695         }
 696         if (dce == NULL) {
 697                 /* Couldn't add a unique one - ENOMEM */
 698                 if (ip_debug > 2) {
 699                         /* ip1dbg */
 700                         pr_addr_dbg("icmp_inbound_too_big_v6:"
 701                             "no dce for dst %s\n", AF_INET6,
 702                             &final_dst);
 703                 }
 704                 return;
 705         }
 706 
 707         mtu = ntohl(icmp6->icmp6_mtu);
 708 
 709         mutex_enter(&dce->dce_lock);
 710         if (dce->dce_flags & DCEF_PMTU)
 711                 old_max_frag = dce->dce_pmtu;
 712         else if (IN6_IS_ADDR_MULTICAST(&final_dst))
 713                 old_max_frag = ill->ill_mc_mtu;
 714         else
 715                 old_max_frag = ill->ill_mtu;
 716 
 717         if (mtu < IPV6_MIN_MTU) {
 718                 ip1dbg(("Received mtu less than IPv6 "
 719                     "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
 720                 mtu = IPV6_MIN_MTU;
 721                 /*
 722                  * If an mtu less than IPv6 min mtu is received,
 723                  * we must include a fragment header in
 724                  * subsequent packets.
 725                  */
 726                 dce->dce_flags |= DCEF_TOO_SMALL_PMTU;
 727         } else {
 728                 dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU;
 729         }
 730         ip1dbg(("Received mtu from router: %d\n", mtu));
 731         dce->dce_pmtu = MIN(old_max_frag, mtu);
 732 
 733         /* Prepare to send the new max frag size for the ULP. */
 734         if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) {
 735                 /*
 736                  * If we need a fragment header in every packet
 737                  * (above case or multirouting), make sure the
 738                  * ULP takes it into account when computing the
 739                  * payload size.
 740                  */
 741                 icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t));
 742         } else {
 743                 icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
 744         }
 745         /* We now have a PMTU for sure */
 746         dce->dce_flags |= DCEF_PMTU;
 747         dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
 748         mutex_exit(&dce->dce_lock);
 749         /*
 750          * After dropping the lock the new value is visible to everyone.
 751          * Then we bump the generation number so any cached values reinspect
 752          * the dce_t.
 753          */
 754         dce_increment_generation(dce);
 755         dce_refrele(dce);
 756 }
 757 
 758 /*
 759  * Fanout received ICMPv6 error packets to the transports.
 760  * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
 761  *
 762  * The caller must have called icmp_inbound_verify_v6.
 763  */
 764 void
 765 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
 766 {
 767         uint16_t        *up;    /* Pointer to ports in ULP header */
 768         uint32_t        ports;  /* reversed ports for fanout */
 769         ip6_t           rip6h;  /* With reversed addresses */
 770         ip6_t           *ip6h;  /* Inner IP header */
 771         uint16_t        hdr_length; /* Inner IP header length */
 772         uint8_t         *nexthdrp;
 773         uint8_t         nexthdr;
 774         tcpha_t         *tcpha;
 775         conn_t          *connp;
 776         ill_t           *ill = ira->ira_ill; /* Upper in the case of IPMP */
 777         ip_stack_t      *ipst = ill->ill_ipst;
 778         ipsec_stack_t   *ipss = ipst->ips_netstack->netstack_ipsec;
 779 
 780         /* Caller has already pulled up everything. */
 781         ip6h = (ip6_t *)&icmp6[1];
 782         ASSERT(mp->b_cont == NULL);
 783         ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
 784 
 785         if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
 786                 goto drop_pkt;
 787         nexthdr = *nexthdrp;
 788         ira->ira_protocol = nexthdr;
 789 
 790         /*
 791          * We need a separate IP header with the source and destination
 792          * addresses reversed to do fanout/classification because the ip6h in
 793          * the ICMPv6 error is in the form we sent it out.
 794          */
 795         rip6h.ip6_src = ip6h->ip6_dst;
 796         rip6h.ip6_dst = ip6h->ip6_src;
 797         rip6h.ip6_nxt = nexthdr;
 798 
 799         /* Try to pass the ICMP message to clients who need it */
 800         switch (nexthdr) {
 801         case IPPROTO_UDP: {
 802                 /* Attempt to find a client stream based on port. */
 803                 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
 804 
 805                 /* Note that we send error to all matches. */
 806                 ira->ira_flags |= IRAF_ICMP_ERROR;
 807                 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
 808                 ira->ira_flags &= ~IRAF_ICMP_ERROR;
 809                 return;
 810         }
 811         case IPPROTO_TCP: {
 812                 /*
 813                  * Attempt to find a client stream based on port.
 814                  * Note that we do a reverse lookup since the header is
 815                  * in the form we sent it out.
 816                  */
 817                 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
 818                 /*
 819                  * With IPMP we need to match across group, which we do
 820                  * since we have the upper ill from ira_ill.
 821                  */
 822                 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
 823                     TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
 824                 if (connp == NULL) {
 825                         goto drop_pkt;
 826                 }
 827 
 828                 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
 829                     (ira->ira_flags & IRAF_IPSEC_SECURE)) {
 830                         mp = ipsec_check_inbound_policy(mp, connp,
 831                             NULL, ip6h, ira);
 832                         if (mp == NULL) {
 833                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
 834                                 /* Note that mp is NULL */
 835                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
 836                                 CONN_DEC_REF(connp);
 837                                 return;
 838                         }
 839                 }
 840 
 841                 ira->ira_flags |= IRAF_ICMP_ERROR;
 842                 if (IPCL_IS_TCP(connp)) {
 843                         SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
 844                             connp->conn_recvicmp, connp, ira, SQ_FILL,
 845                             SQTAG_TCP6_INPUT_ICMP_ERR);
 846                 } else {
 847                         /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
 848                         ill_t *rill = ira->ira_rill;
 849 
 850                         ira->ira_ill = ira->ira_rill = NULL;
 851                         (connp->conn_recv)(connp, mp, NULL, ira);
 852                         CONN_DEC_REF(connp);
 853                         ira->ira_ill = ill;
 854                         ira->ira_rill = rill;
 855                 }
 856                 ira->ira_flags &= ~IRAF_ICMP_ERROR;
 857                 return;
 858 
 859         }
 860         case IPPROTO_SCTP:
 861                 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
 862                 /* Find a SCTP client stream for this packet. */
 863                 ((uint16_t *)&ports)[0] = up[1];
 864                 ((uint16_t *)&ports)[1] = up[0];
 865 
 866                 ira->ira_flags |= IRAF_ICMP_ERROR;
 867                 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
 868                 ira->ira_flags &= ~IRAF_ICMP_ERROR;
 869                 return;
 870 
 871         case IPPROTO_ESP:
 872         case IPPROTO_AH:
 873                 if (!ipsec_loaded(ipss)) {
 874                         ip_proto_not_sup(mp, ira);
 875                         return;
 876                 }
 877 
 878                 if (nexthdr == IPPROTO_ESP)
 879                         mp = ipsecesp_icmp_error(mp, ira);
 880                 else
 881                         mp = ipsecah_icmp_error(mp, ira);
 882                 if (mp == NULL)
 883                         return;
 884 
 885                 /* Just in case ipsec didn't preserve the NULL b_cont */
 886                 if (mp->b_cont != NULL) {
 887                         if (!pullupmsg(mp, -1))
 888                                 goto drop_pkt;
 889                 }
 890 
 891                 /*
 892                  * If succesful, the mp has been modified to not include
 893                  * the ESP/AH header so we can fanout to the ULP's icmp
 894                  * error handler.
 895                  */
 896                 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
 897                         goto drop_pkt;
 898 
 899                 ip6h = (ip6_t *)mp->b_rptr;
 900                 /* Don't call hdr_length_v6() unless you have to. */
 901                 if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
 902                         hdr_length = ip_hdr_length_v6(mp, ip6h);
 903                 else
 904                         hdr_length = IPV6_HDR_LEN;
 905 
 906                 /* Verify the modified message before any further processes. */
 907                 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
 908                 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
 909                         freemsg(mp);
 910                         return;
 911                 }
 912 
 913                 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
 914                 return;
 915 
 916         case IPPROTO_IPV6: {
 917                 /* Look for self-encapsulated packets that caused an error */
 918                 ip6_t *in_ip6h;
 919 
 920                 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
 921 
 922                 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
 923                     IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
 924                         /*
 925                          * Self-encapsulated case. As in the ipv4 case,
 926                          * we need to strip the 2nd IP header. Since mp
 927                          * is already pulled-up, we can simply bcopy
 928                          * the 3rd header + data over the 2nd header.
 929                          */
 930                         uint16_t unused_len;
 931 
 932                         /*
 933                          * Make sure we don't do recursion more than once.
 934                          */
 935                         if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
 936                             &unused_len, &nexthdrp) ||
 937                             *nexthdrp == IPPROTO_IPV6) {
 938                                 goto drop_pkt;
 939                         }
 940 
 941                         /*
 942                          * Copy the 3rd header + remaining data on top
 943                          * of the 2nd header.
 944                          */
 945                         bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
 946 
 947                         /*
 948                          * Subtract length of the 2nd header.
 949                          */
 950                         mp->b_wptr -= hdr_length;
 951 
 952                         ip6h = (ip6_t *)mp->b_rptr;
 953                         /* Don't call hdr_length_v6() unless you have to. */
 954                         if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
 955                                 hdr_length = ip_hdr_length_v6(mp, ip6h);
 956                         else
 957                                 hdr_length = IPV6_HDR_LEN;
 958 
 959                         /*
 960                          * Verify the modified message before any further
 961                          * processes.
 962                          */
 963                         icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
 964                         if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
 965                                 freemsg(mp);
 966                                 return;
 967                         }
 968 
 969                         /*
 970                          * Now recurse, and see what I _really_ should be
 971                          * doing here.
 972                          */
 973                         icmp_inbound_error_fanout_v6(mp, icmp6, ira);
 974                         return;
 975                 }
 976                 /* FALLTHRU */
 977         }
 978         case IPPROTO_ENCAP:
 979                 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
 980                     &rip6h.ip6_dst, ipst)) != NULL) {
 981                         ira->ira_flags |= IRAF_ICMP_ERROR;
 982                         connp->conn_recvicmp(connp, mp, NULL, ira);
 983                         CONN_DEC_REF(connp);
 984                         ira->ira_flags &= ~IRAF_ICMP_ERROR;
 985                         return;
 986                 }
 987                 /*
 988                  * No IP tunnel is interested, fallthrough and see
 989                  * if a raw socket will want it.
 990                  */
 991                 /* FALLTHRU */
 992         default:
 993                 ira->ira_flags |= IRAF_ICMP_ERROR;
 994                 ASSERT(ira->ira_protocol == nexthdr);
 995                 ip_fanout_proto_v6(mp, &rip6h, ira);
 996                 ira->ira_flags &= ~IRAF_ICMP_ERROR;
 997                 return;
 998         }
 999         /* NOTREACHED */
1000 drop_pkt:
1001         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
1002         ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
1003         freemsg(mp);
1004 }
1005 
1006 /*
1007  * Process received IPv6 ICMP Redirect messages.
1008  * Assumes the caller has verified that the headers are in the pulled up mblk.
1009  * Consumes mp.
1010  */
1011 /* ARGSUSED */
1012 static void
1013 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1014     ip_recv_attr_t *ira)
1015 {
1016         ire_t           *ire, *nire;
1017         ire_t           *prev_ire = NULL;
1018         ire_t           *redir_ire;
1019         in6_addr_t      *src, *dst, *gateway;
1020         nd_opt_hdr_t    *opt;
1021         nce_t           *nce;
1022         int             ncec_flags = 0;
1023         int             err = 0;
1024         boolean_t       redirect_to_router = B_FALSE;
1025         int             len;
1026         int             optlen;
1027         ill_t           *ill = ira->ira_rill;
1028         ill_t           *rill = ira->ira_rill;
1029         ip_stack_t      *ipst = ill->ill_ipst;
1030 
1031         /*
1032          * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1033          * and make it be the IPMP upper so avoid being confused by a packet
1034          * addressed to a unicast address on a different ill.
1035          */
1036         if (IS_UNDER_IPMP(rill)) {
1037                 rill = ipmp_ill_hold_ipmp_ill(rill);
1038                 if (rill == NULL) {
1039                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1040                         ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1041                             mp, ill);
1042                         freemsg(mp);
1043                         return;
1044                 }
1045                 ASSERT(rill != ira->ira_rill);
1046         }
1047 
1048         len = mp->b_wptr - (uchar_t *)rd;
1049         src = &ip6h->ip6_src;
1050         dst = &rd->nd_rd_dst;
1051         gateway = &rd->nd_rd_target;
1052 
1053         /* Verify if it is a valid redirect */
1054         if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1055             (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1056             (rd->nd_rd_code != 0) ||
1057             (len < sizeof (nd_redirect_t)) ||
1058             (IN6_IS_ADDR_V4MAPPED(dst)) ||
1059             (IN6_IS_ADDR_MULTICAST(dst))) {
1060                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1061                 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1062                 goto fail_redirect;
1063         }
1064 
1065         if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1066             IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1067                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1068                 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1069                     mp, ill);
1070                 goto fail_redirect;
1071         }
1072 
1073         optlen = len - sizeof (nd_redirect_t);
1074         if (optlen != 0) {
1075                 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1076                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1077                         ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1078                             mp, ill);
1079                         goto fail_redirect;
1080                 }
1081         }
1082 
1083         if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1084                 redirect_to_router = B_TRUE;
1085                 ncec_flags |= NCE_F_ISROUTER;
1086         } else {
1087                 gateway = dst;  /* Add nce for dst */
1088         }
1089 
1090 
1091         /*
1092          * Verify that the IP source address of the redirect is
1093          * the same as the current first-hop router for the specified
1094          * ICMP destination address.
1095          * Also, Make sure we had a route for the dest in question and
1096          * that route was pointing to the old gateway (the source of the
1097          * redirect packet.)
1098          * We do longest match and then compare ire_gateway_addr_v6 below.
1099          */
1100         prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1101             ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL);
1102 
1103         /*
1104          * Check that
1105          *      the redirect was not from ourselves
1106          *      old gateway is still directly reachable
1107          */
1108         if (prev_ire == NULL ||
1109             (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1110             (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1111             !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1112                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1113                 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1114                 goto fail_redirect;
1115         }
1116 
1117         ASSERT(prev_ire->ire_ill != NULL);
1118         if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1119                 ncec_flags |= NCE_F_NONUD;
1120 
1121         opt = (nd_opt_hdr_t *)&rd[1];
1122         opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1123         if (opt != NULL) {
1124                 err = nce_lookup_then_add_v6(rill,
1125                     (uchar_t *)&opt[1],             /* Link layer address */
1126                     rill->ill_phys_addr_length,
1127                     gateway, ncec_flags, ND_STALE, &nce);
1128                 switch (err) {
1129                 case 0:
1130                         nce_refrele(nce);
1131                         break;
1132                 case EEXIST:
1133                         /*
1134                          * Check to see if link layer address has changed and
1135                          * process the ncec_state accordingly.
1136                          */
1137                         nce_process(nce->nce_common,
1138                             (uchar_t *)&opt[1], 0, B_FALSE);
1139                         nce_refrele(nce);
1140                         break;
1141                 default:
1142                         ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1143                             err));
1144                         goto fail_redirect;
1145                 }
1146         }
1147         if (redirect_to_router) {
1148                 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1149 
1150                 /*
1151                  * Create a Route Association.  This will allow us to remember
1152                  * a router told us to use the particular gateway.
1153                  */
1154                 ire = ire_create_v6(
1155                     dst,
1156                     &ipv6_all_ones,         /* mask */
1157                     gateway,                    /* gateway addr */
1158                     IRE_HOST,
1159                     prev_ire->ire_ill,
1160                     ALL_ZONES,
1161                     (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1162                     NULL,
1163                     ipst);
1164         } else {
1165                 ipif_t *ipif;
1166                 in6_addr_t gw;
1167 
1168                 /*
1169                  * Just create an on link entry, i.e. interface route.
1170                  * The gateway field is our link-local on the ill.
1171                  */
1172                 mutex_enter(&rill->ill_lock);
1173                 for (ipif = rill->ill_ipif; ipif != NULL;
1174                     ipif = ipif->ipif_next) {
1175                         if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1176                             IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1177                                 break;
1178                 }
1179                 if (ipif == NULL) {
1180                         /* We have no link-local address! */
1181                         mutex_exit(&rill->ill_lock);
1182                         goto fail_redirect;
1183                 }
1184                 gw = ipif->ipif_v6lcl_addr;
1185                 mutex_exit(&rill->ill_lock);
1186 
1187                 ire = ire_create_v6(
1188                     dst,                                /* gateway == dst */
1189                     &ipv6_all_ones,                 /* mask */
1190                     &gw,                            /* gateway addr */
1191                     rill->ill_net_type,                      /* IF_[NO]RESOLVER */
1192                     prev_ire->ire_ill,
1193                     ALL_ZONES,
1194                     (RTF_DYNAMIC | RTF_HOST),
1195                     NULL,
1196                     ipst);
1197         }
1198 
1199         if (ire == NULL)
1200                 goto fail_redirect;
1201 
1202         nire = ire_add(ire);
1203         /* Check if it was a duplicate entry */
1204         if (nire != NULL && nire != ire) {
1205                 ASSERT(nire->ire_identical_ref > 1);
1206                 ire_delete(nire);
1207                 ire_refrele(nire);
1208                 nire = NULL;
1209         }
1210         ire = nire;
1211         if (ire != NULL) {
1212                 ire_refrele(ire);               /* Held in ire_add */
1213 
1214                 /* tell routing sockets that we received a redirect */
1215                 ip_rts_change_v6(RTM_REDIRECT,
1216                     &rd->nd_rd_dst,
1217                     &rd->nd_rd_target,
1218                     &ipv6_all_ones, 0, src,
1219                     (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1220                     (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1221 
1222                 /*
1223                  * Delete any existing IRE_HOST type ires for this destination.
1224                  * This together with the added IRE has the effect of
1225                  * modifying an existing redirect.
1226                  */
1227                 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1228                     prev_ire->ire_ill, ALL_ZONES, NULL,
1229                     (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1230                     NULL);
1231 
1232                 if (redir_ire != NULL) {
1233                         if (redir_ire->ire_flags & RTF_DYNAMIC)
1234                                 ire_delete(redir_ire);
1235                         ire_refrele(redir_ire);
1236                 }
1237         }
1238 
1239         ire_refrele(prev_ire);
1240         prev_ire = NULL;
1241 
1242 fail_redirect:
1243         if (prev_ire != NULL)
1244                 ire_refrele(prev_ire);
1245         freemsg(mp);
1246         if (rill != ira->ira_rill)
1247                 ill_refrele(rill);
1248 }
1249 
1250 /*
1251  * Build and ship an IPv6 ICMP message using the packet data in mp,
1252  * and the ICMP header pointed to by "stuff".  (May be called as
1253  * writer.)
1254  * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1255  * verify that an icmp error packet can be sent.
1256  *
1257  * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1258  * source address (see above function).
1259  */
1260 static void
1261 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1262     const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1263 {
1264         ip6_t           *ip6h;
1265         in6_addr_t      v6dst;
1266         size_t          len_needed;
1267         size_t          msg_len;
1268         mblk_t          *mp1;
1269         icmp6_t         *icmp6;
1270         in6_addr_t      v6src;
1271         ill_t           *ill = ira->ira_ill;
1272         ip_stack_t      *ipst = ill->ill_ipst;
1273         ip_xmit_attr_t  ixas;
1274 
1275         ip6h = (ip6_t *)mp->b_rptr;
1276 
1277         bzero(&ixas, sizeof (ixas));
1278         ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1279         ixas.ixa_zoneid = ira->ira_zoneid;
1280         ixas.ixa_ifindex = 0;
1281         ixas.ixa_ipst = ipst;
1282         ixas.ixa_cred = kcred;
1283         ixas.ixa_cpid = NOPID;
1284         ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */
1285         ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1286 
1287         /*
1288          * If the source of the original packet was link-local, then
1289          * make sure we send on the same ill (group) as we received it on.
1290          */
1291         if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1292                 ixas.ixa_flags |= IXAF_SCOPEID_SET;
1293                 if (IS_UNDER_IPMP(ill))
1294                         ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1295                 else
1296                         ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1297         }
1298 
1299         if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1300                 /*
1301                  * Apply IPsec based on how IPsec was applied to
1302                  * the packet that had the error.
1303                  *
1304                  * If it was an outbound packet that caused the ICMP
1305                  * error, then the caller will have setup the IRA
1306                  * appropriately.
1307                  */
1308                 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1309                         BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1310                         /* Note: mp already consumed and ip_drop_packet done */
1311                         return;
1312                 }
1313         } else {
1314                 /*
1315                  * This is in clear. The icmp message we are building
1316                  * here should go out in clear, independent of our policy.
1317                  */
1318                 ixas.ixa_flags |= IXAF_NO_IPSEC;
1319         }
1320 
1321         /*
1322          * If the caller specified the source we use that.
1323          * Otherwise, if the packet was for one of our unicast addresses, make
1324          * sure we respond with that as the source. Otherwise
1325          * have ip_output_simple pick the source address.
1326          */
1327         if (v6src_ptr != NULL) {
1328                 v6src = *v6src_ptr;
1329         } else {
1330                 ire_t *ire;
1331                 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1332 
1333                 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1334                     IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1335                         match_flags |= MATCH_IRE_ILL;
1336 
1337                 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1338                     (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL,
1339                     match_flags, 0, ipst, NULL);
1340                 if (ire != NULL) {
1341                         v6src = ip6h->ip6_dst;
1342                         ire_refrele(ire);
1343                 } else {
1344                         v6src = ipv6_all_zeros;
1345                         ixas.ixa_flags |= IXAF_SET_SOURCE;
1346                 }
1347         }
1348         v6dst = ip6h->ip6_src;
1349         len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1350         msg_len = msgdsize(mp);
1351         if (msg_len > len_needed) {
1352                 if (!adjmsg(mp, len_needed - msg_len)) {
1353                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1354                         freemsg(mp);
1355                         return;
1356                 }
1357                 msg_len = len_needed;
1358         }
1359         mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1360         if (mp1 == NULL) {
1361                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1362                 freemsg(mp);
1363                 return;
1364         }
1365         mp1->b_cont = mp;
1366         mp = mp1;
1367 
1368         /*
1369          * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1370          * node generates be accepted in peace by all on-host destinations.
1371          * If we do NOT assume that all on-host destinations trust
1372          * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1373          * (Look for IXAF_TRUSTED_ICMP).
1374          */
1375         ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1376 
1377         ip6h = (ip6_t *)mp->b_rptr;
1378         mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1379 
1380         ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1381         ip6h->ip6_nxt = IPPROTO_ICMPV6;
1382         ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1383         ip6h->ip6_dst = v6dst;
1384         ip6h->ip6_src = v6src;
1385         msg_len += IPV6_HDR_LEN + len;
1386         if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1387                 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1388                 msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1389         }
1390         ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1391         icmp6 = (icmp6_t *)&ip6h[1];
1392         bcopy(stuff, (char *)icmp6, len);
1393         /*
1394          * Prepare for checksum by putting icmp length in the icmp
1395          * checksum field. The checksum is calculated in ip_output_wire_v6.
1396          */
1397         icmp6->icmp6_cksum = ip6h->ip6_plen;
1398         if (icmp6->icmp6_type == ND_REDIRECT) {
1399                 ip6h->ip6_hops = IPV6_MAX_HOPS;
1400         }
1401 
1402         (void) ip_output_simple(mp, &ixas);
1403         ixa_cleanup(&ixas);
1404 }
1405 
1406 /*
1407  * Update the output mib when ICMPv6 packets are sent.
1408  */
1409 void
1410 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1411 {
1412         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1413 
1414         switch (icmp6->icmp6_type) {
1415         case ICMP6_DST_UNREACH:
1416                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1417                 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1418                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1419                 break;
1420 
1421         case ICMP6_TIME_EXCEEDED:
1422                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1423                 break;
1424 
1425         case ICMP6_PARAM_PROB:
1426                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1427                 break;
1428 
1429         case ICMP6_PACKET_TOO_BIG:
1430                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1431                 break;
1432 
1433         case ICMP6_ECHO_REQUEST:
1434                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1435                 break;
1436 
1437         case ICMP6_ECHO_REPLY:
1438                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1439                 break;
1440 
1441         case ND_ROUTER_SOLICIT:
1442                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1443                 break;
1444 
1445         case ND_ROUTER_ADVERT:
1446                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1447                 break;
1448 
1449         case ND_NEIGHBOR_SOLICIT:
1450                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1451                 break;
1452 
1453         case ND_NEIGHBOR_ADVERT:
1454                 BUMP_MIB(ill->ill_icmp6_mib,
1455                     ipv6IfIcmpOutNeighborAdvertisements);
1456                 break;
1457 
1458         case ND_REDIRECT:
1459                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1460                 break;
1461 
1462         case MLD_LISTENER_QUERY:
1463                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1464                 break;
1465 
1466         case MLD_LISTENER_REPORT:
1467         case MLD_V2_LISTENER_REPORT:
1468                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1469                 break;
1470 
1471         case MLD_LISTENER_REDUCTION:
1472                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1473                 break;
1474         }
1475 }
1476 
1477 /*
1478  * Check if it is ok to send an ICMPv6 error packet in
1479  * response to the IP packet in mp.
1480  * Free the message and return null if no
1481  * ICMP error packet should be sent.
1482  */
1483 static mblk_t *
1484 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1485 {
1486         ill_t           *ill = ira->ira_ill;
1487         ip_stack_t      *ipst = ill->ill_ipst;
1488         boolean_t       llbcast;
1489         ip6_t           *ip6h;
1490 
1491         if (!mp)
1492                 return (NULL);
1493 
1494         /* We view multicast and broadcast as the same.. */
1495         llbcast = (ira->ira_flags &
1496             (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1497         ip6h = (ip6_t *)mp->b_rptr;
1498 
1499         /* Check if source address uniquely identifies the host */
1500 
1501         if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1502             IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1503             IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1504                 freemsg(mp);
1505                 return (NULL);
1506         }
1507 
1508         if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1509                 size_t  len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1510                 icmp6_t         *icmp6;
1511 
1512                 if (mp->b_wptr - mp->b_rptr < len_needed) {
1513                         if (!pullupmsg(mp, len_needed)) {
1514                                 BUMP_MIB(ill->ill_icmp6_mib,
1515                                     ipv6IfIcmpInErrors);
1516                                 freemsg(mp);
1517                                 return (NULL);
1518                         }
1519                         ip6h = (ip6_t *)mp->b_rptr;
1520                 }
1521                 icmp6 = (icmp6_t *)&ip6h[1];
1522                 /* Explicitly do not generate errors in response to redirects */
1523                 if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1524                     icmp6->icmp6_type == ND_REDIRECT) {
1525                         freemsg(mp);
1526                         return (NULL);
1527                 }
1528         }
1529         /*
1530          * Check that the destination is not multicast and that the packet
1531          * was not sent on link layer broadcast or multicast.  (Exception
1532          * is Packet too big message as per the draft - when mcast_ok is set.)
1533          */
1534         if (!mcast_ok &&
1535             (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1536                 freemsg(mp);
1537                 return (NULL);
1538         }
1539         /*
1540          * If this is a labeled system, then check to see if we're allowed to
1541          * send a response to this particular sender.  If not, then just drop.
1542          */
1543         if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) {
1544                 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1545                 freemsg(mp);
1546                 return (NULL);
1547         }
1548 
1549         if (icmp_err_rate_limit(ipst)) {
1550                 /*
1551                  * Only send ICMP error packets every so often.
1552                  * This should be done on a per port/source basis,
1553                  * but for now this will suffice.
1554                  */
1555                 freemsg(mp);
1556                 return (NULL);
1557         }
1558         return (mp);
1559 }
1560 
1561 /*
1562  * Called when a packet was sent out the same link that it arrived on.
1563  * Check if it is ok to send a redirect and then send it.
1564  */
1565 void
1566 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1567     ip_recv_attr_t *ira)
1568 {
1569         ill_t           *ill = ira->ira_ill;
1570         ip_stack_t      *ipst = ill->ill_ipst;
1571         in6_addr_t      *v6targ;
1572         ire_t           *src_ire_v6 = NULL;
1573         mblk_t          *mp1;
1574         ire_t           *nhop_ire = NULL;
1575 
1576         /*
1577          * Don't send a redirect when forwarding a source
1578          * routed packet.
1579          */
1580         if (ip_source_routed_v6(ip6h, mp, ipst))
1581                 return;
1582 
1583         if (ire->ire_type & IRE_ONLINK) {
1584                 /* Target is directly connected */
1585                 v6targ = &ip6h->ip6_dst;
1586         } else {
1587                 /* Determine the most specific IRE used to send the packets */
1588                 nhop_ire = ire_nexthop(ire);
1589                 if (nhop_ire == NULL)
1590                         return;
1591 
1592                 /*
1593                  * We won't send redirects to a router
1594                  * that doesn't have a link local
1595                  * address, but will forward.
1596                  */
1597                 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1598                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1599                         ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1600                         ire_refrele(nhop_ire);
1601                         return;
1602                 }
1603                 v6targ = &nhop_ire->ire_addr_v6;
1604         }
1605         src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1606             NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL,
1607             MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1608 
1609         if (src_ire_v6 == NULL) {
1610                 if (nhop_ire != NULL)
1611                         ire_refrele(nhop_ire);
1612                 return;
1613         }
1614 
1615         /*
1616          * The source is directly connected.
1617          */
1618         mp1 = copymsg(mp);
1619         if (mp1 != NULL)
1620                 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1621 
1622         if (nhop_ire != NULL)
1623                 ire_refrele(nhop_ire);
1624         ire_refrele(src_ire_v6);
1625 }
1626 
1627 /*
1628  * Generate an ICMPv6 redirect message.
1629  * Include target link layer address option if it exits.
1630  * Always include redirect header.
1631  */
1632 static void
1633 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1634     ip_recv_attr_t *ira)
1635 {
1636         nd_redirect_t   *rd;
1637         nd_opt_rd_hdr_t *rdh;
1638         uchar_t         *buf;
1639         ncec_t          *ncec = NULL;
1640         nd_opt_hdr_t    *opt;
1641         int             len;
1642         int             ll_opt_len = 0;
1643         int             max_redir_hdr_data_len;
1644         int             pkt_len;
1645         in6_addr_t      *srcp;
1646         ill_t           *ill;
1647         boolean_t       need_refrele;
1648         ip_stack_t      *ipst = ira->ira_ill->ill_ipst;
1649 
1650         mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1651         if (mp == NULL)
1652                 return;
1653 
1654         if (IS_UNDER_IPMP(ira->ira_ill)) {
1655                 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1656                 if (ill == NULL) {
1657                         ill = ira->ira_ill;
1658                         BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1659                         ip_drop_output("no IPMP ill for sending redirect",
1660                             mp, ill);
1661                         freemsg(mp);
1662                         return;
1663                 }
1664                 need_refrele = B_TRUE;
1665         } else {
1666                 ill = ira->ira_ill;
1667                 need_refrele = B_FALSE;
1668         }
1669 
1670         ncec = ncec_lookup_illgrp_v6(ill, targetp);
1671         if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1672             ncec->ncec_lladdr != NULL) {
1673                 ll_opt_len = (sizeof (nd_opt_hdr_t) +
1674                     ill->ill_phys_addr_length + 7)/8 * 8;
1675         }
1676         len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1677         ASSERT(len % 4 == 0);
1678         buf = kmem_alloc(len, KM_NOSLEEP);
1679         if (buf == NULL) {
1680                 if (ncec != NULL)
1681                         ncec_refrele(ncec);
1682                 if (need_refrele)
1683                         ill_refrele(ill);
1684                 freemsg(mp);
1685                 return;
1686         }
1687 
1688         rd = (nd_redirect_t *)buf;
1689         rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1690         rd->nd_rd_code = 0;
1691         rd->nd_rd_reserved = 0;
1692         rd->nd_rd_target = *targetp;
1693         rd->nd_rd_dst = *dest;
1694 
1695         opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1696         if (ncec != NULL && ll_opt_len != 0) {
1697                 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1698                 opt->nd_opt_len = ll_opt_len/8;
1699                 bcopy((char *)ncec->ncec_lladdr, &opt[1],
1700                     ill->ill_phys_addr_length);
1701         }
1702         if (ncec != NULL)
1703                 ncec_refrele(ncec);
1704         rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1705         rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1706         /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1707         max_redir_hdr_data_len =
1708             (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1709         pkt_len = msgdsize(mp);
1710         /* Make sure mp is 8 byte aligned */
1711         if (pkt_len > max_redir_hdr_data_len) {
1712                 rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1713                     sizeof (nd_opt_rd_hdr_t))/8;
1714                 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1715         } else {
1716                 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1717                 (void) adjmsg(mp, -(pkt_len % 8));
1718         }
1719         rdh->nd_opt_rh_reserved1 = 0;
1720         rdh->nd_opt_rh_reserved2 = 0;
1721         /* ipif_v6lcl_addr contains the link-local source address */
1722         srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1723 
1724         /* Redirects sent by router, and router is global zone */
1725         ASSERT(ira->ira_zoneid == ALL_ZONES);
1726         ira->ira_zoneid = GLOBAL_ZONEID;
1727         icmp_pkt_v6(mp, buf, len, srcp, ira);
1728         kmem_free(buf, len);
1729         if (need_refrele)
1730                 ill_refrele(ill);
1731 }
1732 
1733 
1734 /* Generate an ICMP time exceeded message.  (May be called as writer.) */
1735 void
1736 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1737     ip_recv_attr_t *ira)
1738 {
1739         icmp6_t icmp6;
1740 
1741         mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1742         if (mp == NULL)
1743                 return;
1744 
1745         bzero(&icmp6, sizeof (icmp6_t));
1746         icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1747         icmp6.icmp6_code = code;
1748         icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1749 }
1750 
1751 /*
1752  * Generate an ICMP unreachable message.
1753  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1754  * constructed by the caller.
1755  */
1756 void
1757 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1758     ip_recv_attr_t *ira)
1759 {
1760         icmp6_t icmp6;
1761 
1762         mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1763         if (mp == NULL)
1764                 return;
1765 
1766         bzero(&icmp6, sizeof (icmp6_t));
1767         icmp6.icmp6_type = ICMP6_DST_UNREACH;
1768         icmp6.icmp6_code = code;
1769         icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1770 }
1771 
1772 /*
1773  * Generate an ICMP pkt too big message.
1774  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1775  * constructed by the caller.
1776  */
1777 void
1778 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1779     ip_recv_attr_t *ira)
1780 {
1781         icmp6_t icmp6;
1782 
1783         mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1784         if (mp == NULL)
1785                 return;
1786 
1787         bzero(&icmp6, sizeof (icmp6_t));
1788         icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1789         icmp6.icmp6_code = 0;
1790         icmp6.icmp6_mtu = htonl(mtu);
1791 
1792         icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1793 }
1794 
1795 /*
1796  * Generate an ICMP parameter problem message. (May be called as writer.)
1797  * 'offset' is the offset from the beginning of the packet in error.
1798  * When called from ip_output side a minimal ip_recv_attr_t needs to be
1799  * constructed by the caller.
1800  */
1801 static void
1802 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1803     boolean_t mcast_ok, ip_recv_attr_t *ira)
1804 {
1805         icmp6_t icmp6;
1806 
1807         mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1808         if (mp == NULL)
1809                 return;
1810 
1811         bzero((char *)&icmp6, sizeof (icmp6_t));
1812         icmp6.icmp6_type = ICMP6_PARAM_PROB;
1813         icmp6.icmp6_code = code;
1814         icmp6.icmp6_pptr = htonl(offset);
1815         icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1816 }
1817 
1818 void
1819 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1820     ip_recv_attr_t *ira)
1821 {
1822         ip6_t           *ip6h = (ip6_t *)mp->b_rptr;
1823         uint16_t        hdr_length;
1824         uint8_t         *nexthdrp;
1825         uint32_t        offset;
1826         ill_t           *ill = ira->ira_ill;
1827 
1828         /* Determine the offset of the bad nexthdr value */
1829         if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
1830                 /* Malformed packet */
1831                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1832                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
1833                 freemsg(mp);
1834                 return;
1835         }
1836 
1837         offset = nexthdrp - mp->b_rptr;
1838         icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1839             mcast_ok, ira);
1840 }
1841 
1842 /*
1843  * Verify whether or not the IP address is a valid local address.
1844  * Could be a unicast, including one for a down interface.
1845  * If allow_mcbc then a multicast or broadcast address is also
1846  * acceptable.
1847  *
1848  * In the case of a multicast address, however, the
1849  * upper protocol is expected to reset the src address
1850  * to zero when we return IPVL_MCAST so that
1851  * no packets are emitted with multicast address as
1852  * source address.
1853  * The addresses valid for bind are:
1854  *      (1) - in6addr_any
1855  *      (2) - IP address of an UP interface
1856  *      (3) - IP address of a DOWN interface
1857  *      (4) - a multicast address. In this case
1858  *      the conn will only receive packets destined to
1859  *      the specified multicast address. Note: the
1860  *      application still has to issue an
1861  *      IPV6_JOIN_GROUP socket option.
1862  *
1863  * In all the above cases, the bound address must be valid in the current zone.
1864  * When the address is loopback or multicast, there might be many matching IREs
1865  * so bind has to look up based on the zone.
1866  */
1867 ip_laddr_t
1868 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1869     ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1870 {
1871         ire_t           *src_ire;
1872         uint_t          match_flags;
1873         ill_t           *ill = NULL;
1874 
1875         ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1876         ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1877 
1878         match_flags = MATCH_IRE_ZONEONLY;
1879         if (scopeid != 0) {
1880                 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1881                 if (ill == NULL)
1882                         return (IPVL_BAD);
1883                 match_flags |= MATCH_IRE_ILL;
1884         }
1885 
1886         src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1887             ill, zoneid, NULL, match_flags, 0, ipst, NULL);
1888         if (ill != NULL)
1889                 ill_refrele(ill);
1890 
1891         /*
1892          * If an address other than in6addr_any is requested,
1893          * we verify that it is a valid address for bind
1894          * Note: Following code is in if-else-if form for
1895          * readability compared to a condition check.
1896          */
1897         if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1898                 /*
1899                  * (2) Bind to address of local UP interface
1900                  */
1901                 ire_refrele(src_ire);
1902                 return (IPVL_UNICAST_UP);
1903         } else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1904                 /* (4) bind to multicast address. */
1905                 if (src_ire != NULL)
1906                         ire_refrele(src_ire);
1907 
1908                 /*
1909                  * Note: caller should take IPV6_MULTICAST_IF
1910                  * into account when selecting a real source address.
1911                  */
1912                 if (allow_mcbc)
1913                         return (IPVL_MCAST);
1914                 else
1915                         return (IPVL_BAD);
1916         } else {
1917                 ipif_t *ipif;
1918 
1919                 /*
1920                  * (3) Bind to address of local DOWN interface?
1921                  * (ipif_lookup_addr() looks up all interfaces
1922                  * but we do not get here for UP interfaces
1923                  * - case (2) above)
1924                  */
1925                 if (src_ire != NULL)
1926                         ire_refrele(src_ire);
1927 
1928                 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1929                 if (ipif == NULL)
1930                         return (IPVL_BAD);
1931 
1932                 /* Not a useful source? */
1933                 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1934                         ipif_refrele(ipif);
1935                         return (IPVL_BAD);
1936                 }
1937                 ipif_refrele(ipif);
1938                 return (IPVL_UNICAST_DOWN);
1939         }
1940 }
1941 
/*
 * Verify that both the source and destination addresses are valid.  If
 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
 * i.e. have no route to it.  Protocols like TCP want to verify destination
 * reachability, while tunnels do not.
 *
 * Determine the route, the interface, and (optionally) the source address
 * to use to reach a given destination.
 * Note that we allow connect to broadcast and multicast addresses when
 * IPDF_ALLOW_MCBC is set.
 * first_hop and dst_addr are normally the same, but if source routing
 * they will differ; in that case the first_hop is what we'll use for the
 * routing lookup but the dce and label checks will be done on dst_addr,
 *
 * If uinfo is set, then we fill in the best available information
 * we have for the destination. This is based on (in priority order) any
 * metrics and path MTU stored in a dce_t, route metrics, and finally the
 * ill_mtu/ill_mc_mtu.
 *
 * Tsol note: If we have a source route then dst_addr != firsthop. But we
 * always do the label check on dst_addr.
 *
 * Assumes that the caller has set ixa_scopeid for link-local communication.
 *
 * On return ixa_ire/ixa_ire_generation and ixa_dce/ixa_dce_generation have
 * been replaced (unless the route selection itself reported an error), and
 * ixa_nce, ixa_postfragfn, ixa_fragsize and, when IPDF_SELECT_SRC is set,
 * *src_addrp and ixa_src_generation may have been updated as well.
 *
 * Returns 0 on success or an errno value.  The values set directly in this
 * function are ENETUNREACH, EHOSTUNREACH and EADDRNOTAVAIL; errors reported
 * by tsol_check_dest(), ip_select_route_v6() and ip_select_source_v6() are
 * passed through.
 */
int
ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
    const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
    uint32_t flags, uint_t mac_mode)
{
        ire_t           *ire;
        int             error = 0;
        in6_addr_t      setsrc;                         /* RTF_SETSRC */
        zoneid_t        zoneid = ixa->ixa_zoneid;    /* Honors SO_ALLZONES */
        ip_stack_t      *ipst = ixa->ixa_ipst;
        dce_t           *dce;
        uint_t          pmtu;
        uint_t          ifindex;
        uint_t          generation;
        nce_t           *nce;
        ill_t           *ill = NULL;
        boolean_t       multirt = B_FALSE;

        ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));

        ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));

        /*
         * We never send to zero; the ULPs map it to the loopback address.
         * We can't allow it since we use zero to mean uninitialized in some
         * places.
         */
        ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));

        /* On labeled systems, vet the destination before doing any work. */
        if (is_system_labeled()) {
                ts_label_t *tsl = NULL;

                error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION,
                    mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl);
                if (error != 0)
                        return (error);
                if (tsl != NULL) {
                        /* Update the label */
                        ip_xmit_attr_replace_tsl(ixa, tsl);
                }
        }

        setsrc = ipv6_all_zeros;
        /*
         * Select a route; For IPMP interfaces, we would only select
         * a "hidden" route (i.e., going through a specific under_ill)
         * if ixa_ifindex has been specified.
         */
        ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
            &setsrc, &error, &multirt);
        ASSERT(ire != NULL);    /* IRE_NOROUTE if none found */
        if (error != 0)
                goto bad_addr;

        /*
         * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
         * If IPDF_VERIFY_DST is set, the destination must be reachable.
         * Otherwise the destination needn't be reachable.
         *
         * If we match on a reject or black hole, then we've got a
         * local failure.  May as well fail out the connect() attempt,
         * since it's never going to succeed.
         */
        if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
                /*
                 * If we're verifying destination reachability, we always want
                 * to complain here.
                 *
                 * If we're not verifying destination reachability but the
                 * destination has a route, we still want to fail on the
                 * temporary address and broadcast address tests.
                 *
                 * In both cases we let the code continue so some reasonable
                 * information is returned to the caller. That enables the
                 * caller to use (and even cache) the IRE. conn_ip_output will
                 * use the generation mismatch path to check for the unreachable
                 * case thereby avoiding any specific check in the main path.
                 */
                ASSERT(generation == IRE_GENERATION_VERIFY);
                if (flags & IPDF_VERIFY_DST) {
                        /*
                         * Set errno but continue to set up ixa_ire to be
                         * the RTF_REJECT|RTF_BLACKHOLE IRE.
                         * That allows callers to use ip_output to get an
                         * ICMP error back.
                         */
                        if (!(ire->ire_type & IRE_HOST))
                                error = ENETUNREACH;
                        else
                                error = EHOSTUNREACH;
                }
        }

        /*
         * Broadcast/multicast routes are swapped for a reject IRE when the
         * caller did not ask for them; processing continues so that the
         * ixa caches below are still populated.
         */
        if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
            !(flags & IPDF_ALLOW_MCBC)) {
                ire_refrele(ire);
                ire = ire_reject(ipst, B_FALSE);
                generation = IRE_GENERATION_VERIFY;
                error = ENETUNREACH;
        }

        /*
         * Cache things.  From here on the reference on ire belongs to
         * ixa_ire, which is why later error paths set ire to NULL before
         * jumping to bad_addr.
         */
        if (ixa->ixa_ire != NULL)
                ire_refrele_notr(ixa->ixa_ire);
#ifdef DEBUG
        /* Presumably swaps our traced reference for an untraced one. */
        ire_refhold_notr(ire);
        ire_refrele(ire);
#endif
        ixa->ixa_ire = ire;
        ixa->ixa_ire_generation = generation;

        /*
         * Ensure that ixa_dce is always set any time that ixa_ire is set,
         * since some callers will send a packet to conn_ip_output() even if
         * there's an error.
         */
        ifindex = 0;
        if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
                /*
                 * If we are creating a DCE we'd better have an ifindex.
                 *
                 * NOTE(review): ill is initialized to NULL and is only
                 * assigned further down, in the IPDF_SELECT_SRC block, so
                 * within this function the else branch is always the one
                 * taken here.  Confirm whether that is intended.
                 */
                if (ill != NULL)
                        ifindex = ill->ill_phyint->phyint_ifindex;
                else
                        flags &= ~IPDF_UNIQUE_DCE;
        }

        if (flags & IPDF_UNIQUE_DCE) {
                /* Fallback to the default dce if allocation fails */
                dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
                if (dce != NULL) {
                        generation = dce->dce_generation;
                } else {
                        dce = dce_lookup_v6(dst_addr, ifindex, ipst,
                            &generation);
                }
        } else {
                dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
        }
        ASSERT(dce != NULL);
        if (ixa->ixa_dce != NULL)
                dce_refrele_notr(ixa->ixa_dce);
#ifdef DEBUG
        /* Same reference conversion as for ixa_ire above. */
        dce_refhold_notr(dce);
        dce_refrele(dce);
#endif
        ixa->ixa_dce = dce;
        ixa->ixa_dce_generation = generation;


        /*
         * For multicast with multirt we have a flag passed back from
         * ire_lookup_multi_ill_v6 since we don't have an IRE for each
         * possible multicast address.
         * We also need a flag for multicast since we can't check
         * whether RTF_MULTIRT is set in ixa_ire for multicast.
         */
        if (multirt) {
                ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
                ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
        } else {
                ixa->ixa_postfragfn = ire->ire_postfragfn;
                ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
        }
        if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
                /* Get an nce to cache. */
                nce = ire_to_nce(ire, NULL, firsthop);
                if (nce == NULL) {
                        /*
                         * Allocation failure?  Keep any existing ixa_nce
                         * and force the IRE to be re-verified later.
                         */
                        ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
                } else {
                        if (ixa->ixa_nce != NULL)
                                nce_refrele(ixa->ixa_nce);
                        ixa->ixa_nce = nce;
                }
        }

        /*
         * If the source address is a loopback address, the
         * destination had best be local or multicast.
         * If we are sending to an IRE_LOCAL using a loopback source then
         * it had better be the same zoneid.
         */
        if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
                if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
                        ire = NULL;     /* Stored in ixa_ire */
                        error = EADDRNOTAVAIL;
                        goto bad_addr;
                }
                if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
                        ire = NULL;     /* Stored in ixa_ire */
                        error = EADDRNOTAVAIL;
                        goto bad_addr;
                }
        }

        /*
         * Does the caller want us to pick a source address?
         */
        if (flags & IPDF_SELECT_SRC) {
                in6_addr_t      src_addr;

                /*
                 * We use ire_nexthop_ill to avoid the under ipmp
                 * interface for source address selection. Note that for ipmp
                 * probe packets, ixa_ifindex would have been specified, and
                 * the ip_select_route() invocation would have picked an ire
                 * with ire_ill pointing at an under interface.
                 */
                ill = ire_nexthop_ill(ire);

                /* If unreachable we have no ill but need some source */
                if (ill == NULL) {
                        src_addr = ipv6_loopback;
                        /* Make sure we look for a better source address */
                        generation = SRC_GENERATION_VERIFY;
                } else {
                        error = ip_select_source_v6(ill, &setsrc, dst_addr,
                            zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
                            &src_addr, &generation, NULL);
                        if (error != 0) {
                                ire = NULL;     /* Stored in ixa_ire */
                                goto bad_addr;
                        }
                }

                /*
                 * We allow the source address to be down.
                 * However, we check that we don't use the loopback address
                 * as a source when sending out on the wire.
                 */
                if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
                    !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
                    !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
                        ire = NULL;     /* Stored in ixa_ire */
                        error = EADDRNOTAVAIL;
                        goto bad_addr;
                }

                *src_addrp = src_addr;
                ixa->ixa_src_generation = generation;
        }

        /*
         * Make sure we don't leave an unreachable ixa_nce in place
         * since ip_select_route is used when we unplumb i.e., remove
         * references on ixa_ire, ixa_nce, and ixa_dce.
         */
        nce = ixa->ixa_nce;
        if (nce != NULL && nce->nce_is_condemned) {
                nce_refrele(nce);
                ixa->ixa_nce = NULL;
                ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
        }

        /*
         * Note that IPv6 multicast supports PMTU discovery unlike IPv4
         * multicast. But pmtu discovery is only enabled for connected
         * sockets in general.
         */

        /*
         * Set initial value for fragmentation limit.  Either conn_ip_output
         * or ULP might update it when there are routing changes.
         * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
         */
        pmtu = ip_get_pmtu(ixa);
        ixa->ixa_fragsize = pmtu;
        /* Make sure ixa_fragsize and ixa_pmtu remain identical */
        if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
                ixa->ixa_pmtu = pmtu;

        /*
         * Extract information useful for some transports.
         * First we look for DCE metrics. Then we take what we have in
         * the metrics in the route, where the offlink is used if we have
         * one.
         */
        if (uinfo != NULL) {
                bzero(uinfo, sizeof (*uinfo));

                if (dce->dce_flags & DCEF_UINFO)
                        *uinfo = dce->dce_uinfo;

                rts_merge_metrics(uinfo, &ire->ire_metrics);

                /* Allow ire_metrics to decrease the path MTU from above */
                if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
                        uinfo->iulp_mtu = pmtu;

                uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
                uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
                uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
        }

        /* Drop the hold taken by ire_nexthop_ill(), if any. */
        if (ill != NULL)
                ill_refrele(ill);

        /* May be non-zero even though the ixa caches were filled in. */
        return (error);

bad_addr:
        /*
         * ire is non-NULL here only when its reference has not been handed
         * over to ixa_ire.
         */
        if (ire != NULL)
                ire_refrele(ire);

        if (ill != NULL)
                ill_refrele(ill);

        /*
         * Make sure we don't leave an unreachable ixa_nce in place
         * since ip_select_route is used when we unplumb i.e., remove
         * references on ixa_ire, ixa_nce, and ixa_dce.
         */
        nce = ixa->ixa_nce;
        if (nce != NULL && nce->nce_is_condemned) {
                nce_refrele(nce);
                ixa->ixa_nce = NULL;
                ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
        }

        return (error);
}
2285 
2286 /*
2287  * Handle protocols with which IP is less intimate.  There
2288  * can be more than one stream bound to a particular
2289  * protocol.  When this is the case, normally each one gets a copy
2290  * of any incoming packets.
2291  *
2292  * Zones notes:
2293  * Packets will be distributed to conns in all zones. This is really only
2294  * useful for ICMPv6 as only applications in the global zone can create raw
2295  * sockets for other protocols.
2296  */
2297 void
2298 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2299 {
2300         mblk_t          *mp1;
2301         in6_addr_t      laddr = ip6h->ip6_dst;
2302         conn_t          *connp, *first_connp, *next_connp;
2303         connf_t         *connfp;
2304         ill_t           *ill = ira->ira_ill;
2305         ip_stack_t      *ipst = ill->ill_ipst;
2306 
2307         connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2308         mutex_enter(&connfp->connf_lock);
2309         connp = connfp->connf_head;
2310         for (connp = connfp->connf_head; connp != NULL;
2311             connp = connp->conn_next) {
2312                 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2313                 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2314                     (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2315                     tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2316                         break;
2317         }
2318 
2319         if (connp == NULL) {
2320                 /*
2321                  * No one bound to this port.  Is
2322                  * there a client that wants all
2323                  * unclaimed datagrams?
2324                  */
2325                 mutex_exit(&connfp->connf_lock);
2326                 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2327                     ICMP6_PARAMPROB_NEXTHEADER, ira);
2328                 return;
2329         }
2330 
2331         ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2332 
2333         CONN_INC_REF(connp);
2334         first_connp = connp;
2335 
2336         /*
2337          * XXX: Fix the multiple protocol listeners case. We should not
2338          * be walking the conn->conn_next list here.
2339          */
2340         connp = connp->conn_next;
2341         for (;;) {
2342                 while (connp != NULL) {
2343                         /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2344                         if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) &&
2345                             (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2346                             tsol_receive_local(mp, &laddr, IPV6_VERSION,
2347                             ira, connp)))
2348                                 break;
2349                         connp = connp->conn_next;
2350                 }
2351 
2352                 if (connp == NULL) {
2353                         /* No more interested clients */
2354                         connp = first_connp;
2355                         break;
2356                 }
2357                 if (((mp1 = dupmsg(mp)) == NULL) &&
2358                     ((mp1 = copymsg(mp)) == NULL)) {
2359                         /* Memory allocation failed */
2360                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2361                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
2362                         connp = first_connp;
2363                         break;
2364                 }
2365 
2366                 CONN_INC_REF(connp);
2367                 mutex_exit(&connfp->connf_lock);
2368 
2369                 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2370                     ira);
2371 
2372                 mutex_enter(&connfp->connf_lock);
2373                 /* Follow the next pointer before releasing the conn. */
2374                 next_connp = connp->conn_next;
2375                 CONN_DEC_REF(connp);
2376                 connp = next_connp;
2377         }
2378 
2379         /* Last one.  Send it upstream. */
2380         mutex_exit(&connfp->connf_lock);
2381 
2382         ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2383 
2384         CONN_DEC_REF(connp);
2385 }
2386 
2387 /*
2388  * Called when it is conceptually a ULP that would sent the packet
2389  * e.g., port unreachable and nexthdr unknown. Check that the packet
2390  * would have passed the IPsec global policy before sending the error.
2391  *
2392  * Send an ICMP error after patching up the packet appropriately.
2393  * Uses ip_drop_input and bumps the appropriate MIB.
2394  * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2395  */
2396 void
2397 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2398     ip_recv_attr_t *ira)
2399 {
2400         ip6_t           *ip6h;
2401         boolean_t       secure;
2402         ill_t           *ill = ira->ira_ill;
2403         ip_stack_t      *ipst = ill->ill_ipst;
2404         netstack_t      *ns = ipst->ips_netstack;
2405         ipsec_stack_t   *ipss = ns->netstack_ipsec;
2406 
2407         secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2408 
2409         /*
2410          * We are generating an icmp error for some inbound packet.
2411          * Called from all ip_fanout_(udp, tcp, proto) functions.
2412          * Before we generate an error, check with global policy
2413          * to see whether this is allowed to enter the system. As
2414          * there is no "conn", we are checking with global policy.
2415          */
2416         ip6h = (ip6_t *)mp->b_rptr;
2417         if (secure || ipss->ipsec_inbound_v6_policy_present) {
2418                 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2419                 if (mp == NULL)
2420                         return;
2421         }
2422 
2423         /* We never send errors for protocols that we do implement */
2424         if (ira->ira_protocol == IPPROTO_ICMPV6) {
2425                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2426                 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2427                 freemsg(mp);
2428                 return;
2429         }
2430 
2431         switch (icmp_type) {
2432         case ICMP6_DST_UNREACH:
2433                 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2434 
2435                 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2436                 ip_drop_input("ipIfStatsNoPorts", mp, ill);
2437 
2438                 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2439                 break;
2440         case ICMP6_PARAM_PROB:
2441                 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2442 
2443                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2444                 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2445 
2446                 /* Let the system determine the offset for this one */
2447                 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2448                 break;
2449         default:
2450 #ifdef DEBUG
2451                 panic("ip_fanout_send_icmp_v6: wrong type");
2452                 /*NOTREACHED*/
2453 #else
2454                 freemsg(mp);
2455                 break;
2456 #endif
2457         }
2458 }
2459 
2460 /*
2461  * Fanout for UDP packets that are multicast or ICMP errors.
2462  * (Unicast fanout is handled in ip_input_v6.)
2463  *
2464  * If SO_REUSEADDR is set all multicast packets
2465  * will be delivered to all conns bound to the same port.
2466  *
2467  * Fanout for UDP packets.
2468  * The caller puts <fport, lport> in the ports parameter.
2469  * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2470  *
2471  * If SO_REUSEADDR is set all multicast and broadcast packets
2472  * will be delivered to all conns bound to the same port.
2473  *
2474  * Zones notes:
2475  * Earlier in ip_input on a system with multiple shared-IP zones we
2476  * duplicate the multicast and broadcast packets and send them up
2477  * with each explicit zoneid that exists on that ill.
2478  * This means that here we can match the zoneid with SO_ALLZONES being special.
2479  */
2480 void
2481 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2482     ip_recv_attr_t *ira)
2483 {
2484         in6_addr_t      laddr;
2485         conn_t          *connp;
2486         connf_t         *connfp;
2487         in6_addr_t      faddr;
2488         ill_t           *ill = ira->ira_ill;
2489         ip_stack_t      *ipst = ill->ill_ipst;
2490 
2491         ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2492 
2493         laddr = ip6h->ip6_dst;
2494         faddr = ip6h->ip6_src;
2495 
2496         /* Attempt to find a client stream based on destination port. */
2497         connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2498         mutex_enter(&connfp->connf_lock);
2499         connp = connfp->connf_head;
2500         while (connp != NULL) {
2501                 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2502                     conn_wantpacket_v6(connp, ira, ip6h) &&
2503                     (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2504                     tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp)))
2505                         break;
2506                 connp = connp->conn_next;
2507         }
2508 
2509         if (connp == NULL)
2510                 goto notfound;
2511 
2512         CONN_INC_REF(connp);
2513 
2514         if (connp->conn_reuseaddr) {
2515                 conn_t          *first_connp = connp;
2516                 conn_t          *next_connp;
2517                 mblk_t          *mp1;
2518 
2519                 connp = connp->conn_next;
2520                 for (;;) {
2521                         while (connp != NULL) {
2522                                 if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2523                                     fport, faddr) &&
2524                                     conn_wantpacket_v6(connp, ira, ip6h) &&
2525                                     (!(ira->ira_flags & IRAF_SYSTEM_LABELED) ||
2526                                     tsol_receive_local(mp, &laddr, IPV6_VERSION,
2527                                     ira, connp)))
2528                                         break;
2529                                 connp = connp->conn_next;
2530                         }
2531                         if (connp == NULL) {
2532                                 /* No more interested clients */
2533                                 connp = first_connp;
2534                                 break;
2535                         }
2536                         if (((mp1 = dupmsg(mp)) == NULL) &&
2537                             ((mp1 = copymsg(mp)) == NULL)) {
2538                                 /* Memory allocation failed */
2539                                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2540                                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2541                                 connp = first_connp;
2542                                 break;
2543                         }
2544 
2545                         CONN_INC_REF(connp);
2546                         mutex_exit(&connfp->connf_lock);
2547 
2548                         IP6_STAT(ipst, ip6_udp_fanmb);
2549                         ip_fanout_udp_conn(connp, mp1, NULL,
2550                             (ip6_t *)mp1->b_rptr, ira);
2551 
2552                         mutex_enter(&connfp->connf_lock);
2553                         /* Follow the next pointer before releasing the conn. */
2554                         next_connp = connp->conn_next;
2555                         IP6_STAT(ipst, ip6_udp_fanmb);
2556                         CONN_DEC_REF(connp);
2557                         connp = next_connp;
2558                 }
2559         }
2560 
2561         /* Last one.  Send it upstream. */
2562         mutex_exit(&connfp->connf_lock);
2563 
2564         IP6_STAT(ipst, ip6_udp_fanmb);
2565         ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2566         CONN_DEC_REF(connp);
2567         return;
2568 
2569 notfound:
2570         mutex_exit(&connfp->connf_lock);
2571         /*
2572          * No one bound to this port.  Is
2573          * there a client that wants all
2574          * unclaimed datagrams?
2575          */
2576         if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2577                 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2578                 ip_fanout_proto_v6(mp, ip6h, ira);
2579         } else {
2580                 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2581                     ICMP6_DST_UNREACH_NOPORT, ira);
2582         }
2583 }
2584 
2585 /*
2586  * int ip_find_hdr_v6()
2587  *
2588  * This routine is used by the upper layer protocols, iptun, and IPsec:
2589  * - Set extension header pointers to appropriate locations
2590  * - Determine IPv6 header length and return it
2591  * - Return a pointer to the last nexthdr value
2592  *
2593  * The caller must initialize ipp_fields.
2594  * The upper layer protocols normally set label_separate which makes the
2595  * routine put the TX label in ipp_label_v6. If this is not set then
2596  * the hop-by-hop options including the label are placed in ipp_hopopts.
2597  *
2598  * NOTE: If multiple extension headers of the same type are present,
2599  * ip_find_hdr_v6() will set the respective extension header pointers
2600  * to the first one that it encounters in the IPv6 header.  It also
2601  * skips fragment headers.  This routine deals with malformed packets
2602  * of various sorts in which case the returned length is up to the
2603  * malformed part.
2604  */
2605 int
2606 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp,
2607     uint8_t *nexthdrp)
2608 {
2609         uint_t  length, ehdrlen;
2610         uint8_t nexthdr;
2611         uint8_t *whereptr, *endptr;
2612         ip6_dest_t *tmpdstopts;
2613         ip6_rthdr_t *tmprthdr;
2614         ip6_hbh_t *tmphopopts;
2615         ip6_frag_t *tmpfraghdr;
2616 
2617         ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2618         ipp->ipp_hoplimit = ip6h->ip6_hops;
2619         ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2620         ipp->ipp_addr = ip6h->ip6_dst;
2621 
2622         length = IPV6_HDR_LEN;
2623         whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2624         endptr = mp->b_wptr;
2625 
2626         nexthdr = ip6h->ip6_nxt;
2627         while (whereptr < endptr) {
2628                 /* Is there enough left for len + nexthdr? */
2629                 if (whereptr + MIN_EHDR_LEN > endptr)
2630                         goto done;
2631 
2632                 switch (nexthdr) {
2633                 case IPPROTO_HOPOPTS: {
2634                         /* We check for any CIPSO */
2635                         uchar_t *secopt;
2636                         boolean_t hbh_needed;
2637                         uchar_t *after_secopt;
2638 
2639                         tmphopopts = (ip6_hbh_t *)whereptr;
2640                         ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2641                         if ((uchar_t *)tmphopopts +  ehdrlen > endptr)
2642                                 goto done;
2643                         nexthdr = tmphopopts->ip6h_nxt;
2644 
2645                         if (!label_separate) {
2646                                 secopt = NULL;
2647                                 after_secopt = whereptr;
2648                         } else {
2649                                 /*
2650                                  * We have dropped packets with bad options in
2651                                  * ip6_input. No need to check return value
2652                                  * here.
2653                                  */
2654                                 (void) tsol_find_secopt_v6(whereptr, ehdrlen,
2655                                     &secopt, &after_secopt, &hbh_needed);
2656                         }
2657                         if (secopt != NULL && after_secopt - whereptr > 0) {
2658                                 ipp->ipp_fields |= IPPF_LABEL_V6;
2659                                 ipp->ipp_label_v6 = secopt;
2660                                 ipp->ipp_label_len_v6 = after_secopt - whereptr;
2661                         } else {
2662                                 ipp->ipp_label_len_v6 = 0;
2663                                 after_secopt = whereptr;
2664                                 hbh_needed = B_TRUE;
2665                         }
2666                         /* return only 1st hbh */
2667                         if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) {
2668                                 ipp->ipp_fields |= IPPF_HOPOPTS;
2669                                 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt;
2670                                 ipp->ipp_hopoptslen = ehdrlen -
2671                                     ipp->ipp_label_len_v6;
2672                         }
2673                         break;
2674                 }
2675                 case IPPROTO_DSTOPTS:
2676                         tmpdstopts = (ip6_dest_t *)whereptr;
2677                         ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2678                         if ((uchar_t *)tmpdstopts +  ehdrlen > endptr)
2679                                 goto done;
2680                         nexthdr = tmpdstopts->ip6d_nxt;
2681                         /*
2682                          * ipp_dstopts is set to the destination header after a
2683                          * routing header.
2684                          * Assume it is a post-rthdr destination header
2685                          * and adjust when we find an rthdr.
2686                          */
2687                         if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2688                                 ipp->ipp_fields |= IPPF_DSTOPTS;
2689                                 ipp->ipp_dstopts = tmpdstopts;
2690                                 ipp->ipp_dstoptslen = ehdrlen;
2691                         }
2692                         break;
2693                 case IPPROTO_ROUTING:
2694                         tmprthdr = (ip6_rthdr_t *)whereptr;
2695                         ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2696                         if ((uchar_t *)tmprthdr +  ehdrlen > endptr)
2697                                 goto done;
2698                         nexthdr = tmprthdr->ip6r_nxt;
2699                         /* return only 1st rthdr */
2700                         if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2701                                 ipp->ipp_fields |= IPPF_RTHDR;
2702                                 ipp->ipp_rthdr = tmprthdr;
2703                                 ipp->ipp_rthdrlen = ehdrlen;
2704                         }
2705                         /*
2706                          * Make any destination header we've seen be a
2707                          * pre-rthdr destination header.
2708                          */
2709                         if (ipp->ipp_fields & IPPF_DSTOPTS) {
2710                                 ipp->ipp_fields &= ~IPPF_DSTOPTS;
2711                                 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2712                                 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2713                                 ipp->ipp_dstopts = NULL;
2714                                 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2715                                 ipp->ipp_dstoptslen = 0;
2716                         }
2717                         break;
2718                 case IPPROTO_FRAGMENT:
2719                         tmpfraghdr = (ip6_frag_t *)whereptr;
2720                         ehdrlen = sizeof (ip6_frag_t);
2721                         if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2722                                 goto done;
2723                         nexthdr = tmpfraghdr->ip6f_nxt;
2724                         if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2725                                 ipp->ipp_fields |= IPPF_FRAGHDR;
2726                                 ipp->ipp_fraghdr = tmpfraghdr;
2727                                 ipp->ipp_fraghdrlen = ehdrlen;
2728                         }
2729                         break;
2730                 case IPPROTO_NONE:
2731                 default:
2732                         goto done;
2733                 }
2734                 length += ehdrlen;
2735                 whereptr += ehdrlen;
2736         }
2737 done:
2738         if (nexthdrp != NULL)
2739                 *nexthdrp = nexthdr;
2740         return (length);
2741 }
2742 
2743 /*
2744  * Try to determine where and what are the IPv6 header length and
2745  * pointer to nexthdr value for the upper layer protocol (or an
2746  * unknown next hdr).
2747  *
2748  * Parameters returns a pointer to the nexthdr value;
2749  * Must handle malformed packets of various sorts.
2750  * Function returns failure for malformed cases.
2751  */
2752 boolean_t
2753 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
2754     uint8_t **nexthdrpp)
2755 {
2756         uint16_t length;
2757         uint_t  ehdrlen;
2758         uint8_t *nexthdrp;
2759         uint8_t *whereptr;
2760         uint8_t *endptr;
2761         ip6_dest_t *desthdr;
2762         ip6_rthdr_t *rthdr;
2763         ip6_frag_t *fraghdr;
2764 
2765         ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
2766         length = IPV6_HDR_LEN;
2767         whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2768         endptr = mp->b_wptr;
2769 
2770         nexthdrp = &ip6h->ip6_nxt;
2771         while (whereptr < endptr) {
2772                 /* Is there enough left for len + nexthdr? */
2773                 if (whereptr + MIN_EHDR_LEN > endptr)
2774                         break;
2775 
2776                 switch (*nexthdrp) {
2777                 case IPPROTO_HOPOPTS:
2778                 case IPPROTO_DSTOPTS:
2779                         /* Assumes the headers are identical for hbh and dst */
2780                         desthdr = (ip6_dest_t *)whereptr;
2781                         ehdrlen = 8 * (desthdr->ip6d_len + 1);
2782                         if ((uchar_t *)desthdr +  ehdrlen > endptr)
2783                                 return (B_FALSE);
2784                         nexthdrp = &desthdr->ip6d_nxt;
2785                         break;
2786                 case IPPROTO_ROUTING:
2787                         rthdr = (ip6_rthdr_t *)whereptr;
2788                         ehdrlen =  8 * (rthdr->ip6r_len + 1);
2789                         if ((uchar_t *)rthdr +  ehdrlen > endptr)
2790                                 return (B_FALSE);
2791                         nexthdrp = &rthdr->ip6r_nxt;
2792                         break;
2793                 case IPPROTO_FRAGMENT:
2794                         fraghdr = (ip6_frag_t *)whereptr;
2795                         ehdrlen = sizeof (ip6_frag_t);
2796                         if ((uchar_t *)&fraghdr[1] > endptr)
2797                                 return (B_FALSE);
2798                         nexthdrp = &fraghdr->ip6f_nxt;
2799                         break;
2800                 case IPPROTO_NONE:
2801                         /* No next header means we're finished */
2802                 default:
2803                         *hdr_length_ptr = length;
2804                         *nexthdrpp = nexthdrp;
2805                         return (B_TRUE);
2806                 }
2807                 length += ehdrlen;
2808                 whereptr += ehdrlen;
2809                 *hdr_length_ptr = length;
2810                 *nexthdrpp = nexthdrp;
2811         }
2812         switch (*nexthdrp) {
2813         case IPPROTO_HOPOPTS:
2814         case IPPROTO_DSTOPTS:
2815         case IPPROTO_ROUTING:
2816         case IPPROTO_FRAGMENT:
2817                 /*
2818                  * If any know extension headers are still to be processed,
2819                  * the packet's malformed (or at least all the IP header(s) are
2820                  * not in the same mblk - and that should never happen.
2821                  */
2822                 return (B_FALSE);
2823 
2824         default:
2825                 /*
2826                  * If we get here, we know that all of the IP headers were in
2827                  * the same mblk, even if the ULP header is in the next mblk.
2828                  */
2829                 *hdr_length_ptr = length;
2830                 *nexthdrpp = nexthdrp;
2831                 return (B_TRUE);
2832         }
2833 }
2834 
2835 /*
2836  * Return the length of the IPv6 related headers (including extension headers)
2837  * Returns a length even if the packet is malformed.
2838  */
2839 int
2840 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2841 {
2842         uint16_t hdr_len;
2843         uint8_t *nexthdrp;
2844 
2845         (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
2846         return (hdr_len);
2847 }
2848 
2849 /*
2850  * Parse and process any hop-by-hop or destination options.
2851  *
2852  * Assumes that q is an ill read queue so that ICMP errors for link-local
2853  * destinations are sent out the correct interface.
2854  *
2855  * Returns -1 if there was an error and mp has been consumed.
2856  * Returns 0 if no special action is needed.
2857  * Returns 1 if the packet contained a router alert option for this node
2858  * which is verified to be "interesting/known" for our implementation.
2859  *
2860  * XXX Note: In future as more hbh or dest options are defined,
2861  * it may be better to have different routines for hbh and dest
2862  * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2863  * may have same value in different namespaces. Or is it same namespace ??
2864  * Current code checks for each opt_type (other than pads) if it is in
2865  * the expected  nexthdr (hbh or dest)
2866  */
2867 int
2868 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2869     uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2870 {
2871         uint8_t opt_type;
2872         uint_t optused;
2873         int ret = 0;
2874         const char *errtype;
2875         ill_t           *ill = ira->ira_ill;
2876         ip_stack_t      *ipst = ill->ill_ipst;
2877 
2878         while (optlen != 0) {
2879                 opt_type = *optptr;
2880                 if (opt_type == IP6OPT_PAD1) {
2881                         optused = 1;
2882                 } else {
2883                         if (optlen < 2)
2884                                 goto bad_opt;
2885                         errtype = "malformed";
2886                         if (opt_type == ip6opt_ls) {
2887                                 optused = 2 + optptr[1];
2888                                 if (optused > optlen)
2889                                         goto bad_opt;
2890                         } else switch (opt_type) {
2891                         case IP6OPT_PADN:
2892                                 /*
2893                                  * Note:We don't verify that (N-2) pad octets
2894                                  * are zero as required by spec. Adhere to
2895                                  * "be liberal in what you accept..." part of
2896                                  * implementation philosophy (RFC791,RFC1122)
2897                                  */
2898                                 optused = 2 + optptr[1];
2899                                 if (optused > optlen)
2900                                         goto bad_opt;
2901                                 break;
2902 
2903                         case IP6OPT_JUMBO:
2904                                 if (hdr_type != IPPROTO_HOPOPTS)
2905                                         goto opt_error;
2906                                 goto opt_error; /* XXX Not implemented! */
2907 
2908                         case IP6OPT_ROUTER_ALERT: {
2909                                 struct ip6_opt_router *or;
2910 
2911                                 if (hdr_type != IPPROTO_HOPOPTS)
2912                                         goto opt_error;
2913                                 optused = 2 + optptr[1];
2914                                 if (optused > optlen)
2915                                         goto bad_opt;
2916                                 or = (struct ip6_opt_router *)optptr;
2917                                 /* Check total length and alignment */
2918                                 if (optused != sizeof (*or) ||
2919                                     ((uintptr_t)or->ip6or_value & 0x1) != 0)
2920                                         goto opt_error;
2921                                 /* Check value */
2922                                 switch (*((uint16_t *)or->ip6or_value)) {
2923                                 case IP6_ALERT_MLD:
2924                                 case IP6_ALERT_RSVP:
2925                                         ret = 1;
2926                                 }
2927                                 break;
2928                         }
2929                         case IP6OPT_HOME_ADDRESS: {
2930                                 /*
2931                                  * Minimal support for the home address option
2932                                  * (which is required by all IPv6 nodes).
2933                                  * Implement by just swapping the home address
2934                                  * and source address.
2935                                  * XXX Note: this has IPsec implications since
2936                                  * AH needs to take this into account.
2937                                  * Also, when IPsec is used we need to ensure
2938                                  * that this is only processed once
2939                                  * in the received packet (to avoid swapping
2940                                  * back and forth).
2941                                  * NOTE:This option processing is considered
2942                                  * to be unsafe and prone to a denial of
2943                                  * service attack.
2944                                  * The current processing is not safe even with
2945                                  * IPsec secured IP packets. Since the home
2946                                  * address option processing requirement still
2947                                  * is in the IETF draft and in the process of
2948                                  * being redefined for its usage, it has been
2949                                  * decided to turn off the option by default.
2950                                  * If this section of code needs to be executed,
2951                                  * ndd variable ip6_ignore_home_address_opt
2952                                  * should be set to 0 at the user's own risk.
2953                                  */
2954                                 struct ip6_opt_home_address *oh;
2955                                 in6_addr_t tmp;
2956 
2957                                 if (ipst->ips_ipv6_ignore_home_address_opt)
2958                                         goto opt_error;
2959 
2960                                 if (hdr_type != IPPROTO_DSTOPTS)
2961                                         goto opt_error;
2962                                 optused = 2 + optptr[1];
2963                                 if (optused > optlen)
2964                                         goto bad_opt;
2965 
2966                                 /*
2967                                  * We did this dest. opt the first time
2968                                  * around (i.e. before AH processing).
2969                                  * If we've done AH... stop now.
2970                                  */
2971                                 if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2972                                     ira->ira_ipsec_ah_sa != NULL)
2973                                         break;
2974 
2975                                 oh = (struct ip6_opt_home_address *)optptr;
2976                                 /* Check total length and alignment */
2977                                 if (optused < sizeof (*oh) ||
2978                                     ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2979                                         goto opt_error;
2980                                 /* Swap ip6_src and the home address */
2981                                 tmp = ip6h->ip6_src;
2982                                 /* XXX Note: only 8 byte alignment option */
2983                                 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2984                                 *(in6_addr_t *)oh->ip6oh_addr = tmp;
2985                                 break;
2986                         }
2987 
2988                         case IP6OPT_TUNNEL_LIMIT:
2989                                 if (hdr_type != IPPROTO_DSTOPTS) {
2990                                         goto opt_error;
2991                                 }
2992                                 optused = 2 + optptr[1];
2993                                 if (optused > optlen) {
2994                                         goto bad_opt;
2995                                 }
2996                                 if (optused != 3) {
2997                                         goto opt_error;
2998                                 }
2999                                 break;
3000 
3001                         default:
3002                                 errtype = "unknown";
3003                                 /* FALLTHROUGH */
3004                         opt_error:
3005                                 /* Determine which zone should send error */
3006                                 switch (IP6OPT_TYPE(opt_type)) {
3007                                 case IP6OPT_TYPE_SKIP:
3008                                         optused = 2 + optptr[1];
3009                                         if (optused > optlen)
3010                                                 goto bad_opt;
3011                                         ip1dbg(("ip_process_options_v6: %s "
3012                                             "opt 0x%x skipped\n",
3013                                             errtype, opt_type));
3014                                         break;
3015                                 case IP6OPT_TYPE_DISCARD:
3016                                         ip1dbg(("ip_process_options_v6: %s "
3017                                             "opt 0x%x; packet dropped\n",
3018                                             errtype, opt_type));
3019                                         BUMP_MIB(ill->ill_ip_mib,
3020                                             ipIfStatsInHdrErrors);
3021                                         ip_drop_input("ipIfStatsInHdrErrors",
3022                                             mp, ill);
3023                                         freemsg(mp);
3024                                         return (-1);
3025                                 case IP6OPT_TYPE_ICMP:
3026                                         BUMP_MIB(ill->ill_ip_mib,
3027                                             ipIfStatsInHdrErrors);
3028                                         ip_drop_input("ipIfStatsInHdrErrors",
3029                                             mp, ill);
3030                                         icmp_param_problem_v6(mp,
3031                                             ICMP6_PARAMPROB_OPTION,
3032                                             (uint32_t)(optptr -
3033                                             (uint8_t *)ip6h),
3034                                             B_FALSE, ira);
3035                                         return (-1);
3036                                 case IP6OPT_TYPE_FORCEICMP:
3037                                         BUMP_MIB(ill->ill_ip_mib,
3038                                             ipIfStatsInHdrErrors);
3039                                         ip_drop_input("ipIfStatsInHdrErrors",
3040                                             mp, ill);
3041                                         icmp_param_problem_v6(mp,
3042                                             ICMP6_PARAMPROB_OPTION,
3043                                             (uint32_t)(optptr -
3044                                             (uint8_t *)ip6h),
3045                                             B_TRUE, ira);
3046                                         return (-1);
3047                                 default:
3048                                         ASSERT(0);
3049                                 }
3050                         }
3051                 }
3052                 optlen -= optused;
3053                 optptr += optused;
3054         }
3055         return (ret);
3056 
3057 bad_opt:
3058         /* Determine which zone should send error */
3059         ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3060         icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
3061             (uint32_t)(optptr - (uint8_t *)ip6h),
3062             B_FALSE, ira);
3063         return (-1);
3064 }
3065 
3066 /*
3067  * Process a routing header that is not yet empty.
3068  * Because of RFC 5095, we now reject all route headers.
3069  */
3070 void
3071 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
3072     ip_recv_attr_t *ira)
3073 {
3074         ill_t           *ill = ira->ira_ill;
3075         ip_stack_t      *ipst = ill->ill_ipst;
3076 
3077         ASSERT(rth->ip6r_segleft != 0);
3078 
3079         if (!ipst->ips_ipv6_forward_src_routed) {
3080                 /* XXX Check for source routed out same interface? */
3081                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
3082                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
3083                 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
3084                 freemsg(mp);
3085                 return;
3086         }
3087 
3088         ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3089         icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3090             (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
3091             B_FALSE, ira);
3092 }
3093 
3094 /*
3095  * Read side put procedure for IPv6 module.
3096  */
3097 void
3098 ip_rput_v6(queue_t *q, mblk_t *mp)
3099 {
3100         ill_t           *ill;
3101 
3102         ill = (ill_t *)q->q_ptr;
3103         if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3104                 union DL_primitives *dl;
3105 
3106                 dl = (union DL_primitives *)mp->b_rptr;
3107                 /*
3108                  * Things are opening or closing - only accept DLPI
3109                  * ack messages. If the stream is closing and ip_wsrv
3110                  * has completed, ip_close is out of the qwait, but has
3111                  * not yet completed qprocsoff. Don't proceed any further
3112                  * because the ill has been cleaned up and things hanging
3113                  * off the ill have been freed.
3114                  */
3115                 if ((mp->b_datap->db_type != M_PCPROTO) ||
3116                     (dl->dl_primitive == DL_UNITDATA_IND)) {
3117                         inet_freemsg(mp);
3118                         return;
3119                 }
3120         }
3121         if (DB_TYPE(mp) == M_DATA) {
3122                 struct mac_header_info_s mhi;
3123 
3124                 ip_mdata_to_mhi(ill, mp, &mhi);
3125                 ip_input_v6(ill, NULL, mp, &mhi);
3126         } else {
3127                 ip_rput_notdata(ill, mp);
3128         }
3129 }
3130 
3131 /*
3132  * Walk through the IPv6 packet in mp and see if there's an AH header
3133  * in it.  See if the AH header needs to get done before other headers in
3134  * the packet.  (Worker function for ipsec_early_ah_v6().)
3135  */
3136 #define IPSEC_HDR_DONT_PROCESS  0
3137 #define IPSEC_HDR_PROCESS       1
3138 #define IPSEC_MEMORY_ERROR      2 /* or malformed packet */
3139 static int
3140 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3141 {
3142         uint_t  length;
3143         uint_t  ehdrlen;
3144         uint8_t *whereptr;
3145         uint8_t *endptr;
3146         uint8_t *nexthdrp;
3147         ip6_dest_t *desthdr;
3148         ip6_rthdr_t *rthdr;
3149         ip6_t   *ip6h;
3150 
3151         /*
3152          * For now just pullup everything.  In general, the less pullups,
3153          * the better, but there's so much squirrelling through anyway,
3154          * it's just easier this way.
3155          */
3156         if (!pullupmsg(mp, -1)) {
3157                 return (IPSEC_MEMORY_ERROR);
3158         }
3159 
3160         ip6h = (ip6_t *)mp->b_rptr;
3161         length = IPV6_HDR_LEN;
3162         whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3163         endptr = mp->b_wptr;
3164 
3165         /*
3166          * We can't just use the argument nexthdr in the place
3167          * of nexthdrp becaue we don't dereference nexthdrp
3168          * till we confirm whether it is a valid address.
3169          */
3170         nexthdrp = &ip6h->ip6_nxt;
3171         while (whereptr < endptr) {
3172                 /* Is there enough left for len + nexthdr? */
3173                 if (whereptr + MIN_EHDR_LEN > endptr)
3174                         return (IPSEC_MEMORY_ERROR);
3175 
3176                 switch (*nexthdrp) {
3177                 case IPPROTO_HOPOPTS:
3178                 case IPPROTO_DSTOPTS:
3179                         /* Assumes the headers are identical for hbh and dst */
3180                         desthdr = (ip6_dest_t *)whereptr;
3181                         ehdrlen = 8 * (desthdr->ip6d_len + 1);
3182                         if ((uchar_t *)desthdr +  ehdrlen > endptr)
3183                                 return (IPSEC_MEMORY_ERROR);
3184                         /*
3185                          * Return DONT_PROCESS because the destination
3186                          * options header may be for each hop in a
3187                          * routing-header, and we only want AH if we're
3188                          * finished with routing headers.
3189                          */
3190                         if (*nexthdrp == IPPROTO_DSTOPTS)
3191                                 return (IPSEC_HDR_DONT_PROCESS);
3192                         nexthdrp = &desthdr->ip6d_nxt;
3193                         break;
3194                 case IPPROTO_ROUTING:
3195                         rthdr = (ip6_rthdr_t *)whereptr;
3196 
3197                         /*
3198                          * If there's more hops left on the routing header,
3199                          * return now with DON'T PROCESS.
3200                          */
3201                         if (rthdr->ip6r_segleft > 0)
3202                                 return (IPSEC_HDR_DONT_PROCESS);
3203 
3204                         ehdrlen =  8 * (rthdr->ip6r_len + 1);
3205                         if ((uchar_t *)rthdr +  ehdrlen > endptr)
3206                                 return (IPSEC_MEMORY_ERROR);
3207                         nexthdrp = &rthdr->ip6r_nxt;
3208                         break;
3209                 case IPPROTO_FRAGMENT:
3210                         /* Wait for reassembly */
3211                         return (IPSEC_HDR_DONT_PROCESS);
3212                 case IPPROTO_AH:
3213                         *nexthdr = IPPROTO_AH;
3214                         return (IPSEC_HDR_PROCESS);
3215                 case IPPROTO_NONE:
3216                         /* No next header means we're finished */
3217                 default:
3218                         return (IPSEC_HDR_DONT_PROCESS);
3219                 }
3220                 length += ehdrlen;
3221                 whereptr += ehdrlen;
3222         }
3223         /*
3224          * Malformed/truncated packet.
3225          */
3226         return (IPSEC_MEMORY_ERROR);
3227 }
3228 
3229 /*
3230  * Path for AH if options are present.
3231  * Returns NULL if the mblk was consumed.
3232  *
3233  * Sometimes AH needs to be done before other IPv6 headers for security
3234  * reasons.  This function (and its ipsec_needs_processing_v6() above)
3235  * indicates if that is so, and fans out to the appropriate IPsec protocol
3236  * for the datagram passed in.
3237  */
3238 mblk_t *
3239 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3240 {
3241         uint8_t nexthdr;
3242         ah_t *ah;
3243         ill_t           *ill = ira->ira_ill;
3244         ip_stack_t      *ipst = ill->ill_ipst;
3245         ipsec_stack_t   *ipss = ipst->ips_netstack->netstack_ipsec;
3246 
3247         switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3248         case IPSEC_MEMORY_ERROR:
3249                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3250                 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3251                 freemsg(mp);
3252                 return (NULL);
3253         case IPSEC_HDR_DONT_PROCESS:
3254                 return (mp);
3255         }
3256 
3257         /* Default means send it to AH! */
3258         ASSERT(nexthdr == IPPROTO_AH);
3259 
3260         if (!ipsec_loaded(ipss)) {
3261                 ip_proto_not_sup(mp, ira);
3262                 return (NULL);
3263         }
3264 
3265         mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3266         if (mp == NULL)
3267                 return (NULL);
3268         ASSERT(ah != NULL);
3269         ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3270         ASSERT(ira->ira_ipsec_ah_sa != NULL);
3271         ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3272         mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3273 
3274         if (mp == NULL) {
3275                 /*
3276                  * Either it failed or is pending. In the former case
3277                  * ipIfStatsInDiscards was increased.
3278                  */
3279                 return (NULL);
3280         }
3281 
3282         /* we're done with IPsec processing, send it up */
3283         ip_input_post_ipsec(mp, ira);
3284         return (NULL);
3285 }
3286 
3287 /*
3288  * Reassemble fragment.
3289  * When it returns a completed message the first mblk will only contain
3290  * the headers prior to the fragment header, with the nexthdr value updated
3291  * to be the header after the fragment header.
3292  */
3293 mblk_t *
3294 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
3295     ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
3296 {
3297         uint32_t        ident = ntohl(fraghdr->ip6f_ident);
3298         uint16_t        offset;
3299         boolean_t       more_frags;
3300         uint8_t         nexthdr = fraghdr->ip6f_nxt;
3301         in6_addr_t      *v6dst_ptr;
3302         in6_addr_t      *v6src_ptr;
3303         uint_t          end;
3304         uint_t          hdr_length;
3305         size_t          count;
3306         ipf_t           *ipf;
3307         ipf_t           **ipfp;
3308         ipfb_t          *ipfb;
3309         mblk_t          *mp1;
3310         uint8_t         ecn_info = 0;
3311         size_t          msg_len;
3312         mblk_t          *tail_mp;
3313         mblk_t          *t_mp;
3314         boolean_t       pruned = B_FALSE;
3315         uint32_t        sum_val;
3316         uint16_t        sum_flags;
3317         ill_t           *ill = ira->ira_ill;
3318         ip_stack_t      *ipst = ill->ill_ipst;
3319         uint_t          prev_nexthdr_offset;
3320         uint8_t         prev_nexthdr;
3321         uint8_t         *ptr;
3322         uint32_t        packet_size;
3323 
3324         /*
3325          * We utilize hardware computed checksum info only for UDP since
3326          * IP fragmentation is a normal occurence for the protocol.  In
3327          * addition, checksum offload support for IP fragments carrying
3328          * UDP payload is commonly implemented across network adapters.
3329          */
3330         ASSERT(ira->ira_rill != NULL);
3331         if (nexthdr == IPPROTO_UDP && dohwcksum &&
3332             ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
3333             (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
3334                 mblk_t *mp1 = mp->b_cont;
3335                 int32_t len;
3336 
3337                 /* Record checksum information from the packet */
3338                 sum_val = (uint32_t)DB_CKSUM16(mp);
3339                 sum_flags = DB_CKSUMFLAGS(mp);
3340 
3341                 /* fragmented payload offset from beginning of mblk */
3342                 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
3343 
3344                 if ((sum_flags & HCK_PARTIALCKSUM) &&
3345                     (mp1 == NULL || mp1->b_cont == NULL) &&
3346                     offset >= DB_CKSUMSTART(mp) &&
3347                     ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
3348                         uint32_t adj;
3349                         /*
3350                          * Partial checksum has been calculated by hardware
3351                          * and attached to the packet; in addition, any
3352                          * prepended extraneous data is even byte aligned.
3353                          * If any such data exists, we adjust the checksum;
3354                          * this would also handle any postpended data.
3355                          */
3356                         IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
3357                             mp, mp1, len, adj);
3358 
3359                         /* One's complement subtract extraneous checksum */
3360                         if (adj >= sum_val)
3361                                 sum_val = ~(adj - sum_val) & 0xFFFF;
3362                         else
3363                                 sum_val -= adj;
3364                 }
3365         } else {
3366                 sum_val = 0;
3367                 sum_flags = 0;
3368         }
3369 
3370         /* Clear hardware checksumming flag */
3371         DB_CKSUMFLAGS(mp) = 0;
3372 
3373         /*
3374          * Determine the offset (from the begining of the IP header)
3375          * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3376          * this when removing the fragment header from the packet.
3377          * This packet consists of the IPv6 header, a potential
3378          * hop-by-hop options header, a potential pre-routing-header
3379          * destination options header, and a potential routing header.
3380          */
3381         prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
3382         prev_nexthdr = ip6h->ip6_nxt;
3383         ptr = (uint8_t *)&ip6h[1];
3384 
3385         if (prev_nexthdr == IPPROTO_HOPOPTS) {
3386                 ip6_hbh_t       *hbh_hdr;
3387                 uint_t          hdr_len;
3388 
3389                 hbh_hdr = (ip6_hbh_t *)ptr;
3390                 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
3391                 prev_nexthdr = hbh_hdr->ip6h_nxt;
3392                 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
3393                     - (uint8_t *)ip6h;
3394                 ptr += hdr_len;
3395         }
3396         if (prev_nexthdr == IPPROTO_DSTOPTS) {
3397                 ip6_dest_t      *dest_hdr;
3398                 uint_t          hdr_len;
3399 
3400                 dest_hdr = (ip6_dest_t *)ptr;
3401                 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
3402                 prev_nexthdr = dest_hdr->ip6d_nxt;
3403                 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
3404                     - (uint8_t *)ip6h;
3405                 ptr += hdr_len;
3406         }
3407         if (prev_nexthdr == IPPROTO_ROUTING) {
3408                 ip6_rthdr_t     *rthdr;
3409                 uint_t          hdr_len;
3410 
3411                 rthdr = (ip6_rthdr_t *)ptr;
3412                 prev_nexthdr = rthdr->ip6r_nxt;
3413                 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
3414                     - (uint8_t *)ip6h;
3415                 hdr_len = 8 * (rthdr->ip6r_len + 1);
3416                 ptr += hdr_len;
3417         }
3418         if (prev_nexthdr != IPPROTO_FRAGMENT) {
3419                 /* Can't handle other headers before the fragment header */
3420                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3421                 ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3422                 freemsg(mp);
3423                 return (NULL);
3424         }
3425 
3426         /*
3427          * Note: Fragment offset in header is in 8-octet units.
3428          * Clearing least significant 3 bits not only extracts
3429          * it but also gets it in units of octets.
3430          */
3431         offset = ntohs(fraghdr->ip6f_offlg) & ~7;
3432         more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
3433 
3434         /*
3435          * Is the more frags flag on and the payload length not a multiple
3436          * of eight?
3437          */
3438         if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
3439                 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3440                 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3441                     (uint32_t)((char *)&ip6h->ip6_plen -
3442                     (char *)ip6h), B_FALSE, ira);
3443                 return (NULL);
3444         }
3445 
3446         v6src_ptr = &ip6h->ip6_src;
3447         v6dst_ptr = &ip6h->ip6_dst;
3448         end = remlen;
3449 
3450         hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
3451         end += offset;
3452 
3453         /*
3454          * Would fragment cause reassembled packet to have a payload length
3455          * greater than IP_MAXPACKET - the max payload size?
3456          */
3457         if (end > IP_MAXPACKET) {
3458                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3459                 ip_drop_input("Reassembled packet too large", mp, ill);
3460                 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3461                     (uint32_t)((char *)&fraghdr->ip6f_offlg -
3462                     (char *)ip6h), B_FALSE, ira);
3463                 return (NULL);
3464         }
3465 
3466         /*
3467          * This packet just has one fragment. Reassembly not
3468          * needed.
3469          */
3470         if (!more_frags && offset == 0) {
3471                 goto reass_done;
3472         }
3473 
3474         /*
3475          * Drop the fragmented as early as possible, if
3476          * we don't have resource(s) to re-assemble.
3477          */
3478         if (ipst->ips_ip_reass_queue_bytes == 0) {
3479                 freemsg(mp);
3480                 return (NULL);
3481         }
3482 
3483         /* Record the ECN field info. */
3484         ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
3485         /*
3486          * If this is not the first fragment, dump the unfragmentable
3487          * portion of the packet.
3488          */
3489         if (offset)
3490                 mp->b_rptr = (uchar_t *)&fraghdr[1];
3491 
3492         /*
3493          * Fragmentation reassembly.  Each ILL has a hash table for
3494          * queueing packets undergoing reassembly for all IPIFs
3495          * associated with the ILL.  The hash is based on the packet
3496          * IP ident field.  The ILL frag hash table was allocated
3497          * as a timer block at the time the ILL was created.  Whenever
3498          * there is anything on the reassembly queue, the timer will
3499          * be running.
3500          */
3501         /* Handle vnic loopback of fragments */
3502         if (mp->b_datap->db_ref > 2)
3503                 msg_len = 0;
3504         else
3505                 msg_len = MBLKSIZE(mp);
3506 
3507         tail_mp = mp;
3508         while (tail_mp->b_cont != NULL) {
3509                 tail_mp = tail_mp->b_cont;
3510                 if (tail_mp->b_datap->db_ref <= 2)
3511                         msg_len += MBLKSIZE(tail_mp);
3512         }
3513         /*
3514          * If the reassembly list for this ILL will get too big
3515          * prune it.
3516          */
3517 
3518         if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
3519             ipst->ips_ip_reass_queue_bytes) {
3520                 DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
3521                     uint_t, ill->ill_frag_count,
3522                     uint_t, ipst->ips_ip_reass_queue_bytes);
3523                 ill_frag_prune(ill,
3524                     (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
3525                     (ipst->ips_ip_reass_queue_bytes - msg_len));
3526                 pruned = B_TRUE;
3527         }
3528 
3529         ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
3530         mutex_enter(&ipfb->ipfb_lock);
3531 
3532         ipfp = &ipfb->ipfb_ipf;
3533         /* Try to find an existing fragment queue for this packet. */
3534         for (;;) {
3535                 ipf = ipfp[0];
3536                 if (ipf) {
3537                         /*
3538                          * It has to match on ident, source address, and
3539                          * dest address.
3540                          */
3541                         if (ipf->ipf_ident == ident &&
3542                             IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
3543                             IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
3544 
3545                                 /*
3546                                  * If we have received too many
3547                                  * duplicate fragments for this packet
3548                                  * free it.
3549                                  */
3550                                 if (ipf->ipf_num_dups > ip_max_frag_dups) {
3551                                         ill_frag_free_pkts(ill, ipfb, ipf, 1);
3552                                         freemsg(mp);
3553                                         mutex_exit(&ipfb->ipfb_lock);
3554                                         return (NULL);
3555                                 }
3556 
3557                                 break;
3558                         }
3559                         ipfp = &ipf->ipf_hash_next;
3560                         continue;
3561                 }
3562 
3563 
3564                 /*
3565                  * If we pruned the list, do we want to store this new
3566                  * fragment?. We apply an optimization here based on the
3567                  * fact that most fragments will be received in order.
3568                  * So if the offset of this incoming fragment is zero,
3569                  * it is the first fragment of a new packet. We will
3570                  * keep it.  Otherwise drop the fragment, as we have
3571                  * probably pruned the packet already (since the
3572                  * packet cannot be found).
3573                  */
3574 
3575                 if (pruned && offset != 0) {
3576                         mutex_exit(&ipfb->ipfb_lock);
3577                         freemsg(mp);
3578                         return (NULL);
3579                 }
3580 
3581                 /* New guy.  Allocate a frag message. */
3582                 mp1 = allocb(sizeof (*ipf), BPRI_MED);
3583                 if (!mp1) {
3584                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3585                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
3586                         freemsg(mp);
3587         partial_reass_done:
3588                         mutex_exit(&ipfb->ipfb_lock);
3589                         return (NULL);
3590                 }
3591 
3592                 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst))  {
3593                         /*
3594                          * Too many fragmented packets in this hash bucket.
3595                          * Free the oldest.
3596                          */
3597                         ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
3598                 }
3599 
3600                 mp1->b_cont = mp;
3601 
3602                 /* Initialize the fragment header. */
3603                 ipf = (ipf_t *)mp1->b_rptr;
3604                 ipf->ipf_mp = mp1;
3605                 ipf->ipf_ptphn = ipfp;
3606                 ipfp[0] = ipf;
3607                 ipf->ipf_hash_next = NULL;
3608                 ipf->ipf_ident = ident;
3609                 ipf->ipf_v6src = *v6src_ptr;
3610                 ipf->ipf_v6dst = *v6dst_ptr;
3611                 /* Record reassembly start time. */
3612                 ipf->ipf_timestamp = gethrestime_sec();
3613                 /* Record ipf generation and account for frag header */
3614                 ipf->ipf_gen = ill->ill_ipf_gen++;
3615                 ipf->ipf_count = MBLKSIZE(mp1);
3616                 ipf->ipf_protocol = nexthdr;
3617                 ipf->ipf_nf_hdr_len = 0;
3618                 ipf->ipf_prev_nexthdr_offset = 0;
3619                 ipf->ipf_last_frag_seen = B_FALSE;
3620                 ipf->ipf_ecn = ecn_info;
3621                 ipf->ipf_num_dups = 0;
3622                 ipfb->ipfb_frag_pkts++;
3623                 ipf->ipf_checksum = 0;
3624                 ipf->ipf_checksum_flags = 0;
3625 
3626                 /* Store checksum value in fragment header */
3627                 if (sum_flags != 0) {
3628                         sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3629                         sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3630                         ipf->ipf_checksum = sum_val;
3631                         ipf->ipf_checksum_flags = sum_flags;
3632                 }
3633 
3634                 /*
3635                  * We handle reassembly two ways.  In the easy case,
3636                  * where all the fragments show up in order, we do
3637                  * minimal bookkeeping, and just clip new pieces on
3638                  * the end.  If we ever see a hole, then we go off
3639                  * to ip_reassemble which has to mark the pieces and
3640                  * keep track of the number of holes, etc.  Obviously,
3641                  * the point of having both mechanisms is so we can
3642                  * handle the easy case as efficiently as possible.
3643                  */
3644                 if (offset == 0) {
3645                         /* Easy case, in-order reassembly so far. */
3646                         /* Update the byte count */
3647                         ipf->ipf_count += msg_len;
3648                         ipf->ipf_tail_mp = tail_mp;
3649                         /*
3650                          * Keep track of next expected offset in
3651                          * ipf_end.
3652                          */
3653                         ipf->ipf_end = end;
3654                         ipf->ipf_nf_hdr_len = hdr_length;
3655                         ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
3656                 } else {
3657                         /* Hard case, hole at the beginning. */
3658                         ipf->ipf_tail_mp = NULL;
3659                         /*
3660                          * ipf_end == 0 means that we have given up
3661                          * on easy reassembly.
3662                          */
3663                         ipf->ipf_end = 0;
3664 
3665                         /* Forget checksum offload from now on */
3666                         ipf->ipf_checksum_flags = 0;
3667 
3668                         /*
3669                          * ipf_hole_cnt is set by ip_reassemble.
3670                          * ipf_count is updated by ip_reassemble.
3671                          * No need to check for return value here
3672                          * as we don't expect reassembly to complete or
3673                          * fail for the first fragment itself.
3674                          */
3675                         (void) ip_reassemble(mp, ipf, offset, more_frags, ill,
3676                             msg_len);
3677                 }
3678                 /* Update per ipfb and ill byte counts */
3679                 ipfb->ipfb_count += ipf->ipf_count;
3680                 ASSERT(ipfb->ipfb_count > 0);     /* Wraparound */
3681                 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
3682                 /* If the frag timer wasn't already going, start it. */
3683                 mutex_enter(&ill->ill_lock);
3684                 ill_frag_timer_start(ill);
3685                 mutex_exit(&ill->ill_lock);
3686                 goto partial_reass_done;
3687         }
3688 
3689         /*
3690          * If the packet's flag has changed (it could be coming up
3691          * from an interface different than the previous, therefore
3692          * possibly different checksum capability), then forget about
3693          * any stored checksum states.  Otherwise add the value to
3694          * the existing one stored in the fragment header.
3695          */
3696         if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
3697                 sum_val += ipf->ipf_checksum;
3698                 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3699                 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3700                 ipf->ipf_checksum = sum_val;
3701         } else if (ipf->ipf_checksum_flags != 0) {
3702                 /* Forget checksum offload from now on */
3703                 ipf->ipf_checksum_flags = 0;
3704         }
3705 
3706         /*
3707          * We have a new piece of a datagram which is already being
3708          * reassembled.  Update the ECN info if all IP fragments
3709          * are ECN capable.  If there is one which is not, clear
3710          * all the info.  If there is at least one which has CE
3711          * code point, IP needs to report that up to transport.
3712          */
3713         if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
3714                 if (ecn_info == IPH_ECN_CE)
3715                         ipf->ipf_ecn = IPH_ECN_CE;
3716         } else {
3717                 ipf->ipf_ecn = IPH_ECN_NECT;
3718         }
3719 
3720         if (offset && ipf->ipf_end == offset) {
3721                 /* The new fragment fits at the end */
3722                 ipf->ipf_tail_mp->b_cont = mp;
3723                 /* Update the byte count */
3724                 ipf->ipf_count += msg_len;
3725                 /* Update per ipfb and ill byte counts */
3726                 ipfb->ipfb_count += msg_len;
3727                 ASSERT(ipfb->ipfb_count > 0);     /* Wraparound */
3728                 atomic_add_32(&ill->ill_frag_count, msg_len);
3729                 if (more_frags) {
3730                         /* More to come. */
3731                         ipf->ipf_end = end;
3732                         ipf->ipf_tail_mp = tail_mp;
3733                         goto partial_reass_done;
3734                 }
3735         } else {
3736                 /*
3737                  * Go do the hard cases.
3738                  * Call ip_reassemble().
3739                  */
3740                 int ret;
3741 
3742                 if (offset == 0) {
3743                         if (ipf->ipf_prev_nexthdr_offset == 0) {
3744                                 ipf->ipf_nf_hdr_len = hdr_length;
3745                                 ipf->ipf_prev_nexthdr_offset =
3746                                     prev_nexthdr_offset;
3747                         }
3748                 }
3749                 /* Save current byte count */
3750                 count = ipf->ipf_count;
3751                 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
3752 
3753                 /* Count of bytes added and subtracted (freeb()ed) */
3754                 count = ipf->ipf_count - count;
3755                 if (count) {
3756                         /* Update per ipfb and ill byte counts */
3757                         ipfb->ipfb_count += count;
3758                         ASSERT(ipfb->ipfb_count > 0);     /* Wraparound */
3759                         atomic_add_32(&ill->ill_frag_count, count);
3760                 }
3761                 if (ret == IP_REASS_PARTIAL) {
3762                         goto partial_reass_done;
3763                 } else if (ret == IP_REASS_FAILED) {
3764                         /* Reassembly failed. Free up all resources */
3765                         ill_frag_free_pkts(ill, ipfb, ipf, 1);
3766                         for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
3767                                 IP_REASS_SET_START(t_mp, 0);
3768                                 IP_REASS_SET_END(t_mp, 0);
3769                         }
3770                         freemsg(mp);
3771                         goto partial_reass_done;
3772                 }
3773 
3774                 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */
3775         }
3776         /*
3777          * We have completed reassembly.  Unhook the frag header from
3778          * the reassembly list.
3779          *
3780          * Grab the unfragmentable header length next header value out
3781          * of the first fragment
3782          */
3783         ASSERT(ipf->ipf_nf_hdr_len != 0);
3784         hdr_length = ipf->ipf_nf_hdr_len;
3785 
3786         /*
3787          * Before we free the frag header, record the ECN info
3788          * to report back to the transport.
3789          */
3790         ecn_info = ipf->ipf_ecn;
3791 
3792         /*
3793          * Store the nextheader field in the header preceding the fragment
3794          * header
3795          */
3796         nexthdr = ipf->ipf_protocol;
3797         prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
3798         ipfp = ipf->ipf_ptphn;
3799 
3800         /* We need to supply these to caller */
3801         if ((sum_flags = ipf->ipf_checksum_flags) != 0)
3802                 sum_val = ipf->ipf_checksum;
3803         else
3804                 sum_val = 0;
3805 
3806         mp1 = ipf->ipf_mp;
3807         count = ipf->ipf_count;
3808         ipf = ipf->ipf_hash_next;
3809         if (ipf)
3810                 ipf->ipf_ptphn = ipfp;
3811         ipfp[0] = ipf;
3812         atomic_add_32(&ill->ill_frag_count, -count);
3813         ASSERT(ipfb->ipfb_count >= count);
3814         ipfb->ipfb_count -= count;
3815         ipfb->ipfb_frag_pkts--;
3816         mutex_exit(&ipfb->ipfb_lock);
3817         /* Ditch the frag header. */
3818         mp = mp1->b_cont;
3819         freeb(mp1);
3820 
3821         /*
3822          * Make sure the packet is good by doing some sanity
3823          * check. If bad we can silentely drop the packet.
3824          */
3825 reass_done:
3826         if (hdr_length < sizeof (ip6_frag_t)) {
3827                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3828                 ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3829                 ip1dbg(("ip_input_fragment_v6: bad packet\n"));
3830                 freemsg(mp);
3831                 return (NULL);
3832         }
3833 
3834         /*
3835          * Remove the fragment header from the initial header by
3836          * splitting the mblk into the non-fragmentable header and
3837          * everthing after the fragment extension header.  This has the
3838          * side effect of putting all the headers that need destination
3839          * processing into the b_cont block-- on return this fact is
3840          * used in order to avoid having to look at the extensions
3841          * already processed.
3842          *
3843          * Note that this code assumes that the unfragmentable portion
3844          * of the header is in the first mblk and increments
3845          * the read pointer past it.  If this assumption is broken
3846          * this code fails badly.
3847          */
3848         if (mp->b_rptr + hdr_length != mp->b_wptr) {
3849                 mblk_t *nmp;
3850 
3851                 if (!(nmp = dupb(mp))) {
3852                         ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
3853                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3854                         ip_drop_input("ipIfStatsInDiscards", mp, ill);
3855                         freemsg(mp);
3856                         return (NULL);
3857                 }
3858                 nmp->b_cont = mp->b_cont;
3859                 mp->b_cont = nmp;
3860                 nmp->b_rptr += hdr_length;
3861         }
3862         mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
3863 
3864         ip6h = (ip6_t *)mp->b_rptr;
3865         ((char *)ip6h)[prev_nexthdr_offset] = nexthdr;
3866 
3867         /* Restore original IP length in header. */
3868         packet_size = msgdsize(mp);
3869         ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
3870         /* Record the ECN info. */
3871         ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
3872         ip6h->ip6_vcf |= htonl(ecn_info << 20);
3873 
3874         /* Update the receive attributes */
3875         ira->ira_pktlen = packet_size;
3876         ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
3877         ira->ira_protocol = nexthdr;
3878 
3879         /* Reassembly is successful; set checksum information in packet */
3880         DB_CKSUM16(mp) = (uint16_t)sum_val;
3881         DB_CKSUMFLAGS(mp) = sum_flags;
3882         DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;
3883 
3884         return (mp);
3885 }
3886 
3887 /*
3888  * Given an mblk and a ptr, find the destination address in an IPv6 routing
3889  * header.
3890  */
3891 static in6_addr_t
3892 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3893 {
3894         ip6_rthdr0_t *rt0;
3895         int segleft, numaddr;
3896         in6_addr_t *ap, rv = oldrv;
3897 
3898         rt0 = (ip6_rthdr0_t *)whereptr;
3899         if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3900                 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3901                     uint8_t *, whereptr);
3902                 return (rv);
3903         }
3904         segleft = rt0->ip6r0_segleft;
3905         numaddr = rt0->ip6r0_len / 2;
3906 
3907         if ((rt0->ip6r0_len & 0x1) ||
3908             (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3909             (segleft > rt0->ip6r0_len / 2)) {
3910                 /*
3911                  * Corrupt packet.  Either the routing header length is odd
3912                  * (can't happen) or mismatched compared to the packet, or the
3913                  * number of addresses is.  Return what we can.  This will
3914                  * only be a problem on forwarded packets that get squeezed
3915                  * through an outbound tunnel enforcing IPsec Tunnel Mode.
3916                  */
3917                 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3918                     whereptr);
3919                 return (rv);
3920         }
3921 
3922         if (segleft != 0) {
3923                 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3924                 rv = ap[numaddr - 1];
3925         }
3926 
3927         return (rv);
3928 }
3929 
3930 /*
3931  * Walk through the options to see if there is a routing header.
3932  * If present get the destination which is the last address of
3933  * the option.
3934  * mp needs to be provided in cases when the extension headers might span
3935  * b_cont; mp is never modified by this function.
3936  */
3937 in6_addr_t
3938 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3939 {
3940         const mblk_t *current_mp = mp;
3941         uint8_t nexthdr;
3942         uint8_t *whereptr;
3943         int ehdrlen;
3944         in6_addr_t rv;
3945 
3946         whereptr = (uint8_t *)ip6h;
3947         ehdrlen = sizeof (ip6_t);
3948 
3949         /* We assume at least the IPv6 base header is within one mblk. */
3950         ASSERT(mp == NULL ||
3951             (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3952 
3953         rv = ip6h->ip6_dst;
3954         nexthdr = ip6h->ip6_nxt;
3955         if (is_fragment != NULL)
3956                 *is_fragment = B_FALSE;
3957 
3958         /*
3959          * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3960          * no extension headers will be split across mblks.
3961          */
3962 
3963         while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3964             nexthdr == IPPROTO_ROUTING) {
3965                 if (nexthdr == IPPROTO_ROUTING)
3966                         rv = pluck_out_dst(current_mp, whereptr, rv);
3967 
3968                 /*
3969                  * All IPv6 extension headers have the next-header in byte
3970                  * 0, and the (length - 8) in 8-byte-words.
3971                  */
3972                 while (current_mp != NULL &&
3973                     whereptr + ehdrlen >= current_mp->b_wptr) {
3974                         ehdrlen -= (current_mp->b_wptr - whereptr);
3975                         current_mp = current_mp->b_cont;
3976                         if (current_mp == NULL) {
3977                                 /* Bad packet.  Return what we can. */
3978                                 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3979                                     mp, mblk_t *, current_mp, ip6_t *, ip6h);
3980                                 goto done;
3981                         }
3982                         whereptr = current_mp->b_rptr;
3983                 }
3984                 whereptr += ehdrlen;
3985 
3986                 nexthdr = *whereptr;
3987                 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3988                 ehdrlen = (*(whereptr + 1) + 1) * 8;
3989         }
3990 
3991 done:
3992         if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3993                 *is_fragment = B_TRUE;
3994         return (rv);
3995 }
3996 
3997 /*
3998  * ip_source_routed_v6:
3999  * This function is called by redirect code (called from ip_input_v6) to
4000  * know whether this packet is source routed through this node i.e
4001  * whether this node (router) is part of the journey. This
4002  * function is called under two cases :
4003  *
4004  * case 1 : Routing header was processed by this node and
4005  *          ip_process_rthdr replaced ip6_dst with the next hop
4006  *          and we are forwarding the packet to the next hop.
4007  *
4008  * case 2 : Routing header was not processed by this node and we
4009  *          are just forwarding the packet.
4010  *
4011  * For case (1) we don't want to send redirects. For case(2) we
4012  * want to send redirects.
4013  */
4014 static boolean_t
4015 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
4016 {
4017         uint8_t         nexthdr;
4018         in6_addr_t      *addrptr;
4019         ip6_rthdr0_t    *rthdr;
4020         uint8_t         numaddr;
4021         ip6_hbh_t       *hbhhdr;
4022         uint_t          ehdrlen;
4023         uint8_t         *byteptr;
4024 
4025         ip2dbg(("ip_source_routed_v6\n"));
4026         nexthdr = ip6h->ip6_nxt;
4027         ehdrlen = IPV6_HDR_LEN;
4028 
4029         /* if a routing hdr is preceeded by HOPOPT or DSTOPT */
4030         while (nexthdr == IPPROTO_HOPOPTS ||
4031             nexthdr == IPPROTO_DSTOPTS) {
4032                 byteptr = (uint8_t *)ip6h + ehdrlen;
4033                 /*
4034                  * Check if we have already processed
4035                  * packets or we are just a forwarding
4036                  * router which only pulled up msgs up
4037                  * to IPV6HDR and  one HBH ext header
4038                  */
4039                 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4040                         ip2dbg(("ip_source_routed_v6: Extension"
4041                             " headers not processed\n"));
4042                         return (B_FALSE);
4043                 }
4044                 hbhhdr = (ip6_hbh_t *)byteptr;
4045                 nexthdr = hbhhdr->ip6h_nxt;
4046                 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
4047         }
4048         switch (nexthdr) {
4049         case IPPROTO_ROUTING:
4050                 byteptr = (uint8_t *)ip6h + ehdrlen;
4051                 /*
4052                  * If for some reason, we haven't pulled up
4053                  * the routing hdr data mblk, then we must
4054                  * not have processed it at all. So for sure
4055                  * we are not part of the source routed journey.
4056                  */
4057                 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
4058                         ip2dbg(("ip_source_routed_v6: Routing"
4059                             " header not processed\n"));
4060                         return (B_FALSE);
4061                 }
4062                 rthdr = (ip6_rthdr0_t *)byteptr;
4063                 /*
4064                  * Either we are an intermediate router or the
4065                  * last hop before destination and we have
4066                  * already processed the routing header.
4067                  * If segment_left is greater than or equal to zero,
4068                  * then we must be the (numaddr - segleft) entry
4069                  * of the routing header. Although ip6r0_segleft
4070                  * is a unit8_t variable, we still check for zero
4071                  * or greater value, if in case the data type
4072                  * is changed someday in future.
4073                  */
4074                 if (rthdr->ip6r0_segleft > 0 ||
4075                     rthdr->ip6r0_segleft == 0) {
4076                         numaddr = rthdr->ip6r0_len / 2;
4077                         addrptr = (in6_addr_t *)((char *)rthdr +
4078                             sizeof (*rthdr));
4079                         addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
4080                         if (addrptr != NULL) {
4081                                 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
4082                                         return (B_TRUE);
4083                                 ip1dbg(("ip_source_routed_v6: Not local\n"));
4084                         }
4085                 }
4086         /* FALLTHRU */
4087         default:
4088                 ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4089                 return (B_FALSE);
4090         }
4091 }
4092 
4093 /*
4094  * IPv6 fragmentation.  Essentially the same as IPv4 fragmentation.
4095  * We have not optimized this in terms of number of mblks
4096  * allocated. For instance, for each fragment sent we always allocate a
4097  * mblk to hold the IPv6 header and fragment header.
4098  *
4099  * Assumes that all the extension headers are contained in the first mblk
4100  * and that the fragment header has has already been added by calling
4101  * ip_fraghdr_add_v6.
4102  */
4103 int
4104 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4105     uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4106     pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4107 {
4108         ip6_t           *ip6h = (ip6_t *)mp->b_rptr;
4109         ip6_t           *fip6h;
4110         mblk_t          *hmp;
4111         mblk_t          *hmp0;
4112         mblk_t          *dmp;
4113         ip6_frag_t      *fraghdr;
4114         size_t          unfragmentable_len;
4115         size_t          mlen;
4116         size_t          max_chunk;
4117         uint16_t        off_flags;
4118         uint16_t        offset = 0;
4119         ill_t           *ill = nce->nce_ill;
4120         uint8_t         nexthdr;
4121         uint8_t         *ptr;
4122         ip_stack_t      *ipst = ill->ill_ipst;
4123         uint_t          priority = mp->b_band;
4124         int             error = 0;
4125 
4126         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4127         if (max_frag == 0) {
4128                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4129                 ip_drop_output("FragFails: zero max_frag", mp, ill);
4130                 freemsg(mp);
4131                 return (EINVAL);
4132         }
4133 
4134         /*
4135          * Caller should have added fraghdr_t to pkt_len, and also
4136          * updated ip6_plen.
4137          */
4138         ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4139         ASSERT(msgdsize(mp) == pkt_len);
4140 
4141         /*
4142          * Determine the length of the unfragmentable portion of this
4143          * datagram.  This consists of the IPv6 header, a potential
4144          * hop-by-hop options header, a potential pre-routing-header
4145          * destination options header, and a potential routing header.
4146          */
4147         nexthdr = ip6h->ip6_nxt;
4148         ptr = (uint8_t *)&ip6h[1];
4149 
4150         if (nexthdr == IPPROTO_HOPOPTS) {
4151                 ip6_hbh_t       *hbh_hdr;
4152                 uint_t          hdr_len;
4153 
4154                 hbh_hdr = (ip6_hbh_t *)ptr;
4155                 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4156                 nexthdr = hbh_hdr->ip6h_nxt;
4157                 ptr += hdr_len;
4158         }
4159         if (nexthdr == IPPROTO_DSTOPTS) {
4160                 ip6_dest_t      *dest_hdr;
4161                 uint_t          hdr_len;
4162 
4163                 dest_hdr = (ip6_dest_t *)ptr;
4164                 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4165                         hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4166                         nexthdr = dest_hdr->ip6d_nxt;
4167                         ptr += hdr_len;
4168                 }
4169         }
4170         if (nexthdr == IPPROTO_ROUTING) {
4171                 ip6_rthdr_t     *rthdr;
4172                 uint_t          hdr_len;
4173 
4174                 rthdr = (ip6_rthdr_t *)ptr;
4175                 nexthdr = rthdr->ip6r_nxt;
4176                 hdr_len = 8 * (rthdr->ip6r_len + 1);
4177                 ptr += hdr_len;
4178         }
4179         if (nexthdr != IPPROTO_FRAGMENT) {
4180                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4181                 ip_drop_output("FragFails: bad nexthdr", mp, ill);
4182                 freemsg(mp);
4183                 return (EINVAL);
4184         }
4185         unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4186         unfragmentable_len += sizeof (ip6_frag_t);
4187 
4188         max_chunk = (max_frag - unfragmentable_len) & ~7;
4189 
4190         /*
4191          * Allocate an mblk with enough room for the link-layer
4192          * header and the unfragmentable part of the datagram, which includes
4193          * the fragment header.  This (or a copy) will be used as the
4194          * first mblk for each fragment we send.
4195          */
4196         hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4197         if (hmp == NULL) {
4198                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4199                 ip_drop_output("FragFails: no hmp", mp, ill);
4200                 freemsg(mp);
4201                 return (ENOBUFS);
4202         }
4203         hmp->b_rptr += ipst->ips_ip_wroff_extra;
4204         hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4205 
4206         fip6h = (ip6_t *)hmp->b_rptr;
4207         bcopy(ip6h, fip6h, unfragmentable_len);
4208 
4209         /*
4210          * pkt_len is set to the total length of the fragmentable data in this
4211          * datagram.  For each fragment sent, we will decrement pkt_len
4212          * by the amount of fragmentable data sent in that fragment
4213          * until len reaches zero.
4214          */
4215         pkt_len -= unfragmentable_len;
4216 
4217         /*
4218          * Move read ptr past unfragmentable portion, we don't want this part
4219          * of the data in our fragments.
4220          */
4221         mp->b_rptr += unfragmentable_len;
4222         if (mp->b_rptr == mp->b_wptr) {
4223                 mblk_t *mp1 = mp->b_cont;
4224                 freeb(mp);
4225                 mp = mp1;
4226         }
4227 
4228         while (pkt_len != 0) {
4229                 mlen = MIN(pkt_len, max_chunk);
4230                 pkt_len -= mlen;
4231                 if (pkt_len != 0) {
4232                         /* Not last */
4233                         hmp0 = copyb(hmp);
4234                         if (hmp0 == NULL) {
4235                                 BUMP_MIB(ill->ill_ip_mib,
4236                                     ipIfStatsOutFragFails);
4237                                 ip_drop_output("FragFails: copyb failed",
4238                                     mp, ill);
4239                                 freeb(hmp);
4240                                 freemsg(mp);
4241                                 ip1dbg(("ip_fragment_v6: copyb failed\n"));
4242                                 return (ENOBUFS);
4243                         }
4244                         off_flags = IP6F_MORE_FRAG;
4245                 } else {
4246                         /* Last fragment */
4247                         hmp0 = hmp;
4248                         hmp = NULL;
4249                         off_flags = 0;
4250                 }
4251                 fip6h = (ip6_t *)(hmp0->b_rptr);
4252                 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4253                     sizeof (ip6_frag_t));
4254 
4255                 fip6h->ip6_plen = htons((uint16_t)(mlen +
4256                     unfragmentable_len - IPV6_HDR_LEN));
4257                 /*
4258                  * Note: Optimization alert.
4259                  * In IPv6 (and IPv4) protocol header, Fragment Offset
4260                  * ("offset") is 13 bits wide and in 8-octet units.
4261                  * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4262                  * it occupies the most significant 13 bits.
4263                  * (least significant 13 bits in IPv4).
4264                  * We do not do any shifts here. Not shifting is same effect
4265                  * as taking offset value in octet units, dividing by 8 and
4266                  * then shifting 3 bits left to line it up in place in proper
4267                  * place protocol header.
4268                  */
4269                 fraghdr->ip6f_offlg = htons(offset) | off_flags;
4270 
4271                 if (!(dmp = ip_carve_mp(&mp, mlen))) {
4272                         /* mp has already been freed by ip_carve_mp() */
4273                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4274                         ip_drop_output("FragFails: could not carve mp",
4275                             hmp0, ill);
4276                         if (hmp != NULL)
4277                                 freeb(hmp);
4278                         freeb(hmp0);
4279                         ip1dbg(("ip_carve_mp: failed\n"));
4280                         return (ENOBUFS);
4281                 }
4282                 hmp0->b_cont = dmp;
4283                 /* Get the priority marking, if any */
4284                 hmp0->b_band = priority;
4285 
4286                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4287 
4288                 error = postfragfn(hmp0, nce, ixaflags,
4289                     mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4290                     ixa_cookie);
4291                 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4292                         /* No point in sending the other fragments */
4293                         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4294                         ip_drop_output("FragFails: postfragfn failed",
4295                             hmp, ill);
4296                         freeb(hmp);
4297                         freemsg(mp);
4298                         return (error);
4299                 }
4300                 /* No need to redo state machine in loop */
4301                 ixaflags &= ~IXAF_REACH_CONF;
4302 
4303                 offset += mlen;
4304         }
4305         BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4306         return (error);
4307 }
4308 
4309 /*
4310  * Add a fragment header to an IPv6 packet.
4311  * Assumes that all the extension headers are contained in the first mblk.
4312  *
4313  * The fragment header is inserted after an hop-by-hop options header
4314  * and after [an optional destinations header followed by] a routing header.
4315  */
4316 mblk_t *
4317 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4318 {
4319         ip6_t           *ip6h = (ip6_t *)mp->b_rptr;
4320         ip6_t           *fip6h;
4321         mblk_t          *hmp;
4322         ip6_frag_t      *fraghdr;
4323         size_t          unfragmentable_len;
4324         uint8_t         nexthdr;
4325         uint_t          prev_nexthdr_offset;
4326         uint8_t         *ptr;
4327         uint_t          priority = mp->b_band;
4328         ip_stack_t      *ipst = ixa->ixa_ipst;
4329 
4330         /*
4331          * Determine the length of the unfragmentable portion of this
4332          * datagram.  This consists of the IPv6 header, a potential
4333          * hop-by-hop options header, a potential pre-routing-header
4334          * destination options header, and a potential routing header.
4335          */
4336         nexthdr = ip6h->ip6_nxt;
4337         prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4338         ptr = (uint8_t *)&ip6h[1];
4339 
4340         if (nexthdr == IPPROTO_HOPOPTS) {
4341                 ip6_hbh_t       *hbh_hdr;
4342                 uint_t          hdr_len;
4343 
4344                 hbh_hdr = (ip6_hbh_t *)ptr;
4345                 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4346                 nexthdr = hbh_hdr->ip6h_nxt;
4347                 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4348                     - (uint8_t *)ip6h;
4349                 ptr += hdr_len;
4350         }
4351         if (nexthdr == IPPROTO_DSTOPTS) {
4352                 ip6_dest_t      *dest_hdr;
4353                 uint_t          hdr_len;
4354 
4355                 dest_hdr = (ip6_dest_t *)ptr;
4356                 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4357                         hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4358                         nexthdr = dest_hdr->ip6d_nxt;
4359                         prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4360                             - (uint8_t *)ip6h;
4361                         ptr += hdr_len;
4362                 }
4363         }
4364         if (nexthdr == IPPROTO_ROUTING) {
4365                 ip6_rthdr_t     *rthdr;
4366                 uint_t          hdr_len;
4367 
4368                 rthdr = (ip6_rthdr_t *)ptr;
4369                 nexthdr = rthdr->ip6r_nxt;
4370                 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4371                     - (uint8_t *)ip6h;
4372                 hdr_len = 8 * (rthdr->ip6r_len + 1);
4373                 ptr += hdr_len;
4374         }
4375         unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4376 
4377         /*
4378          * Allocate an mblk with enough room for the link-layer
4379          * header, the unfragmentable part of the datagram, and the
4380          * fragment header.
4381          */
4382         hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4383             ipst->ips_ip_wroff_extra, mp);
4384         if (hmp == NULL) {
4385                 ill_t *ill = ixa->ixa_nce->nce_ill;
4386 
4387                 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4388                 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4389                 freemsg(mp);
4390                 return (NULL);
4391         }
4392         hmp->b_rptr += ipst->ips_ip_wroff_extra;
4393         hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4394 
4395         fip6h = (ip6_t *)hmp->b_rptr;
4396         fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4397 
4398         bcopy(ip6h, fip6h, unfragmentable_len);
4399         fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4400         hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4401 
4402         fraghdr->ip6f_nxt = nexthdr;
4403         fraghdr->ip6f_reserved = 0;
4404         fraghdr->ip6f_offlg = 0;
4405         fraghdr->ip6f_ident = htonl(ident);
4406 
4407         /* Get the priority marking, if any */
4408         hmp->b_band = priority;
4409 
4410         /*
4411          * Move read ptr past unfragmentable portion, we don't want this part
4412          * of the data in our fragments.
4413          */
4414         mp->b_rptr += unfragmentable_len;
4415         hmp->b_cont = mp;
4416         return (hmp);
4417 }
4418 
4419 /*
4420  * Determine if the ill and multicast aspects of that packets
4421  * "matches" the conn.
4422  */
4423 boolean_t
4424 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4425 {
4426         ill_t           *ill = ira->ira_rill;
4427         zoneid_t        zoneid = ira->ira_zoneid;
4428         uint_t          in_ifindex;
4429         in6_addr_t      *v6dst_ptr = &ip6h->ip6_dst;
4430         in6_addr_t      *v6src_ptr = &ip6h->ip6_src;
4431 
4432         /*
4433          * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4434          * scopeid. This is used to limit
4435          * unicast and multicast reception to conn_incoming_ifindex.
4436          * conn_wantpacket_v6 is called both for unicast and
4437          * multicast packets.
4438          */
4439         in_ifindex = connp->conn_incoming_ifindex;
4440 
4441         /* mpathd can bind to the under IPMP interface, which we allow */
4442         if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4443                 if (!IS_UNDER_IPMP(ill))
4444                         return (B_FALSE);
4445 
4446                 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4447                         return (B_FALSE);
4448         }
4449 
4450         if (!IPCL_ZONE_MATCH(connp, zoneid))
4451                 return (B_FALSE);
4452 
4453         if (!(ira->ira_flags & IRAF_MULTICAST))
4454                 return (B_TRUE);
4455 
4456         if (connp->conn_multi_router)
4457                 return (B_TRUE);
4458 
4459         if (ira->ira_protocol == IPPROTO_RSVP)
4460                 return (B_TRUE);
4461 
4462         return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4463             ira->ira_ill));
4464 }
4465 
4466 /*
4467  * pr_addr_dbg function provides the needed buffer space to call
4468  * inet_ntop() function's 3rd argument. This function should be
4469  * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4470  * stack buffer space in it's own stack frame. This function uses
4471  * a buffer from it's own stack and prints the information.
4472  * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4473  *
4474  * Note:    This function can call inet_ntop() once.
4475  */
4476 void
4477 pr_addr_dbg(char *fmt1, int af, const void *addr)
4478 {
4479         char    buf[INET6_ADDRSTRLEN];
4480 
4481         if (fmt1 == NULL) {
4482                 ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4483                 return;
4484         }
4485 
4486         /*
4487          * This does not compare debug level and just prints
4488          * out. Thus it is the responsibility of the caller
4489          * to check the appropriate debug-level before calling
4490          * this function.
4491          */
4492         if (ip_debug > 0) {
4493                 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4494         }
4495 
4496 
4497 }
4498 
4499 
4500 /*
4501  * Return the length in bytes of the IPv6 headers (base header
4502  * extension headers) that will be needed based on the
4503  * ip_pkt_t structure passed by the caller.
4504  *
4505  * The returned length does not include the length of the upper level
4506  * protocol (ULP) header.
4507  */
4508 int
4509 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4510 {
4511         int len;
4512 
4513         len = IPV6_HDR_LEN;
4514 
4515         /*
4516          * If there's a security label here, then we ignore any hop-by-hop
4517          * options the user may try to set.
4518          */
4519         if (ipp->ipp_fields & IPPF_LABEL_V6) {
4520                 uint_t hopoptslen;
4521                 /*
4522                  * Note that ipp_label_len_v6 is just the option - not
4523                  * the hopopts extension header. It also needs to be padded
4524                  * to a multiple of 8 bytes.
4525                  */
4526                 ASSERT(ipp->ipp_label_len_v6 != 0);
4527                 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4528                 hopoptslen = (hopoptslen + 7)/8 * 8;
4529                 len += hopoptslen;
4530         } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4531                 ASSERT(ipp->ipp_hopoptslen != 0);
4532                 len += ipp->ipp_hopoptslen;
4533         }
4534 
4535         /*
4536          * En-route destination options
4537          * Only do them if there's a routing header as well
4538          */
4539         if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4540             (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4541                 ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4542                 len += ipp->ipp_rthdrdstoptslen;
4543         }
4544         if (ipp->ipp_fields & IPPF_RTHDR) {
4545                 ASSERT(ipp->ipp_rthdrlen != 0);
4546                 len += ipp->ipp_rthdrlen;
4547         }
4548         if (ipp->ipp_fields & IPPF_DSTOPTS) {
4549                 ASSERT(ipp->ipp_dstoptslen != 0);
4550                 len += ipp->ipp_dstoptslen;
4551         }
4552         return (len);
4553 }
4554 
4555 /*
4556  * All-purpose routine to build a header chain of an IPv6 header
4557  * followed by any required extension headers and a proto header.
4558  *
4559  * The caller has to set the source and destination address as well as
4560  * ip6_plen. The caller has to massage any routing header and compensate
4561  * for the ULP pseudo-header checksum due to the source route.
4562  *
4563  * The extension headers will all be fully filled in.
4564  */
4565 void
4566 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4567     uint8_t protocol, uint32_t flowinfo)
4568 {
4569         uint8_t *nxthdr_ptr;
4570         uint8_t *cp;
4571         ip6_t   *ip6h = (ip6_t *)buf;
4572 
4573         /* Initialize IPv6 header */
4574         ip6h->ip6_vcf =
4575             (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4576             (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4577 
4578         if (ipp->ipp_fields & IPPF_TCLASS) {
4579                 /* Overrides the class part of flowinfo */
4580                 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4581                     ipp->ipp_tclass);
4582         }
4583 
4584         if (ipp->ipp_fields & IPPF_HOPLIMIT)
4585                 ip6h->ip6_hops = ipp->ipp_hoplimit;
4586         else
4587                 ip6h->ip6_hops = ipp->ipp_unicast_hops;
4588 
4589         if ((ipp->ipp_fields & IPPF_ADDR) &&
4590             !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4591                 ip6h->ip6_src = ipp->ipp_addr;
4592 
4593         nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4594         cp = (uint8_t *)&ip6h[1];
4595         /*
4596          * Here's where we have to start stringing together
4597          * any extension headers in the right order:
4598          * Hop-by-hop, destination, routing, and final destination opts.
4599          */
4600         /*
4601          * If there's a security label here, then we ignore any hop-by-hop
4602          * options the user may try to set.
4603          */
4604         if (ipp->ipp_fields & IPPF_LABEL_V6) {
4605                 /*
4606                  * Hop-by-hop options with the label.
4607                  * Note that ipp_label_v6 is just the option - not
4608                  * the hopopts extension header. It also needs to be padded
4609                  * to a multiple of 8 bytes.
4610                  */
4611                 ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4612                 uint_t hopoptslen;
4613                 uint_t padlen;
4614 
4615                 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t);
4616                 hopoptslen = (padlen + 7)/8 * 8;
4617                 padlen = hopoptslen - padlen;
4618 
4619                 *nxthdr_ptr = IPPROTO_HOPOPTS;
4620                 nxthdr_ptr = &hbh->ip6h_nxt;
4621                 hbh->ip6h_len = hopoptslen/8 - 1;
4622                 cp += sizeof (ip6_hbh_t);
4623                 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6);
4624                 cp += ipp->ipp_label_len_v6;
4625 
4626                 ASSERT(padlen <= 7);
4627                 switch (padlen) {
4628                 case 0:
4629                         break;
4630                 case 1:
4631                         cp[0] = IP6OPT_PAD1;
4632                         break;
4633                 default:
4634                         cp[0] = IP6OPT_PADN;
4635                         cp[1] = padlen - 2;
4636                         bzero(&cp[2], padlen - 2);
4637                         break;
4638                 }
4639                 cp += padlen;
4640         } else if (ipp->ipp_fields & IPPF_HOPOPTS) {
4641                 /* Hop-by-hop options */
4642                 ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4643 
4644                 *nxthdr_ptr = IPPROTO_HOPOPTS;
4645                 nxthdr_ptr = &hbh->ip6h_nxt;
4646 
4647                 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4648                 cp += ipp->ipp_hopoptslen;
4649         }
4650         /*
4651          * En-route destination options
4652          * Only do them if there's a routing header as well
4653          */
4654         if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4655             (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4656                 ip6_dest_t *dst = (ip6_dest_t *)cp;
4657 
4658                 *nxthdr_ptr = IPPROTO_DSTOPTS;
4659                 nxthdr_ptr = &dst->ip6d_nxt;
4660 
4661                 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4662                 cp += ipp->ipp_rthdrdstoptslen;
4663         }
4664         /*
4665          * Routing header next
4666          */
4667         if (ipp->ipp_fields & IPPF_RTHDR) {
4668                 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4669 
4670                 *nxthdr_ptr = IPPROTO_ROUTING;
4671                 nxthdr_ptr = &rt->ip6r_nxt;
4672 
4673                 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4674                 cp += ipp->ipp_rthdrlen;
4675         }
4676         /*
4677          * Do ultimate destination options
4678          */
4679         if (ipp->ipp_fields & IPPF_DSTOPTS) {
4680                 ip6_dest_t *dest = (ip6_dest_t *)cp;
4681 
4682                 *nxthdr_ptr = IPPROTO_DSTOPTS;
4683                 nxthdr_ptr = &dest->ip6d_nxt;
4684 
4685                 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4686                 cp += ipp->ipp_dstoptslen;
4687         }
4688         /*
4689          * Now set the last header pointer to the proto passed in
4690          */
4691         *nxthdr_ptr = protocol;
4692         ASSERT((int)(cp - buf) == buf_len);
4693 }
4694 
4695 /*
4696  * Return a pointer to the routing header extension header
4697  * in the IPv6 header(s) chain passed in.
4698  * If none found, return NULL
4699  * Assumes that all extension headers are in same mblk as the v6 header
4700  */
4701 ip6_rthdr_t *
4702 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4703 {
4704         ip6_dest_t      *desthdr;
4705         ip6_frag_t      *fraghdr;
4706         uint_t          hdrlen;
4707         uint8_t         nexthdr;
4708         uint8_t         *ptr = (uint8_t *)&ip6h[1];
4709 
4710         if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4711                 return ((ip6_rthdr_t *)ptr);
4712 
4713         /*
4714          * The routing header will precede all extension headers
4715          * other than the hop-by-hop and destination options
4716          * extension headers, so if we see anything other than those,
4717          * we're done and didn't find it.
4718          * We could see a destination options header alone but no
4719          * routing header, in which case we'll return NULL as soon as
4720          * we see anything after that.
4721          * Hop-by-hop and destination option headers are identical,
4722          * so we can use either one we want as a template.
4723          */
4724         nexthdr = ip6h->ip6_nxt;
4725         while (ptr < endptr) {
4726                 /* Is there enough left for len + nexthdr? */
4727                 if (ptr + MIN_EHDR_LEN > endptr)
4728                         return (NULL);
4729 
4730                 switch (nexthdr) {
4731                 case IPPROTO_HOPOPTS:
4732                 case IPPROTO_DSTOPTS:
4733                         /* Assumes the headers are identical for hbh and dst */
4734                         desthdr = (ip6_dest_t *)ptr;
4735                         hdrlen = 8 * (desthdr->ip6d_len + 1);
4736                         nexthdr = desthdr->ip6d_nxt;
4737                         break;
4738 
4739                 case IPPROTO_ROUTING:
4740                         return ((ip6_rthdr_t *)ptr);
4741 
4742                 case IPPROTO_FRAGMENT:
4743                         fraghdr = (ip6_frag_t *)ptr;
4744                         hdrlen = sizeof (ip6_frag_t);
4745                         nexthdr = fraghdr->ip6f_nxt;
4746                         break;
4747 
4748                 default:
4749                         return (NULL);
4750                 }
4751                 ptr += hdrlen;
4752         }
4753         return (NULL);
4754 }
4755 
4756 /*
4757  * Called for source-routed packets originating on this node.
4758  * Manipulates the original routing header by moving every entry up
4759  * one slot, placing the first entry in the v6 header's v6_dst field,
4760  * and placing the ultimate destination in the routing header's last
4761  * slot.
4762  *
4763  * Returns the checksum diference between the ultimate destination
4764  * (last hop in the routing header when the packet is sent) and
4765  * the first hop (ip6_dst when the packet is sent)
4766  */
4767 /* ARGSUSED2 */
4768 uint32_t
4769 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4770 {
4771         uint_t          numaddr;
4772         uint_t          i;
4773         in6_addr_t      *addrptr;
4774         in6_addr_t      tmp;
4775         ip6_rthdr0_t    *rthdr = (ip6_rthdr0_t *)rth;
4776         uint32_t        cksm;
4777         uint32_t        addrsum = 0;
4778         uint16_t        *ptr;
4779 
4780         /*
4781          * Perform any processing needed for source routing.
4782          * We know that all extension headers will be in the same mblk
4783          * as the IPv6 header.
4784          */
4785 
4786         /*
4787          * If no segments left in header, or the header length field is zero,
4788          * don't move hop addresses around;
4789          * Checksum difference is zero.
4790          */
4791         if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4792                 return (0);
4793 
4794         ptr = (uint16_t *)&ip6h->ip6_dst;
4795         cksm = 0;
4796         for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4797                 cksm += ptr[i];
4798         }
4799         cksm = (cksm & 0xFFFF) + (cksm >> 16);
4800 
4801         /*
4802          * Here's where the fun begins - we have to
4803          * move all addresses up one spot, take the
4804          * first hop and make it our first ip6_dst,
4805          * and place the ultimate destination in the
4806          * newly-opened last slot.
4807          */
4808         addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4809         numaddr = rthdr->ip6r0_len / 2;
4810         tmp = *addrptr;
4811         for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4812                 *addrptr = addrptr[1];
4813         }
4814         *addrptr = ip6h->ip6_dst;
4815         ip6h->ip6_dst = tmp;
4816 
4817         /*
4818          * From the checksummed ultimate destination subtract the checksummed
4819          * current ip6_dst (the first hop address). Return that number.
4820          * (In the v4 case, the second part of this is done in each routine
4821          *  that calls ip_massage_options(). We do it all in this one place
4822          *  for v6).
4823          */
4824         ptr = (uint16_t *)&ip6h->ip6_dst;
4825         for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4826                 addrsum += ptr[i];
4827         }
4828         cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4829         if ((int)cksm < 0)
4830                 cksm--;
4831         cksm = (cksm & 0xFFFF) + (cksm >> 16);
4832 
4833         return (cksm);
4834 }
4835 
4836 void
4837 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4838 {
4839         kstat_t *ksp;
4840 
4841         ip6_stat_t template = {
4842                 { "ip6_udp_fannorm",    KSTAT_DATA_UINT64 },
4843                 { "ip6_udp_fanmb",      KSTAT_DATA_UINT64 },
4844                 { "ip6_recv_pullup",            KSTAT_DATA_UINT64 },
4845                 { "ip6_db_ref",                 KSTAT_DATA_UINT64 },
4846                 { "ip6_notaligned",             KSTAT_DATA_UINT64 },
4847                 { "ip6_multimblk",              KSTAT_DATA_UINT64 },
4848                 { "ipsec_proto_ahesp",          KSTAT_DATA_UINT64 },
4849                 { "ip6_out_sw_cksum",                   KSTAT_DATA_UINT64 },
4850                 { "ip6_out_sw_cksum_bytes",             KSTAT_DATA_UINT64 },
4851                 { "ip6_in_sw_cksum",                    KSTAT_DATA_UINT64 },
4852                 { "ip6_tcp_in_full_hw_cksum_err",       KSTAT_DATA_UINT64 },
4853                 { "ip6_tcp_in_part_hw_cksum_err",       KSTAT_DATA_UINT64 },
4854                 { "ip6_tcp_in_sw_cksum_err",            KSTAT_DATA_UINT64 },
4855                 { "ip6_udp_in_full_hw_cksum_err",       KSTAT_DATA_UINT64 },
4856                 { "ip6_udp_in_part_hw_cksum_err",       KSTAT_DATA_UINT64 },
4857                 { "ip6_udp_in_sw_cksum_err",            KSTAT_DATA_UINT64 },
4858         };
4859         ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4860             KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4861             KSTAT_FLAG_VIRTUAL, stackid);
4862 
4863         if (ksp == NULL)
4864                 return (NULL);
4865 
4866         bcopy(&template, ip6_statisticsp, sizeof (template));
4867         ksp->ks_data = (void *)ip6_statisticsp;
4868         ksp->ks_private = (void *)(uintptr_t)stackid;
4869 
4870         kstat_install(ksp);
4871         return (ksp);
4872 }
4873 
4874 void
4875 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4876 {
4877         if (ksp != NULL) {
4878                 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4879                 kstat_delete_netstack(ksp, stackid);
4880         }
4881 }
4882 
4883 /*
4884  * The following two functions set and get the value for the
4885  * IPV6_SRC_PREFERENCES socket option.
4886  */
4887 int
4888 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4889 {
4890         /*
4891          * We only support preferences that are covered by
4892          * IPV6_PREFER_SRC_MASK.
4893          */
4894         if (prefs & ~IPV6_PREFER_SRC_MASK)
4895                 return (EINVAL);
4896 
4897         /*
4898          * Look for conflicting preferences or default preferences.  If
4899          * both bits of a related pair are clear, the application wants the
4900          * system's default value for that pair.  Both bits in a pair can't
4901          * be set.
4902          */
4903         if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4904                 prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4905         } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4906             IPV6_PREFER_SRC_MIPMASK) {
4907                 return (EINVAL);
4908         }
4909         if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4910                 prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4911         } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4912             IPV6_PREFER_SRC_TMPMASK) {
4913                 return (EINVAL);
4914         }
4915         if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4916                 prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4917         } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4918             IPV6_PREFER_SRC_CGAMASK) {
4919                 return (EINVAL);
4920         }
4921 
4922         ixa->ixa_src_preferences = prefs;
4923         return (0);
4924 }
4925 
4926 size_t
4927 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4928 {
4929         *val = ixa->ixa_src_preferences;
4930         return (sizeof (ixa->ixa_src_preferences));
4931 }
4932 
4933 /*
4934  * Get the size of the IP options (including the IP headers size)
4935  * without including the AH header's size. If till_ah is B_FALSE,
4936  * and if AH header is present, dest options beyond AH header will
4937  * also be included in the returned size.
4938  */
4939 int
4940 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4941 {
4942         ip6_t *ip6h;
4943         uint8_t nexthdr;
4944         uint8_t *whereptr;
4945         ip6_hbh_t *hbhhdr;
4946         ip6_dest_t *dsthdr;
4947         ip6_rthdr_t *rthdr;
4948         int ehdrlen;
4949         int size;
4950         ah_t *ah;
4951 
4952         ip6h = (ip6_t *)mp->b_rptr;
4953         size = IPV6_HDR_LEN;
4954         nexthdr = ip6h->ip6_nxt;
4955         whereptr = (uint8_t *)&ip6h[1];
4956         for (;;) {
4957                 /* Assume IP has already stripped it */
4958                 ASSERT(nexthdr != IPPROTO_FRAGMENT);
4959                 switch (nexthdr) {
4960                 case IPPROTO_HOPOPTS:
4961                         hbhhdr = (ip6_hbh_t *)whereptr;
4962                         nexthdr = hbhhdr->ip6h_nxt;
4963                         ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4964                         break;
4965                 case IPPROTO_DSTOPTS:
4966                         dsthdr = (ip6_dest_t *)whereptr;
4967                         nexthdr = dsthdr->ip6d_nxt;
4968                         ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4969                         break;
4970                 case IPPROTO_ROUTING:
4971                         rthdr = (ip6_rthdr_t *)whereptr;
4972                         nexthdr = rthdr->ip6r_nxt;
4973                         ehdrlen = 8 * (rthdr->ip6r_len + 1);
4974                         break;
4975                 default :
4976                         if (till_ah) {
4977                                 ASSERT(nexthdr == IPPROTO_AH);
4978                                 return (size);
4979                         }
4980                         /*
4981                          * If we don't have a AH header to traverse,
4982                          * return now. This happens normally for
4983                          * outbound datagrams where we have not inserted
4984                          * the AH header.
4985                          */
4986                         if (nexthdr != IPPROTO_AH) {
4987                                 return (size);
4988                         }
4989 
4990                         /*
4991                          * We don't include the AH header's size
4992                          * to be symmetrical with other cases where
4993                          * we either don't have a AH header (outbound)
4994                          * or peek into the AH header yet (inbound and
4995                          * not pulled up yet).
4996                          */
4997                         ah = (ah_t *)whereptr;
4998                         nexthdr = ah->ah_nexthdr;
4999                         ehdrlen = (ah->ah_length << 2) + 8;
5000 
5001                         if (nexthdr == IPPROTO_DSTOPTS) {
5002                                 if (whereptr + ehdrlen >= mp->b_wptr) {
5003                                         /*
5004                                          * The destination options header
5005                                          * is not part of the first mblk.
5006                                          */
5007                                         whereptr = mp->b_cont->b_rptr;
5008                                 } else {
5009                                         whereptr += ehdrlen;
5010                                 }
5011 
5012                                 dsthdr = (ip6_dest_t *)whereptr;
5013                                 ehdrlen = 8 * (dsthdr->ip6d_len + 1);
5014                                 size += ehdrlen;
5015                         }
5016                         return (size);
5017                 }
5018                 whereptr += ehdrlen;
5019                 size += ehdrlen;
5020         }
5021 }
5022 
5023 /*
5024  * Utility routine that checks if `v6srcp' is a valid address on underlying
5025  * interface `ill'.  If `ipifp' is non-NULL, it's set to a held ipif
5026  * associated with `v6srcp' on success.  NOTE: if this is not called from
5027  * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
5028  * group during or after this lookup.
5029  */
5030 boolean_t
5031 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
5032 {
5033         ipif_t *ipif;
5034 
5035 
5036         ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
5037         if (ipif != NULL) {
5038                 if (ipifp != NULL)
5039                         *ipifp = ipif;
5040                 else
5041                         ipif_refrele(ipif);
5042                 return (B_TRUE);
5043         }
5044 
5045         if (ip_debug > 2) {
5046                 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
5047                     "src %s\n", AF_INET6, v6srcp);
5048         }
5049         return (B_FALSE);
5050 }