1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 1990 Mentat Inc. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/dlpi.h> 29 #include <sys/stropts.h> 30 #include <sys/sysmacros.h> 31 #include <sys/strsun.h> 32 #include <sys/strlog.h> 33 #include <sys/strsubr.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/sdt.h> 41 #include <sys/kobj.h> 42 #include <sys/zone.h> 43 #include <sys/neti.h> 44 #include <sys/hook.h> 45 46 #include <sys/kmem.h> 47 #include <sys/systm.h> 48 #include <sys/param.h> 49 #include <sys/socket.h> 50 #include <sys/vtrace.h> 51 #include <sys/isa_defs.h> 52 #include <sys/atomic.h> 53 #include <sys/policy.h> 54 #include <sys/mac.h> 55 #include <net/if.h> 56 #include <net/if_types.h> 57 #include <net/route.h> 58 #include <net/if_dl.h> 59 #include <sys/sockio.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/sctp.h> 64 65 #include 
<inet/common.h> 66 #include <inet/mi.h> 67 #include <inet/optcom.h> 68 #include <inet/mib2.h> 69 #include <inet/nd.h> 70 #include <inet/arp.h> 71 72 #include <inet/ip.h> 73 #include <inet/ip_impl.h> 74 #include <inet/ip6.h> 75 #include <inet/ip6_asp.h> 76 #include <inet/tcp.h> 77 #include <inet/tcp_impl.h> 78 #include <inet/udp_impl.h> 79 #include <inet/ipp_common.h> 80 81 #include <inet/ip_multi.h> 82 #include <inet/ip_if.h> 83 #include <inet/ip_ire.h> 84 #include <inet/ip_rts.h> 85 #include <inet/ip_ndp.h> 86 #include <net/pfkeyv2.h> 87 #include <inet/sadb.h> 88 #include <inet/ipsec_impl.h> 89 #include <inet/iptun/iptun_impl.h> 90 #include <inet/sctp_ip.h> 91 #include <sys/pattr.h> 92 #include <inet/ipclassifier.h> 93 #include <inet/ipsecah.h> 94 #include <inet/rawip_impl.h> 95 #include <inet/rts_impl.h> 96 #include <sys/squeue_impl.h> 97 #include <sys/squeue.h> 98 99 #include <sys/tsol/label.h> 100 #include <sys/tsol/tnet.h> 101 102 /* Temporary; for CR 6451644 work-around */ 103 #include <sys/ethernet.h> 104 105 /* 106 * Naming conventions: 107 * These rules should be judiciously applied 108 * if there is a need to identify something as IPv6 versus IPv4 109 * IPv6 functions will end with _v6 in the ip module. 110 * IPv6 functions will end with _ipv6 in the transport modules. 111 * IPv6 macros: 112 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6 113 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY 114 * And then there are ..V4_PART_OF_V6. 115 * The intent is that macros in the ip module end with _V6. 116 * IPv6 global variables will start with ipv6_ 117 * IPv6 structures will start with ipv6 118 * IPv6 defined constants should start with IPV6_ 119 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc) 120 */ 121 122 /* 123 * ip6opt_ls is used to enable IPv6 (via /etc/system on TX systems). 124 * We need to do this because we didn't obtain the IP6OPT_LS (0x0a) 125 * from IANA.
This mechanism will remain in effect until an official 126 * number is obtained. 127 */ 128 uchar_t ip6opt_ls; 129 130 const in6_addr_t ipv6_all_ones = { 131 {{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }}}; 132 const in6_addr_t ipv6_all_zeros = {{{ 0, 0, 0, 0 }}}; 133 134 #ifdef _BIG_ENDIAN 135 const in6_addr_t ipv6_unspecified_group = {{{ 0xff000000U, 0, 0, 0 }}}; 136 #else /* _BIG_ENDIAN */ 137 const in6_addr_t ipv6_unspecified_group = {{{ 0x000000ffU, 0, 0, 0 }}}; 138 #endif /* _BIG_ENDIAN */ 139 140 #ifdef _BIG_ENDIAN 141 const in6_addr_t ipv6_loopback = {{{ 0, 0, 0, 0x00000001U }}}; 142 #else /* _BIG_ENDIAN */ 143 const in6_addr_t ipv6_loopback = {{{ 0, 0, 0, 0x01000000U }}}; 144 #endif /* _BIG_ENDIAN */ 145 146 #ifdef _BIG_ENDIAN 147 const in6_addr_t ipv6_all_hosts_mcast = {{{ 0xff020000U, 0, 0, 0x00000001U }}}; 148 #else /* _BIG_ENDIAN */ 149 const in6_addr_t ipv6_all_hosts_mcast = {{{ 0x000002ffU, 0, 0, 0x01000000U }}}; 150 #endif /* _BIG_ENDIAN */ 151 152 #ifdef _BIG_ENDIAN 153 const in6_addr_t ipv6_all_rtrs_mcast = {{{ 0xff020000U, 0, 0, 0x00000002U }}}; 154 #else /* _BIG_ENDIAN */ 155 const in6_addr_t ipv6_all_rtrs_mcast = {{{ 0x000002ffU, 0, 0, 0x02000000U }}}; 156 #endif /* _BIG_ENDIAN */ 157 158 #ifdef _BIG_ENDIAN 159 const in6_addr_t ipv6_all_v2rtrs_mcast = {{{ 0xff020000U, 0, 0, 0x00000016U }}}; 160 #else /* _BIG_ENDIAN */ 161 const in6_addr_t ipv6_all_v2rtrs_mcast = {{{ 0x000002ffU, 0, 0, 0x16000000U }}}; 162 #endif /* _BIG_ENDIAN */ 163 164 #ifdef _BIG_ENDIAN 165 const in6_addr_t ipv6_solicited_node_mcast = { 166 {{ 0xff020000U, 0, 0x00000001U, 0xff000000U }}}; 167 #else /* _BIG_ENDIAN */ 168 const in6_addr_t ipv6_solicited_node_mcast = { 169 {{ 0x000002ffU, 0, 0x01000000U, 0x000000ffU }}}; 170 #endif /* _BIG_ENDIAN */ 171 172 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *); 173 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *); 174 static void icmp_pkt_v6(mblk_t *, void *, size_t, const 
in6_addr_t *, 175 ip_recv_attr_t *); 176 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *, 177 ip_recv_attr_t *); 178 static void icmp_send_redirect_v6(mblk_t *, in6_addr_t *, 179 in6_addr_t *, ip_recv_attr_t *); 180 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *, 181 ip_recv_attr_t *); 182 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *); 183 184 /* 185 * icmp_inbound_v6 deals with ICMP messages that are handled by IP. 186 * If the ICMP message is consumed by IP, i.e., it should not be delivered 187 * to any IPPROTO_ICMP raw sockets, then it returns NULL. 188 * Likewise, if the ICMP error is misformed (too short, etc), then it 189 * returns NULL. The caller uses this to determine whether or not to send 190 * to raw sockets. 191 * 192 * All error messages are passed to the matching transport stream. 193 * 194 * See comment for icmp_inbound_v4() on how IPsec is handled. 195 */ 196 mblk_t * 197 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira) 198 { 199 icmp6_t *icmp6; 200 ip6_t *ip6h; /* Outer header */ 201 int ip_hdr_length; /* Outer header length */ 202 boolean_t interested; 203 ill_t *ill = ira->ira_ill; 204 ip_stack_t *ipst = ill->ill_ipst; 205 mblk_t *mp_ret = NULL; 206 207 ip6h = (ip6_t *)mp->b_rptr; 208 209 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs); 210 211 /* Check for Martian packets */ 212 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) { 213 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 214 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill); 215 freemsg(mp); 216 return (NULL); 217 } 218 219 /* Make sure ira_l2src is set for ndp_input */ 220 if (!(ira->ira_flags & IRAF_L2SRC_SET)) 221 ip_setl2src(mp, ira, ira->ira_rill); 222 223 ip_hdr_length = ira->ira_ip_hdr_length; 224 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) { 225 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) { 226 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 227 ip_drop_input("ipIfStatsInTruncatedPkts", 
mp, ill); 228 freemsg(mp); 229 return (NULL); 230 } 231 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira); 232 if (ip6h == NULL) { 233 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 234 freemsg(mp); 235 return (NULL); 236 } 237 } 238 239 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 240 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6); 241 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type, 242 icmp6->icmp6_code)); 243 244 /* 245 * We will set "interested" to "true" if we should pass a copy to 246 * the transport i.e., if it is an error message. 247 */ 248 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK); 249 250 switch (icmp6->icmp6_type) { 251 case ICMP6_DST_UNREACH: 252 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs); 253 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 254 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs); 255 break; 256 257 case ICMP6_TIME_EXCEEDED: 258 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds); 259 break; 260 261 case ICMP6_PARAM_PROB: 262 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems); 263 break; 264 265 case ICMP6_PACKET_TOO_BIG: 266 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs); 267 break; 268 269 case ICMP6_ECHO_REQUEST: 270 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos); 271 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) && 272 !ipst->ips_ipv6_resp_echo_mcast) 273 break; 274 275 /* 276 * We must have exclusive use of the mblk to convert it to 277 * a response. 278 * If not, we copy it. 
279 */ 280 if (mp->b_datap->db_ref > 1) { 281 mblk_t *mp1; 282 283 mp1 = copymsg(mp); 284 if (mp1 == NULL) { 285 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 286 ip_drop_input("ipIfStatsInDiscards - copymsg", 287 mp, ill); 288 freemsg(mp); 289 return (NULL); 290 } 291 freemsg(mp); 292 mp = mp1; 293 ip6h = (ip6_t *)mp->b_rptr; 294 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 295 } 296 297 icmp6->icmp6_type = ICMP6_ECHO_REPLY; 298 icmp_send_reply_v6(mp, ip6h, icmp6, ira); 299 return (NULL); 300 301 case ICMP6_ECHO_REPLY: 302 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies); 303 break; 304 305 case ND_ROUTER_SOLICIT: 306 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits); 307 break; 308 309 case ND_ROUTER_ADVERT: 310 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements); 311 break; 312 313 case ND_NEIGHBOR_SOLICIT: 314 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits); 315 ndp_input(mp, ira); 316 return (NULL); 317 318 case ND_NEIGHBOR_ADVERT: 319 BUMP_MIB(ill->ill_icmp6_mib, 320 ipv6IfIcmpInNeighborAdvertisements); 321 ndp_input(mp, ira); 322 return (NULL); 323 324 case ND_REDIRECT: 325 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects); 326 327 if (ipst->ips_ipv6_ignore_redirect) 328 break; 329 330 /* We now allow a RAW socket to receive this. */ 331 interested = B_TRUE; 332 break; 333 334 /* 335 * The next three icmp messages will be handled by MLD. 336 * Pass all valid MLD packets up to any process(es) 337 * listening on a raw ICMP socket. 338 */ 339 case MLD_LISTENER_QUERY: 340 case MLD_LISTENER_REPORT: 341 case MLD_LISTENER_REDUCTION: 342 mp = mld_input(mp, ira); 343 return (mp); 344 default: 345 break; 346 } 347 /* 348 * See if there is an ICMP client to avoid an extra copymsg/freemsg 349 * if there isn't one. 350 */ 351 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) { 352 /* If there is an ICMP client and we want one too, copy it. 
*/ 353 354 if (!interested) { 355 /* Caller will deliver to RAW sockets */ 356 return (mp); 357 } 358 mp_ret = copymsg(mp); 359 if (mp_ret == NULL) { 360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 361 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 362 } 363 } else if (!interested) { 364 /* Neither we nor raw sockets are interested. Drop packet now */ 365 freemsg(mp); 366 return (NULL); 367 } 368 369 /* 370 * ICMP error or redirect packet. Make sure we have enough of 371 * the header and that db_ref == 1 since we might end up modifying 372 * the packet. 373 */ 374 if (mp->b_cont != NULL) { 375 if (ip_pullup(mp, -1, ira) == NULL) { 376 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 377 ip_drop_input("ipIfStatsInDiscards - ip_pullup", 378 mp, ill); 379 freemsg(mp); 380 return (mp_ret); 381 } 382 } 383 384 if (mp->b_datap->db_ref > 1) { 385 mblk_t *mp1; 386 387 mp1 = copymsg(mp); 388 if (mp1 == NULL) { 389 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 390 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill); 391 freemsg(mp); 392 return (mp_ret); 393 } 394 freemsg(mp); 395 mp = mp1; 396 } 397 398 /* 399 * In case mp has changed, verify the message before any further 400 * processes. 401 */ 402 ip6h = (ip6_t *)mp->b_rptr; 403 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]); 404 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 405 freemsg(mp); 406 return (mp_ret); 407 } 408 409 switch (icmp6->icmp6_type) { 410 case ND_REDIRECT: 411 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira); 412 break; 413 case ICMP6_PACKET_TOO_BIG: 414 /* Update DCE and adjust MTU is icmp header if needed */ 415 icmp_inbound_too_big_v6(icmp6, ira); 416 /* FALLTHRU */ 417 default: 418 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 419 break; 420 } 421 422 return (mp_ret); 423 } 424 425 /* 426 * Send an ICMP echo reply. 427 * The caller has already updated the payload part of the packet. 
428 * We handle the ICMP checksum, IP source address selection and feed 429 * the packet into ip_output_simple. 430 */ 431 static void 432 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6, 433 ip_recv_attr_t *ira) 434 { 435 uint_t ip_hdr_length = ira->ira_ip_hdr_length; 436 ill_t *ill = ira->ira_ill; 437 ip_stack_t *ipst = ill->ill_ipst; 438 ip_xmit_attr_t ixas; 439 in6_addr_t origsrc; 440 441 /* 442 * Remove any extension headers (do not reverse a source route) 443 * and clear the flow id (keep traffic class for now). 444 */ 445 if (ip_hdr_length != IPV6_HDR_LEN) { 446 int i; 447 448 for (i = 0; i < IPV6_HDR_LEN; i++) { 449 mp->b_rptr[ip_hdr_length - i - 1] = 450 mp->b_rptr[IPV6_HDR_LEN - i - 1]; 451 } 452 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN); 453 ip6h = (ip6_t *)mp->b_rptr; 454 ip6h->ip6_nxt = IPPROTO_ICMPV6; 455 i = ntohs(ip6h->ip6_plen); 456 i -= (ip_hdr_length - IPV6_HDR_LEN); 457 ip6h->ip6_plen = htons(i); 458 ip_hdr_length = IPV6_HDR_LEN; 459 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp)); 460 } 461 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL; 462 463 /* Reverse the source and destination addresses. */ 464 origsrc = ip6h->ip6_src; 465 ip6h->ip6_src = ip6h->ip6_dst; 466 ip6h->ip6_dst = origsrc; 467 468 /* set the hop limit */ 469 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 470 471 /* 472 * Prepare for checksum by putting icmp length in the icmp 473 * checksum field. 
The checksum is calculated in ip_output 474 */ 475 icmp6->icmp6_cksum = ip6h->ip6_plen; 476 477 bzero(&ixas, sizeof (ixas)); 478 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 479 ixas.ixa_zoneid = ira->ira_zoneid; 480 ixas.ixa_cred = kcred; 481 ixas.ixa_cpid = NOPID; 482 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 483 ixas.ixa_ifindex = 0; 484 ixas.ixa_ipst = ipst; 485 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 486 487 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 488 /* 489 * This packet should go out the same way as it 490 * came in i.e in clear, independent of the IPsec 491 * policy for transmitting packets. 492 */ 493 ixas.ixa_flags |= IXAF_NO_IPSEC; 494 } else { 495 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 496 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 497 /* Note: mp already consumed and ip_drop_packet done */ 498 return; 499 } 500 } 501 502 /* Was the destination (now source) link-local? Send out same group */ 503 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 504 ixas.ixa_flags |= IXAF_SCOPEID_SET; 505 if (IS_UNDER_IPMP(ill)) 506 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 507 else 508 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 509 } 510 511 if (ira->ira_flags & IRAF_MULTIBROADCAST) { 512 /* 513 * Not one or our addresses (IRE_LOCALs), thus we let 514 * ip_output_simple pick the source. 515 */ 516 ip6h->ip6_src = ipv6_all_zeros; 517 ixas.ixa_flags |= IXAF_SET_SOURCE; 518 } 519 520 /* Should we send using dce_pmtu? */ 521 if (ipst->ips_ipv6_icmp_return_pmtu) 522 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY; 523 524 (void) ip_output_simple(mp, &ixas); 525 ixa_cleanup(&ixas); 526 527 } 528 529 /* 530 * Verify the ICMP messages for either for ICMP error or redirect packet. 531 * The caller should have fully pulled up the message. If it's a redirect 532 * packet, only basic checks on IP header will be done; otherwise, verify 533 * the packet by looking at the included ULP header. 
534 * 535 * Called before icmp_inbound_error_fanout_v6 is called. 536 */ 537 static boolean_t 538 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 539 { 540 ill_t *ill = ira->ira_ill; 541 uint16_t hdr_length; 542 uint8_t *nexthdrp; 543 uint8_t nexthdr; 544 ip_stack_t *ipst = ill->ill_ipst; 545 conn_t *connp; 546 ip6_t *ip6h; /* Inner header */ 547 548 ip6h = (ip6_t *)&icmp6[1]; 549 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr) 550 goto truncated; 551 552 if (icmp6->icmp6_type == ND_REDIRECT) { 553 hdr_length = sizeof (nd_redirect_t); 554 } else { 555 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION)) 556 goto discard_pkt; 557 hdr_length = IPV6_HDR_LEN; 558 } 559 560 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr) 561 goto truncated; 562 563 /* 564 * Stop here for ICMP_REDIRECT. 565 */ 566 if (icmp6->icmp6_type == ND_REDIRECT) 567 return (B_TRUE); 568 569 /* 570 * ICMP errors only. 571 */ 572 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 573 goto discard_pkt; 574 nexthdr = *nexthdrp; 575 576 /* Try to pass the ICMP message to clients who need it */ 577 switch (nexthdr) { 578 case IPPROTO_UDP: 579 /* 580 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 581 * transport header. 582 */ 583 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 584 mp->b_wptr) 585 goto truncated; 586 break; 587 case IPPROTO_TCP: { 588 tcpha_t *tcpha; 589 590 /* 591 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 592 * transport header. 593 */ 594 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 595 mp->b_wptr) 596 goto truncated; 597 598 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 599 /* 600 * With IPMP we need to match across group, which we do 601 * since we have the upper ill from ira_ill. 
602 */ 603 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN, 604 ill->ill_phyint->phyint_ifindex, ipst); 605 if (connp == NULL) 606 goto discard_pkt; 607 608 if ((connp->conn_verifyicmp != NULL) && 609 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) { 610 CONN_DEC_REF(connp); 611 goto discard_pkt; 612 } 613 CONN_DEC_REF(connp); 614 break; 615 } 616 case IPPROTO_SCTP: 617 /* 618 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of 619 * transport header. 620 */ 621 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN > 622 mp->b_wptr) 623 goto truncated; 624 break; 625 case IPPROTO_ESP: 626 case IPPROTO_AH: 627 break; 628 case IPPROTO_ENCAP: 629 case IPPROTO_IPV6: { 630 /* Look for self-encapsulated packets that caused an error */ 631 ip6_t *in_ip6h; 632 633 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 634 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ? 635 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr) 636 goto truncated; 637 break; 638 } 639 default: 640 break; 641 } 642 643 return (B_TRUE); 644 645 discard_pkt: 646 /* Bogus ICMP error. */ 647 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 648 return (B_FALSE); 649 650 truncated: 651 /* We pulled up everthing already. Must be truncated */ 652 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 653 return (B_FALSE); 654 } 655 656 /* 657 * Process received IPv6 ICMP Packet too big. 658 * The caller is responsible for validating the packet before passing it in 659 * and also to fanout the ICMP error to any matching transport conns. Assumes 660 * the message has been fully pulled up. 661 * 662 * Before getting here, the caller has called icmp_inbound_verify_v6() 663 * that should have verified with ULP to prevent undoing the changes we're 664 * going to make to DCE. For example, TCP might have verified that the packet 665 * which generated error is in the send window. 
666 * 667 * In some cases modified this MTU in the ICMP header packet; the caller 668 * should pass to the matching ULP after this returns. 669 */ 670 static void 671 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira) 672 { 673 uint32_t mtu; 674 dce_t *dce; 675 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */ 676 ip_stack_t *ipst = ill->ill_ipst; 677 int old_max_frag; 678 in6_addr_t final_dst; 679 ip6_t *ip6h; /* Inner IP header */ 680 681 /* Caller has already pulled up everything. */ 682 ip6h = (ip6_t *)&icmp6[1]; 683 final_dst = ip_get_dst_v6(ip6h, NULL, NULL); 684 685 /* 686 * For link local destinations matching simply on address is not 687 * sufficient. Same link local addresses for different ILL's is 688 * possible. 689 */ 690 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) { 691 dce = dce_lookup_and_add_v6(&final_dst, 692 ill->ill_phyint->phyint_ifindex, ipst); 693 } else { 694 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst); 695 } 696 if (dce == NULL) { 697 /* Couldn't add a unique one - ENOMEM */ 698 if (ip_debug > 2) { 699 /* ip1dbg */ 700 pr_addr_dbg("icmp_inbound_too_big_v6:" 701 "no dce for dst %s\n", AF_INET6, 702 &final_dst); 703 } 704 return; 705 } 706 707 mtu = ntohl(icmp6->icmp6_mtu); 708 709 mutex_enter(&dce->dce_lock); 710 if (dce->dce_flags & DCEF_PMTU) 711 old_max_frag = dce->dce_pmtu; 712 else if (IN6_IS_ADDR_MULTICAST(&final_dst)) 713 old_max_frag = ill->ill_mc_mtu; 714 else 715 old_max_frag = ill->ill_mtu; 716 717 if (mtu < IPV6_MIN_MTU) { 718 ip1dbg(("Received mtu less than IPv6 " 719 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu)); 720 mtu = IPV6_MIN_MTU; 721 /* 722 * If an mtu less than IPv6 min mtu is received, 723 * we must include a fragment header in 724 * subsequent packets. 
725 */ 726 dce->dce_flags |= DCEF_TOO_SMALL_PMTU; 727 } else { 728 dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU; 729 } 730 ip1dbg(("Received mtu from router: %d\n", mtu)); 731 dce->dce_pmtu = MIN(old_max_frag, mtu); 732 733 /* Prepare to send the new max frag size for the ULP. */ 734 if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) { 735 /* 736 * If we need a fragment header in every packet 737 * (above case or multirouting), make sure the 738 * ULP takes it into account when computing the 739 * payload size. 740 */ 741 icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t)); 742 } else { 743 icmp6->icmp6_mtu = htonl(dce->dce_pmtu); 744 } 745 /* We now have a PMTU for sure */ 746 dce->dce_flags |= DCEF_PMTU; 747 dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64()); 748 mutex_exit(&dce->dce_lock); 749 /* 750 * After dropping the lock the new value is visible to everyone. 751 * Then we bump the generation number so any cached values reinspect 752 * the dce_t. 753 */ 754 dce_increment_generation(dce); 755 dce_refrele(dce); 756 } 757 758 /* 759 * Fanout received ICMPv6 error packets to the transports. 760 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else. 761 * 762 * The caller must have called icmp_inbound_verify_v6. 763 */ 764 void 765 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira) 766 { 767 uint16_t *up; /* Pointer to ports in ULP header */ 768 uint32_t ports; /* reversed ports for fanout */ 769 ip6_t rip6h; /* With reversed addresses */ 770 ip6_t *ip6h; /* Inner IP header */ 771 uint16_t hdr_length; /* Inner IP header length */ 772 uint8_t *nexthdrp; 773 uint8_t nexthdr; 774 tcpha_t *tcpha; 775 conn_t *connp; 776 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */ 777 ip_stack_t *ipst = ill->ill_ipst; 778 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 779 780 /* Caller has already pulled up everything. 
*/ 781 ip6h = (ip6_t *)&icmp6[1]; 782 ASSERT(mp->b_cont == NULL); 783 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 784 785 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 786 goto drop_pkt; 787 nexthdr = *nexthdrp; 788 ira->ira_protocol = nexthdr; 789 790 /* 791 * We need a separate IP header with the source and destination 792 * addresses reversed to do fanout/classification because the ip6h in 793 * the ICMPv6 error is in the form we sent it out. 794 */ 795 rip6h.ip6_src = ip6h->ip6_dst; 796 rip6h.ip6_dst = ip6h->ip6_src; 797 rip6h.ip6_nxt = nexthdr; 798 799 /* Try to pass the ICMP message to clients who need it */ 800 switch (nexthdr) { 801 case IPPROTO_UDP: { 802 /* Attempt to find a client stream based on port. */ 803 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 804 805 /* Note that we send error to all matches. */ 806 ira->ira_flags |= IRAF_ICMP_ERROR; 807 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira); 808 ira->ira_flags &= ~IRAF_ICMP_ERROR; 809 return; 810 } 811 case IPPROTO_TCP: { 812 /* 813 * Attempt to find a client stream based on port. 814 * Note that we do a reverse lookup since the header is 815 * in the form we sent it out. 816 */ 817 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length); 818 /* 819 * With IPMP we need to match across group, which we do 820 * since we have the upper ill from ira_ill. 
821 */ 822 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, 823 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst); 824 if (connp == NULL) { 825 goto drop_pkt; 826 } 827 828 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) || 829 (ira->ira_flags & IRAF_IPSEC_SECURE)) { 830 mp = ipsec_check_inbound_policy(mp, connp, 831 NULL, ip6h, ira); 832 if (mp == NULL) { 833 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 834 /* Note that mp is NULL */ 835 ip_drop_input("ipIfStatsInDiscards", mp, ill); 836 CONN_DEC_REF(connp); 837 return; 838 } 839 } 840 841 ira->ira_flags |= IRAF_ICMP_ERROR; 842 if (IPCL_IS_TCP(connp)) { 843 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 844 connp->conn_recvicmp, connp, ira, SQ_FILL, 845 SQTAG_TCP6_INPUT_ICMP_ERR); 846 } else { 847 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 848 ill_t *rill = ira->ira_rill; 849 850 ira->ira_ill = ira->ira_rill = NULL; 851 (connp->conn_recv)(connp, mp, NULL, ira); 852 CONN_DEC_REF(connp); 853 ira->ira_ill = ill; 854 ira->ira_rill = rill; 855 } 856 ira->ira_flags &= ~IRAF_ICMP_ERROR; 857 return; 858 859 } 860 case IPPROTO_SCTP: 861 up = (uint16_t *)((uchar_t *)ip6h + hdr_length); 862 /* Find a SCTP client stream for this packet. */ 863 ((uint16_t *)&ports)[0] = up[1]; 864 ((uint16_t *)&ports)[1] = up[0]; 865 866 ira->ira_flags |= IRAF_ICMP_ERROR; 867 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira); 868 ira->ira_flags &= ~IRAF_ICMP_ERROR; 869 return; 870 871 case IPPROTO_ESP: 872 case IPPROTO_AH: 873 if (!ipsec_loaded(ipss)) { 874 ip_proto_not_sup(mp, ira); 875 return; 876 } 877 878 if (nexthdr == IPPROTO_ESP) 879 mp = ipsecesp_icmp_error(mp, ira); 880 else 881 mp = ipsecah_icmp_error(mp, ira); 882 if (mp == NULL) 883 return; 884 885 /* Just in case ipsec didn't preserve the NULL b_cont */ 886 if (mp->b_cont != NULL) { 887 if (!pullupmsg(mp, -1)) 888 goto drop_pkt; 889 } 890 891 /* 892 * If succesful, the mp has been modified to not include 893 * the ESP/AH header so we can fanout to the ULP's icmp 894 * error handler. 
895 */ 896 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN) 897 goto drop_pkt; 898 899 ip6h = (ip6_t *)mp->b_rptr; 900 /* Don't call hdr_length_v6() unless you have to. */ 901 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 902 hdr_length = ip_hdr_length_v6(mp, ip6h); 903 else 904 hdr_length = IPV6_HDR_LEN; 905 906 /* Verify the modified message before any further processes. */ 907 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 908 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 909 freemsg(mp); 910 return; 911 } 912 913 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 914 return; 915 916 case IPPROTO_IPV6: { 917 /* Look for self-encapsulated packets that caused an error */ 918 ip6_t *in_ip6h; 919 920 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length); 921 922 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) && 923 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) { 924 /* 925 * Self-encapsulated case. As in the ipv4 case, 926 * we need to strip the 2nd IP header. Since mp 927 * is already pulled-up, we can simply bcopy 928 * the 3rd header + data over the 2nd header. 929 */ 930 uint16_t unused_len; 931 932 /* 933 * Make sure we don't do recursion more than once. 934 */ 935 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h, 936 &unused_len, &nexthdrp) || 937 *nexthdrp == IPPROTO_IPV6) { 938 goto drop_pkt; 939 } 940 941 /* 942 * Copy the 3rd header + remaining data on top 943 * of the 2nd header. 944 */ 945 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h); 946 947 /* 948 * Subtract length of the 2nd header. 949 */ 950 mp->b_wptr -= hdr_length; 951 952 ip6h = (ip6_t *)mp->b_rptr; 953 /* Don't call hdr_length_v6() unless you have to. */ 954 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) 955 hdr_length = ip_hdr_length_v6(mp, ip6h); 956 else 957 hdr_length = IPV6_HDR_LEN; 958 959 /* 960 * Verify the modified message before any further 961 * processes. 
962 */ 963 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]); 964 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) { 965 freemsg(mp); 966 return; 967 } 968 969 /* 970 * Now recurse, and see what I _really_ should be 971 * doing here. 972 */ 973 icmp_inbound_error_fanout_v6(mp, icmp6, ira); 974 return; 975 } 976 /* FALLTHRU */ 977 } 978 case IPPROTO_ENCAP: 979 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src, 980 &rip6h.ip6_dst, ipst)) != NULL) { 981 ira->ira_flags |= IRAF_ICMP_ERROR; 982 connp->conn_recvicmp(connp, mp, NULL, ira); 983 CONN_DEC_REF(connp); 984 ira->ira_flags &= ~IRAF_ICMP_ERROR; 985 return; 986 } 987 /* 988 * No IP tunnel is interested, fallthrough and see 989 * if a raw socket will want it. 990 */ 991 /* FALLTHRU */ 992 default: 993 ira->ira_flags |= IRAF_ICMP_ERROR; 994 ASSERT(ira->ira_protocol == nexthdr); 995 ip_fanout_proto_v6(mp, &rip6h, ira); 996 ira->ira_flags &= ~IRAF_ICMP_ERROR; 997 return; 998 } 999 /* NOTREACHED */ 1000 drop_pkt: 1001 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 1002 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n")); 1003 freemsg(mp); 1004 } 1005 1006 /* 1007 * Process received IPv6 ICMP Redirect messages. 1008 * Assumes the caller has verified that the headers are in the pulled up mblk. 1009 * Consumes mp. 
1010 */ 1011 /* ARGSUSED */ 1012 static void 1013 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd, 1014 ip_recv_attr_t *ira) 1015 { 1016 ire_t *ire, *nire; 1017 ire_t *prev_ire = NULL; 1018 ire_t *redir_ire; 1019 in6_addr_t *src, *dst, *gateway; 1020 nd_opt_hdr_t *opt; 1021 nce_t *nce; 1022 int ncec_flags = 0; 1023 int err = 0; 1024 boolean_t redirect_to_router = B_FALSE; 1025 int len; 1026 int optlen; 1027 ill_t *ill = ira->ira_rill; 1028 ill_t *rill = ira->ira_rill; 1029 ip_stack_t *ipst = ill->ill_ipst; 1030 1031 /* 1032 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill 1033 * and make it be the IPMP upper so avoid being confused by a packet 1034 * addressed to a unicast address on a different ill. 1035 */ 1036 if (IS_UNDER_IPMP(rill)) { 1037 rill = ipmp_ill_hold_ipmp_ill(rill); 1038 if (rill == NULL) { 1039 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1040 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill", 1041 mp, ill); 1042 freemsg(mp); 1043 return; 1044 } 1045 ASSERT(rill != ira->ira_rill); 1046 } 1047 1048 len = mp->b_wptr - (uchar_t *)rd; 1049 src = &ip6h->ip6_src; 1050 dst = &rd->nd_rd_dst; 1051 gateway = &rd->nd_rd_target; 1052 1053 /* Verify if it is a valid redirect */ 1054 if (!IN6_IS_ADDR_LINKLOCAL(src) || 1055 (ip6h->ip6_hops != IPV6_MAX_HOPS) || 1056 (rd->nd_rd_code != 0) || 1057 (len < sizeof (nd_redirect_t)) || 1058 (IN6_IS_ADDR_V4MAPPED(dst)) || 1059 (IN6_IS_ADDR_MULTICAST(dst))) { 1060 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1061 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill); 1062 goto fail_redirect; 1063 } 1064 1065 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) || 1066 IN6_ARE_ADDR_EQUAL(gateway, dst))) { 1067 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1068 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway", 1069 mp, ill); 1070 goto fail_redirect; 1071 } 1072 1073 optlen = len - sizeof (nd_redirect_t); 1074 if (optlen != 0) { 1075 if 
(!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) { 1076 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1077 ip_drop_input("ipv6IfIcmpInBadRedirects - options", 1078 mp, ill); 1079 goto fail_redirect; 1080 } 1081 } 1082 1083 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) { 1084 redirect_to_router = B_TRUE; 1085 ncec_flags |= NCE_F_ISROUTER; 1086 } else { 1087 gateway = dst; /* Add nce for dst */ 1088 } 1089 1090 1091 /* 1092 * Verify that the IP source address of the redirect is 1093 * the same as the current first-hop router for the specified 1094 * ICMP destination address. 1095 * Also, Make sure we had a route for the dest in question and 1096 * that route was pointing to the old gateway (the source of the 1097 * redirect packet.) 1098 * We do longest match and then compare ire_gateway_addr_v6 below. 1099 */ 1100 prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill, 1101 ALL_ZONES, NULL, MATCH_IRE_ILL, 0, ipst, NULL); 1102 1103 /* 1104 * Check that 1105 * the redirect was not from ourselves 1106 * old gateway is still directly reachable 1107 */ 1108 if (prev_ire == NULL || 1109 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) || 1110 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 1111 !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) { 1112 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1113 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill); 1114 goto fail_redirect; 1115 } 1116 1117 ASSERT(prev_ire->ire_ill != NULL); 1118 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD) 1119 ncec_flags |= NCE_F_NONUD; 1120 1121 opt = (nd_opt_hdr_t *)&rd[1]; 1122 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR); 1123 if (opt != NULL) { 1124 err = nce_lookup_then_add_v6(rill, 1125 (uchar_t *)&opt[1], /* Link layer address */ 1126 rill->ill_phys_addr_length, 1127 gateway, ncec_flags, ND_STALE, &nce); 1128 switch (err) { 1129 case 0: 1130 nce_refrele(nce); 1131 break; 1132 case EEXIST: 1133 /* 1134 * Check to see if link layer address has 
changed and 1135 * process the ncec_state accordingly. 1136 */ 1137 nce_process(nce->nce_common, 1138 (uchar_t *)&opt[1], 0, B_FALSE); 1139 nce_refrele(nce); 1140 break; 1141 default: 1142 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n", 1143 err)); 1144 goto fail_redirect; 1145 } 1146 } 1147 if (redirect_to_router) { 1148 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway)); 1149 1150 /* 1151 * Create a Route Association. This will allow us to remember 1152 * a router told us to use the particular gateway. 1153 */ 1154 ire = ire_create_v6( 1155 dst, 1156 &ipv6_all_ones, /* mask */ 1157 gateway, /* gateway addr */ 1158 IRE_HOST, 1159 prev_ire->ire_ill, 1160 ALL_ZONES, 1161 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 1162 NULL, 1163 ipst); 1164 } else { 1165 ipif_t *ipif; 1166 in6_addr_t gw; 1167 1168 /* 1169 * Just create an on link entry, i.e. interface route. 1170 * The gateway field is our link-local on the ill. 1171 */ 1172 mutex_enter(&rill->ill_lock); 1173 for (ipif = rill->ill_ipif; ipif != NULL; 1174 ipif = ipif->ipif_next) { 1175 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) && 1176 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr)) 1177 break; 1178 } 1179 if (ipif == NULL) { 1180 /* We have no link-local address! 
*/ 1181 mutex_exit(&rill->ill_lock); 1182 goto fail_redirect; 1183 } 1184 gw = ipif->ipif_v6lcl_addr; 1185 mutex_exit(&rill->ill_lock); 1186 1187 ire = ire_create_v6( 1188 dst, /* gateway == dst */ 1189 &ipv6_all_ones, /* mask */ 1190 &gw, /* gateway addr */ 1191 rill->ill_net_type, /* IF_[NO]RESOLVER */ 1192 prev_ire->ire_ill, 1193 ALL_ZONES, 1194 (RTF_DYNAMIC | RTF_HOST), 1195 NULL, 1196 ipst); 1197 } 1198 1199 if (ire == NULL) 1200 goto fail_redirect; 1201 1202 nire = ire_add(ire); 1203 /* Check if it was a duplicate entry */ 1204 if (nire != NULL && nire != ire) { 1205 ASSERT(nire->ire_identical_ref > 1); 1206 ire_delete(nire); 1207 ire_refrele(nire); 1208 nire = NULL; 1209 } 1210 ire = nire; 1211 if (ire != NULL) { 1212 ire_refrele(ire); /* Held in ire_add */ 1213 1214 /* tell routing sockets that we received a redirect */ 1215 ip_rts_change_v6(RTM_REDIRECT, 1216 &rd->nd_rd_dst, 1217 &rd->nd_rd_target, 1218 &ipv6_all_ones, 0, src, 1219 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0, 1220 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst); 1221 1222 /* 1223 * Delete any existing IRE_HOST type ires for this destination. 1224 * This together with the added IRE has the effect of 1225 * modifying an existing redirect. 1226 */ 1227 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST, 1228 prev_ire->ire_ill, ALL_ZONES, NULL, 1229 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst, 1230 NULL); 1231 1232 if (redir_ire != NULL) { 1233 if (redir_ire->ire_flags & RTF_DYNAMIC) 1234 ire_delete(redir_ire); 1235 ire_refrele(redir_ire); 1236 } 1237 } 1238 1239 ire_refrele(prev_ire); 1240 prev_ire = NULL; 1241 1242 fail_redirect: 1243 if (prev_ire != NULL) 1244 ire_refrele(prev_ire); 1245 freemsg(mp); 1246 if (rill != ira->ira_rill) 1247 ill_refrele(rill); 1248 } 1249 1250 /* 1251 * Build and ship an IPv6 ICMP message using the packet data in mp, 1252 * and the ICMP header pointed to by "stuff". (May be called as 1253 * writer.) 
1254 * Note: assumes that icmp_pkt_err_ok_v6 has been called to 1255 * verify that an icmp error packet can be sent. 1256 * 1257 * If v6src_ptr is set use it as a source. Otherwise select a reasonable 1258 * source address (see above function). 1259 */ 1260 static void 1261 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len, 1262 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira) 1263 { 1264 ip6_t *ip6h; 1265 in6_addr_t v6dst; 1266 size_t len_needed; 1267 size_t msg_len; 1268 mblk_t *mp1; 1269 icmp6_t *icmp6; 1270 in6_addr_t v6src; 1271 ill_t *ill = ira->ira_ill; 1272 ip_stack_t *ipst = ill->ill_ipst; 1273 ip_xmit_attr_t ixas; 1274 1275 ip6h = (ip6_t *)mp->b_rptr; 1276 1277 bzero(&ixas, sizeof (ixas)); 1278 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 1279 ixas.ixa_zoneid = ira->ira_zoneid; 1280 ixas.ixa_ifindex = 0; 1281 ixas.ixa_ipst = ipst; 1282 ixas.ixa_cred = kcred; 1283 ixas.ixa_cpid = NOPID; 1284 ixas.ixa_tsl = ira->ira_tsl; /* Behave as a multi-level responder */ 1285 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1286 1287 /* 1288 * If the source of the original packet was link-local, then 1289 * make sure we send on the same ill (group) as we received it on. 1290 */ 1291 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 1292 ixas.ixa_flags |= IXAF_SCOPEID_SET; 1293 if (IS_UNDER_IPMP(ill)) 1294 ixas.ixa_scopeid = ill_get_upper_ifindex(ill); 1295 else 1296 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex; 1297 } 1298 1299 if (ira->ira_flags & IRAF_IPSEC_SECURE) { 1300 /* 1301 * Apply IPsec based on how IPsec was applied to 1302 * the packet that had the error. 1303 * 1304 * If it was an outbound packet that caused the ICMP 1305 * error, then the caller will have setup the IRA 1306 * appropriately. 1307 */ 1308 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) { 1309 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 1310 /* Note: mp already consumed and ip_drop_packet done */ 1311 return; 1312 } 1313 } else { 1314 /* 1315 * This is in clear. 
The icmp message we are building 1316 * here should go out in clear, independent of our policy. 1317 */ 1318 ixas.ixa_flags |= IXAF_NO_IPSEC; 1319 } 1320 1321 /* 1322 * If the caller specified the source we use that. 1323 * Otherwise, if the packet was for one of our unicast addresses, make 1324 * sure we respond with that as the source. Otherwise 1325 * have ip_output_simple pick the source address. 1326 */ 1327 if (v6src_ptr != NULL) { 1328 v6src = *v6src_ptr; 1329 } else { 1330 ire_t *ire; 1331 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY; 1332 1333 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) || 1334 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst)) 1335 match_flags |= MATCH_IRE_ILL; 1336 1337 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 1338 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid, NULL, 1339 match_flags, 0, ipst, NULL); 1340 if (ire != NULL) { 1341 v6src = ip6h->ip6_dst; 1342 ire_refrele(ire); 1343 } else { 1344 v6src = ipv6_all_zeros; 1345 ixas.ixa_flags |= IXAF_SET_SOURCE; 1346 } 1347 } 1348 v6dst = ip6h->ip6_src; 1349 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len; 1350 msg_len = msgdsize(mp); 1351 if (msg_len > len_needed) { 1352 if (!adjmsg(mp, len_needed - msg_len)) { 1353 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1354 freemsg(mp); 1355 return; 1356 } 1357 msg_len = len_needed; 1358 } 1359 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED); 1360 if (mp1 == NULL) { 1361 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1362 freemsg(mp); 1363 return; 1364 } 1365 mp1->b_cont = mp; 1366 mp = mp1; 1367 1368 /* 1369 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this 1370 * node generates be accepted in peace by all on-host destinations. 1371 * If we do NOT assume that all on-host destinations trust 1372 * self-generated ICMP messages, then rework here, ip6.c, and spd.c. 1373 * (Look for IXAF_TRUSTED_ICMP). 
1374 */ 1375 ixas.ixa_flags |= IXAF_TRUSTED_ICMP; 1376 1377 ip6h = (ip6_t *)mp->b_rptr; 1378 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len); 1379 1380 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 1381 ip6h->ip6_nxt = IPPROTO_ICMPV6; 1382 ip6h->ip6_hops = ipst->ips_ipv6_def_hops; 1383 ip6h->ip6_dst = v6dst; 1384 ip6h->ip6_src = v6src; 1385 msg_len += IPV6_HDR_LEN + len; 1386 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) { 1387 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len); 1388 msg_len = IP_MAXPACKET + IPV6_HDR_LEN; 1389 } 1390 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN)); 1391 icmp6 = (icmp6_t *)&ip6h[1]; 1392 bcopy(stuff, (char *)icmp6, len); 1393 /* 1394 * Prepare for checksum by putting icmp length in the icmp 1395 * checksum field. The checksum is calculated in ip_output_wire_v6. 1396 */ 1397 icmp6->icmp6_cksum = ip6h->ip6_plen; 1398 if (icmp6->icmp6_type == ND_REDIRECT) { 1399 ip6h->ip6_hops = IPV6_MAX_HOPS; 1400 } 1401 1402 (void) ip_output_simple(mp, &ixas); 1403 ixa_cleanup(&ixas); 1404 } 1405 1406 /* 1407 * Update the output mib when ICMPv6 packets are sent. 
1408 */ 1409 void 1410 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6) 1411 { 1412 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs); 1413 1414 switch (icmp6->icmp6_type) { 1415 case ICMP6_DST_UNREACH: 1416 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs); 1417 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN) 1418 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs); 1419 break; 1420 1421 case ICMP6_TIME_EXCEEDED: 1422 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds); 1423 break; 1424 1425 case ICMP6_PARAM_PROB: 1426 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems); 1427 break; 1428 1429 case ICMP6_PACKET_TOO_BIG: 1430 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs); 1431 break; 1432 1433 case ICMP6_ECHO_REQUEST: 1434 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos); 1435 break; 1436 1437 case ICMP6_ECHO_REPLY: 1438 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies); 1439 break; 1440 1441 case ND_ROUTER_SOLICIT: 1442 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits); 1443 break; 1444 1445 case ND_ROUTER_ADVERT: 1446 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements); 1447 break; 1448 1449 case ND_NEIGHBOR_SOLICIT: 1450 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits); 1451 break; 1452 1453 case ND_NEIGHBOR_ADVERT: 1454 BUMP_MIB(ill->ill_icmp6_mib, 1455 ipv6IfIcmpOutNeighborAdvertisements); 1456 break; 1457 1458 case ND_REDIRECT: 1459 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects); 1460 break; 1461 1462 case MLD_LISTENER_QUERY: 1463 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries); 1464 break; 1465 1466 case MLD_LISTENER_REPORT: 1467 case MLD_V2_LISTENER_REPORT: 1468 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses); 1469 break; 1470 1471 case MLD_LISTENER_REDUCTION: 1472 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions); 1473 break; 1474 } 1475 } 1476 1477 /* 1478 * Check if it is ok to send an ICMPv6 error packet in 1479 * response to the IP packet in mp. 
1480 * Free the message and return null if no 1481 * ICMP error packet should be sent. 1482 */ 1483 static mblk_t * 1484 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira) 1485 { 1486 ill_t *ill = ira->ira_ill; 1487 ip_stack_t *ipst = ill->ill_ipst; 1488 boolean_t llbcast; 1489 ip6_t *ip6h; 1490 1491 if (!mp) 1492 return (NULL); 1493 1494 /* We view multicast and broadcast as the same.. */ 1495 llbcast = (ira->ira_flags & 1496 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0; 1497 ip6h = (ip6_t *)mp->b_rptr; 1498 1499 /* Check if source address uniquely identifies the host */ 1500 1501 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) || 1502 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) || 1503 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 1504 freemsg(mp); 1505 return (NULL); 1506 } 1507 1508 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1509 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN; 1510 icmp6_t *icmp6; 1511 1512 if (mp->b_wptr - mp->b_rptr < len_needed) { 1513 if (!pullupmsg(mp, len_needed)) { 1514 BUMP_MIB(ill->ill_icmp6_mib, 1515 ipv6IfIcmpInErrors); 1516 freemsg(mp); 1517 return (NULL); 1518 } 1519 ip6h = (ip6_t *)mp->b_rptr; 1520 } 1521 icmp6 = (icmp6_t *)&ip6h[1]; 1522 /* Explicitly do not generate errors in response to redirects */ 1523 if (ICMP6_IS_ERROR(icmp6->icmp6_type) || 1524 icmp6->icmp6_type == ND_REDIRECT) { 1525 freemsg(mp); 1526 return (NULL); 1527 } 1528 } 1529 /* 1530 * Check that the destination is not multicast and that the packet 1531 * was not sent on link layer broadcast or multicast. (Exception 1532 * is Packet too big message as per the draft - when mcast_ok is set.) 1533 */ 1534 if (!mcast_ok && 1535 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) { 1536 freemsg(mp); 1537 return (NULL); 1538 } 1539 /* 1540 * If this is a labeled system, then check to see if we're allowed to 1541 * send a response to this particular sender. If not, then just drop. 
1542 */ 1543 if (is_system_labeled() && !tsol_can_reply_error(mp, ira)) { 1544 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors); 1545 freemsg(mp); 1546 return (NULL); 1547 } 1548 1549 if (icmp_err_rate_limit(ipst)) { 1550 /* 1551 * Only send ICMP error packets every so often. 1552 * This should be done on a per port/source basis, 1553 * but for now this will suffice. 1554 */ 1555 freemsg(mp); 1556 return (NULL); 1557 } 1558 return (mp); 1559 } 1560 1561 /* 1562 * Called when a packet was sent out the same link that it arrived on. 1563 * Check if it is ok to send a redirect and then send it. 1564 */ 1565 void 1566 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire, 1567 ip_recv_attr_t *ira) 1568 { 1569 ill_t *ill = ira->ira_ill; 1570 ip_stack_t *ipst = ill->ill_ipst; 1571 in6_addr_t *v6targ; 1572 ire_t *src_ire_v6 = NULL; 1573 mblk_t *mp1; 1574 ire_t *nhop_ire = NULL; 1575 1576 /* 1577 * Don't send a redirect when forwarding a source 1578 * routed packet. 1579 */ 1580 if (ip_source_routed_v6(ip6h, mp, ipst)) 1581 return; 1582 1583 if (ire->ire_type & IRE_ONLINK) { 1584 /* Target is directly connected */ 1585 v6targ = &ip6h->ip6_dst; 1586 } else { 1587 /* Determine the most specific IRE used to send the packets */ 1588 nhop_ire = ire_nexthop(ire); 1589 if (nhop_ire == NULL) 1590 return; 1591 1592 /* 1593 * We won't send redirects to a router 1594 * that doesn't have a link local 1595 * address, but will forward. 
1596 */ 1597 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) { 1598 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 1599 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 1600 ire_refrele(nhop_ire); 1601 return; 1602 } 1603 v6targ = &nhop_ire->ire_addr_v6; 1604 } 1605 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src, 1606 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES, NULL, 1607 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL); 1608 1609 if (src_ire_v6 == NULL) { 1610 if (nhop_ire != NULL) 1611 ire_refrele(nhop_ire); 1612 return; 1613 } 1614 1615 /* 1616 * The source is directly connected. 1617 */ 1618 mp1 = copymsg(mp); 1619 if (mp1 != NULL) 1620 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira); 1621 1622 if (nhop_ire != NULL) 1623 ire_refrele(nhop_ire); 1624 ire_refrele(src_ire_v6); 1625 } 1626 1627 /* 1628 * Generate an ICMPv6 redirect message. 1629 * Include target link layer address option if it exits. 1630 * Always include redirect header. 1631 */ 1632 static void 1633 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest, 1634 ip_recv_attr_t *ira) 1635 { 1636 nd_redirect_t *rd; 1637 nd_opt_rd_hdr_t *rdh; 1638 uchar_t *buf; 1639 ncec_t *ncec = NULL; 1640 nd_opt_hdr_t *opt; 1641 int len; 1642 int ll_opt_len = 0; 1643 int max_redir_hdr_data_len; 1644 int pkt_len; 1645 in6_addr_t *srcp; 1646 ill_t *ill; 1647 boolean_t need_refrele; 1648 ip_stack_t *ipst = ira->ira_ill->ill_ipst; 1649 1650 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira); 1651 if (mp == NULL) 1652 return; 1653 1654 if (IS_UNDER_IPMP(ira->ira_ill)) { 1655 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill); 1656 if (ill == NULL) { 1657 ill = ira->ira_ill; 1658 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects); 1659 ip_drop_output("no IPMP ill for sending redirect", 1660 mp, ill); 1661 freemsg(mp); 1662 return; 1663 } 1664 need_refrele = B_TRUE; 1665 } else { 1666 ill = ira->ira_ill; 1667 need_refrele = B_FALSE; 1668 } 1669 1670 ncec = ncec_lookup_illgrp_v6(ill, targetp); 1671 
if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE && 1672 ncec->ncec_lladdr != NULL) { 1673 ll_opt_len = (sizeof (nd_opt_hdr_t) + 1674 ill->ill_phys_addr_length + 7)/8 * 8; 1675 } 1676 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len; 1677 ASSERT(len % 4 == 0); 1678 buf = kmem_alloc(len, KM_NOSLEEP); 1679 if (buf == NULL) { 1680 if (ncec != NULL) 1681 ncec_refrele(ncec); 1682 if (need_refrele) 1683 ill_refrele(ill); 1684 freemsg(mp); 1685 return; 1686 } 1687 1688 rd = (nd_redirect_t *)buf; 1689 rd->nd_rd_type = (uint8_t)ND_REDIRECT; 1690 rd->nd_rd_code = 0; 1691 rd->nd_rd_reserved = 0; 1692 rd->nd_rd_target = *targetp; 1693 rd->nd_rd_dst = *dest; 1694 1695 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t)); 1696 if (ncec != NULL && ll_opt_len != 0) { 1697 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; 1698 opt->nd_opt_len = ll_opt_len/8; 1699 bcopy((char *)ncec->ncec_lladdr, &opt[1], 1700 ill->ill_phys_addr_length); 1701 } 1702 if (ncec != NULL) 1703 ncec_refrele(ncec); 1704 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len); 1705 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER; 1706 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */ 1707 max_redir_hdr_data_len = 1708 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8; 1709 pkt_len = msgdsize(mp); 1710 /* Make sure mp is 8 byte aligned */ 1711 if (pkt_len > max_redir_hdr_data_len) { 1712 rdh->nd_opt_rh_len = (max_redir_hdr_data_len + 1713 sizeof (nd_opt_rd_hdr_t))/8; 1714 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len); 1715 } else { 1716 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8; 1717 (void) adjmsg(mp, -(pkt_len % 8)); 1718 } 1719 rdh->nd_opt_rh_reserved1 = 0; 1720 rdh->nd_opt_rh_reserved2 = 0; 1721 /* ipif_v6lcl_addr contains the link-local source address */ 1722 srcp = &ill->ill_ipif->ipif_v6lcl_addr; 1723 1724 /* Redirects sent by router, and router is global zone */ 1725 ASSERT(ira->ira_zoneid == ALL_ZONES); 1726 
ira->ira_zoneid = GLOBAL_ZONEID; 1727 icmp_pkt_v6(mp, buf, len, srcp, ira); 1728 kmem_free(buf, len); 1729 if (need_refrele) 1730 ill_refrele(ill); 1731 } 1732 1733 1734 /* Generate an ICMP time exceeded message. (May be called as writer.) */ 1735 void 1736 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1737 ip_recv_attr_t *ira) 1738 { 1739 icmp6_t icmp6; 1740 1741 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1742 if (mp == NULL) 1743 return; 1744 1745 bzero(&icmp6, sizeof (icmp6_t)); 1746 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED; 1747 icmp6.icmp6_code = code; 1748 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1749 } 1750 1751 /* 1752 * Generate an ICMP unreachable message. 1753 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1754 * constructed by the caller. 1755 */ 1756 void 1757 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok, 1758 ip_recv_attr_t *ira) 1759 { 1760 icmp6_t icmp6; 1761 1762 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1763 if (mp == NULL) 1764 return; 1765 1766 bzero(&icmp6, sizeof (icmp6_t)); 1767 icmp6.icmp6_type = ICMP6_DST_UNREACH; 1768 icmp6.icmp6_code = code; 1769 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1770 } 1771 1772 /* 1773 * Generate an ICMP pkt too big message. 1774 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1775 * constructed by the caller. 1776 */ 1777 void 1778 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok, 1779 ip_recv_attr_t *ira) 1780 { 1781 icmp6_t icmp6; 1782 1783 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1784 if (mp == NULL) 1785 return; 1786 1787 bzero(&icmp6, sizeof (icmp6_t)); 1788 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 1789 icmp6.icmp6_code = 0; 1790 icmp6.icmp6_mtu = htonl(mtu); 1791 1792 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1793 } 1794 1795 /* 1796 * Generate an ICMP parameter problem message. (May be called as writer.) 
1797 * 'offset' is the offset from the beginning of the packet in error. 1798 * When called from ip_output side a minimal ip_recv_attr_t needs to be 1799 * constructed by the caller. 1800 */ 1801 static void 1802 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset, 1803 boolean_t mcast_ok, ip_recv_attr_t *ira) 1804 { 1805 icmp6_t icmp6; 1806 1807 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira); 1808 if (mp == NULL) 1809 return; 1810 1811 bzero((char *)&icmp6, sizeof (icmp6_t)); 1812 icmp6.icmp6_type = ICMP6_PARAM_PROB; 1813 icmp6.icmp6_code = code; 1814 icmp6.icmp6_pptr = htonl(offset); 1815 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira); 1816 } 1817 1818 void 1819 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok, 1820 ip_recv_attr_t *ira) 1821 { 1822 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1823 uint16_t hdr_length; 1824 uint8_t *nexthdrp; 1825 uint32_t offset; 1826 ill_t *ill = ira->ira_ill; 1827 1828 /* Determine the offset of the bad nexthdr value */ 1829 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) { 1830 /* Malformed packet */ 1831 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1832 ip_drop_input("ipIfStatsInDiscards", mp, ill); 1833 freemsg(mp); 1834 return; 1835 } 1836 1837 offset = nexthdrp - mp->b_rptr; 1838 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset, 1839 mcast_ok, ira); 1840 } 1841 1842 /* 1843 * Verify whether or not the IP address is a valid local address. 1844 * Could be a unicast, including one for a down interface. 1845 * If allow_mcbc then a multicast or broadcast address is also 1846 * acceptable. 1847 * 1848 * In the case of a multicast address, however, the 1849 * upper protocol is expected to reset the src address 1850 * to zero when we return IPVL_MCAST so that 1851 * no packets are emitted with multicast address as 1852 * source address. 
1853 * The addresses valid for bind are: 1854 * (1) - in6addr_any 1855 * (2) - IP address of an UP interface 1856 * (3) - IP address of a DOWN interface 1857 * (4) - a multicast address. In this case 1858 * the conn will only receive packets destined to 1859 * the specified multicast address. Note: the 1860 * application still has to issue an 1861 * IPV6_JOIN_GROUP socket option. 1862 * 1863 * In all the above cases, the bound address must be valid in the current zone. 1864 * When the address is loopback or multicast, there might be many matching IREs 1865 * so bind has to look up based on the zone. 1866 */ 1867 ip_laddr_t 1868 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid, 1869 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid) 1870 { 1871 ire_t *src_ire; 1872 uint_t match_flags; 1873 ill_t *ill = NULL; 1874 1875 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src)); 1876 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src)); 1877 1878 match_flags = MATCH_IRE_ZONEONLY; 1879 if (scopeid != 0) { 1880 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst); 1881 if (ill == NULL) 1882 return (IPVL_BAD); 1883 match_flags |= MATCH_IRE_ILL; 1884 } 1885 1886 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0, 1887 ill, zoneid, NULL, match_flags, 0, ipst, NULL); 1888 if (ill != NULL) 1889 ill_refrele(ill); 1890 1891 /* 1892 * If an address other than in6addr_any is requested, 1893 * we verify that it is a valid address for bind 1894 * Note: Following code is in if-else-if form for 1895 * readability compared to a condition check. 1896 */ 1897 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) { 1898 /* 1899 * (2) Bind to address of local UP interface 1900 */ 1901 ire_refrele(src_ire); 1902 return (IPVL_UNICAST_UP); 1903 } else if (IN6_IS_ADDR_MULTICAST(v6src)) { 1904 /* (4) bind to multicast address. 
*/ 1905 if (src_ire != NULL) 1906 ire_refrele(src_ire); 1907 1908 /* 1909 * Note: caller should take IPV6_MULTICAST_IF 1910 * into account when selecting a real source address. 1911 */ 1912 if (allow_mcbc) 1913 return (IPVL_MCAST); 1914 else 1915 return (IPVL_BAD); 1916 } else { 1917 ipif_t *ipif; 1918 1919 /* 1920 * (3) Bind to address of local DOWN interface? 1921 * (ipif_lookup_addr() looks up all interfaces 1922 * but we do not get here for UP interfaces 1923 * - case (2) above) 1924 */ 1925 if (src_ire != NULL) 1926 ire_refrele(src_ire); 1927 1928 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst); 1929 if (ipif == NULL) 1930 return (IPVL_BAD); 1931 1932 /* Not a useful source? */ 1933 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) { 1934 ipif_refrele(ipif); 1935 return (IPVL_BAD); 1936 } 1937 ipif_refrele(ipif); 1938 return (IPVL_UNICAST_DOWN); 1939 } 1940 } 1941 1942 /* 1943 * Verify that both the source and destination addresses are valid. If 1944 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable, 1945 * i.e. have no route to it. Protocols like TCP want to verify destination 1946 * reachability, while tunnels do not. 1947 * 1948 * Determine the route, the interface, and (optionally) the source address 1949 * to use to reach a given destination. 1950 * Note that we allow connect to broadcast and multicast addresses when 1951 * IPDF_ALLOW_MCBC is set. 1952 * first_hop and dst_addr are normally the same, but if source routing 1953 * they will differ; in that case the first_hop is what we'll use for the 1954 * routing lookup but the dce and label checks will be done on dst_addr, 1955 * 1956 * If uinfo is set, then we fill in the best available information 1957 * we have for the destination. This is based on (in priority order) any 1958 * metrics and path MTU stored in a dce_t, route metrics, and finally the 1959 * ill_mtu/ill_mc_mtu. 1960 * 1961 * Tsol note: If we have a source route then dst_addr != firsthop. 
But we 1962 * always do the label check on dst_addr. 1963 * 1964 * Assumes that the caller has set ixa_scopeid for link-local communication. 1965 */ 1966 int 1967 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr, 1968 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo, 1969 uint32_t flags, uint_t mac_mode) 1970 { 1971 ire_t *ire; 1972 int error = 0; 1973 in6_addr_t setsrc; /* RTF_SETSRC */ 1974 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */ 1975 ip_stack_t *ipst = ixa->ixa_ipst; 1976 dce_t *dce; 1977 uint_t pmtu; 1978 uint_t ifindex; 1979 uint_t generation; 1980 nce_t *nce; 1981 ill_t *ill = NULL; 1982 boolean_t multirt = B_FALSE; 1983 1984 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr)); 1985 1986 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4)); 1987 1988 /* 1989 * We never send to zero; the ULPs map it to the loopback address. 1990 * We can't allow it since we use zero to mean unitialized in some 1991 * places. 1992 */ 1993 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr)); 1994 1995 if (is_system_labeled()) { 1996 ts_label_t *tsl = NULL; 1997 1998 error = tsol_check_dest(ixa->ixa_tsl, dst_addr, IPV6_VERSION, 1999 mac_mode, (flags & IPDF_ZONE_IS_GLOBAL) != 0, &tsl); 2000 if (error != 0) 2001 return (error); 2002 if (tsl != NULL) { 2003 /* Update the label */ 2004 ip_xmit_attr_replace_tsl(ixa, tsl); 2005 } 2006 } 2007 2008 setsrc = ipv6_all_zeros; 2009 /* 2010 * Select a route; For IPMP interfaces, we would only select 2011 * a "hidden" route (i.e., going through a specific under_ill) 2012 * if ixa_ifindex has been specified. 2013 */ 2014 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation, 2015 &setsrc, &error, &multirt); 2016 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 2017 if (error != 0) 2018 goto bad_addr; 2019 2020 /* 2021 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set. 2022 * If IPDF_VERIFY_DST is set, the destination must be reachable. 
2023 * Otherwise the destination needn't be reachable. 2024 * 2025 * If we match on a reject or black hole, then we've got a 2026 * local failure. May as well fail out the connect() attempt, 2027 * since it's never going to succeed. 2028 */ 2029 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2030 /* 2031 * If we're verifying destination reachability, we always want 2032 * to complain here. 2033 * 2034 * If we're not verifying destination reachability but the 2035 * destination has a route, we still want to fail on the 2036 * temporary address and broadcast address tests. 2037 * 2038 * In both cases do we let the code continue so some reasonable 2039 * information is returned to the caller. That enables the 2040 * caller to use (and even cache) the IRE. conn_ip_ouput will 2041 * use the generation mismatch path to check for the unreachable 2042 * case thereby avoiding any specific check in the main path. 2043 */ 2044 ASSERT(generation == IRE_GENERATION_VERIFY); 2045 if (flags & IPDF_VERIFY_DST) { 2046 /* 2047 * Set errno but continue to set up ixa_ire to be 2048 * the RTF_REJECT|RTF_BLACKHOLE IRE. 2049 * That allows callers to use ip_output to get an 2050 * ICMP error back. 2051 */ 2052 if (!(ire->ire_type & IRE_HOST)) 2053 error = ENETUNREACH; 2054 else 2055 error = EHOSTUNREACH; 2056 } 2057 } 2058 2059 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) && 2060 !(flags & IPDF_ALLOW_MCBC)) { 2061 ire_refrele(ire); 2062 ire = ire_reject(ipst, B_FALSE); 2063 generation = IRE_GENERATION_VERIFY; 2064 error = ENETUNREACH; 2065 } 2066 2067 /* Cache things */ 2068 if (ixa->ixa_ire != NULL) 2069 ire_refrele_notr(ixa->ixa_ire); 2070 #ifdef DEBUG 2071 ire_refhold_notr(ire); 2072 ire_refrele(ire); 2073 #endif 2074 ixa->ixa_ire = ire; 2075 ixa->ixa_ire_generation = generation; 2076 2077 /* 2078 * Ensure that ixa_dce is always set any time that ixa_ire is set, 2079 * since some callers will send a packet to conn_ip_output() even if 2080 * there's an error. 
2081 */ 2082 ifindex = 0; 2083 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) { 2084 /* If we are creating a DCE we'd better have an ifindex */ 2085 if (ill != NULL) 2086 ifindex = ill->ill_phyint->phyint_ifindex; 2087 else 2088 flags &= ~IPDF_UNIQUE_DCE; 2089 } 2090 2091 if (flags & IPDF_UNIQUE_DCE) { 2092 /* Fallback to the default dce if allocation fails */ 2093 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst); 2094 if (dce != NULL) { 2095 generation = dce->dce_generation; 2096 } else { 2097 dce = dce_lookup_v6(dst_addr, ifindex, ipst, 2098 &generation); 2099 } 2100 } else { 2101 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation); 2102 } 2103 ASSERT(dce != NULL); 2104 if (ixa->ixa_dce != NULL) 2105 dce_refrele_notr(ixa->ixa_dce); 2106 #ifdef DEBUG 2107 dce_refhold_notr(dce); 2108 dce_refrele(dce); 2109 #endif 2110 ixa->ixa_dce = dce; 2111 ixa->ixa_dce_generation = generation; 2112 2113 2114 /* 2115 * For multicast with multirt we have a flag passed back from 2116 * ire_lookup_multi_ill_v6 since we don't have an IRE for each 2117 * possible multicast address. 2118 * We also need a flag for multicast since we can't check 2119 * whether RTF_MULTIRT is set in ixa_ire for multicast. 2120 */ 2121 if (multirt) { 2122 ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 2123 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 2124 } else { 2125 ixa->ixa_postfragfn = ire->ire_postfragfn; 2126 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 2127 } 2128 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2129 /* Get an nce to cache. */ 2130 nce = ire_to_nce(ire, NULL, firsthop); 2131 if (nce == NULL) { 2132 /* Allocation failure? */ 2133 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2134 } else { 2135 if (ixa->ixa_nce != NULL) 2136 nce_refrele(ixa->ixa_nce); 2137 ixa->ixa_nce = nce; 2138 } 2139 } 2140 2141 /* 2142 * If the source address is a loopback address, the 2143 * destination had best be local or multicast. 
2144 * If we are sending to an IRE_LOCAL using a loopback source then 2145 * it had better be the same zoneid. 2146 */ 2147 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) { 2148 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) { 2149 ire = NULL; /* Stored in ixa_ire */ 2150 error = EADDRNOTAVAIL; 2151 goto bad_addr; 2152 } 2153 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) { 2154 ire = NULL; /* Stored in ixa_ire */ 2155 error = EADDRNOTAVAIL; 2156 goto bad_addr; 2157 } 2158 } 2159 2160 /* 2161 * Does the caller want us to pick a source address? 2162 */ 2163 if (flags & IPDF_SELECT_SRC) { 2164 in6_addr_t src_addr; 2165 2166 /* 2167 * We use use ire_nexthop_ill to avoid the under ipmp 2168 * interface for source address selection. Note that for ipmp 2169 * probe packets, ixa_ifindex would have been specified, and 2170 * the ip_select_route() invocation would have picked an ire 2171 * will ire_ill pointing at an under interface. 2172 */ 2173 ill = ire_nexthop_ill(ire); 2174 2175 /* If unreachable we have no ill but need some source */ 2176 if (ill == NULL) { 2177 src_addr = ipv6_loopback; 2178 /* Make sure we look for a better source address */ 2179 generation = SRC_GENERATION_VERIFY; 2180 } else { 2181 error = ip_select_source_v6(ill, &setsrc, dst_addr, 2182 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences, 2183 &src_addr, &generation, NULL); 2184 if (error != 0) { 2185 ire = NULL; /* Stored in ixa_ire */ 2186 goto bad_addr; 2187 } 2188 } 2189 2190 /* 2191 * We allow the source address to to down. 2192 * However, we check that we don't use the loopback address 2193 * as a source when sending out on the wire. 
2194 */ 2195 if (IN6_IS_ADDR_LOOPBACK(&src_addr) && 2196 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) && 2197 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) { 2198 ire = NULL; /* Stored in ixa_ire */ 2199 error = EADDRNOTAVAIL; 2200 goto bad_addr; 2201 } 2202 2203 *src_addrp = src_addr; 2204 ixa->ixa_src_generation = generation; 2205 } 2206 2207 /* 2208 * Make sure we don't leave an unreachable ixa_nce in place 2209 * since ip_select_route is used when we unplumb i.e., remove 2210 * references on ixa_ire, ixa_nce, and ixa_dce. 2211 */ 2212 nce = ixa->ixa_nce; 2213 if (nce != NULL && nce->nce_is_condemned) { 2214 nce_refrele(nce); 2215 ixa->ixa_nce = NULL; 2216 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2217 } 2218 2219 /* 2220 * Note that IPv6 multicast supports PMTU discovery unlike IPv4 2221 * multicast. But pmtu discovery is only enabled for connected 2222 * sockets in general. 2223 */ 2224 2225 /* 2226 * Set initial value for fragmentation limit. Either conn_ip_output 2227 * or ULP might updates it when there are routing changes. 2228 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT. 2229 */ 2230 pmtu = ip_get_pmtu(ixa); 2231 ixa->ixa_fragsize = pmtu; 2232 /* Make sure ixa_fragsize and ixa_pmtu remain identical */ 2233 if (ixa->ixa_flags & IXAF_VERIFY_PMTU) 2234 ixa->ixa_pmtu = pmtu; 2235 2236 /* 2237 * Extract information useful for some transports. 2238 * First we look for DCE metrics. Then we take what we have in 2239 * the metrics in the route, where the offlink is used if we have 2240 * one. 
2241 */ 2242 if (uinfo != NULL) { 2243 bzero(uinfo, sizeof (*uinfo)); 2244 2245 if (dce->dce_flags & DCEF_UINFO) 2246 *uinfo = dce->dce_uinfo; 2247 2248 rts_merge_metrics(uinfo, &ire->ire_metrics); 2249 2250 /* Allow ire_metrics to decrease the path MTU from above */ 2251 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu) 2252 uinfo->iulp_mtu = pmtu; 2253 2254 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0; 2255 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0; 2256 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0; 2257 } 2258 2259 if (ill != NULL) 2260 ill_refrele(ill); 2261 2262 return (error); 2263 2264 bad_addr: 2265 if (ire != NULL) 2266 ire_refrele(ire); 2267 2268 if (ill != NULL) 2269 ill_refrele(ill); 2270 2271 /* 2272 * Make sure we don't leave an unreachable ixa_nce in place 2273 * since ip_select_route is used when we unplumb i.e., remove 2274 * references on ixa_ire, ixa_nce, and ixa_dce. 2275 */ 2276 nce = ixa->ixa_nce; 2277 if (nce != NULL && nce->nce_is_condemned) { 2278 nce_refrele(nce); 2279 ixa->ixa_nce = NULL; 2280 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 2281 } 2282 2283 return (error); 2284 } 2285 2286 /* 2287 * Handle protocols with which IP is less intimate. There 2288 * can be more than one stream bound to a particular 2289 * protocol. When this is the case, normally each one gets a copy 2290 * of any incoming packets. 2291 * 2292 * Zones notes: 2293 * Packets will be distributed to conns in all zones. This is really only 2294 * useful for ICMPv6 as only applications in the global zone can create raw 2295 * sockets for other protocols. 
2296 */ 2297 void 2298 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira) 2299 { 2300 mblk_t *mp1; 2301 in6_addr_t laddr = ip6h->ip6_dst; 2302 conn_t *connp, *first_connp, *next_connp; 2303 connf_t *connfp; 2304 ill_t *ill = ira->ira_ill; 2305 ip_stack_t *ipst = ill->ill_ipst; 2306 2307 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol]; 2308 mutex_enter(&connfp->connf_lock); 2309 connp = connfp->connf_head; 2310 for (connp = connfp->connf_head; connp != NULL; 2311 connp = connp->conn_next) { 2312 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2313 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2314 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2315 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2316 break; 2317 } 2318 2319 if (connp == NULL) { 2320 /* 2321 * No one bound to this port. Is 2322 * there a client that wants all 2323 * unclaimed datagrams? 2324 */ 2325 mutex_exit(&connfp->connf_lock); 2326 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB, 2327 ICMP6_PARAMPROB_NEXTHEADER, ira); 2328 return; 2329 } 2330 2331 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL); 2332 2333 CONN_INC_REF(connp); 2334 first_connp = connp; 2335 2336 /* 2337 * XXX: Fix the multiple protocol listeners case. We should not 2338 * be walking the conn->conn_next list here. 
2339 */ 2340 connp = connp->conn_next; 2341 for (;;) { 2342 while (connp != NULL) { 2343 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */ 2344 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h) && 2345 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2346 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2347 ira, connp))) 2348 break; 2349 connp = connp->conn_next; 2350 } 2351 2352 if (connp == NULL) { 2353 /* No more interested clients */ 2354 connp = first_connp; 2355 break; 2356 } 2357 if (((mp1 = dupmsg(mp)) == NULL) && 2358 ((mp1 = copymsg(mp)) == NULL)) { 2359 /* Memory allocation failed */ 2360 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2361 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2362 connp = first_connp; 2363 break; 2364 } 2365 2366 CONN_INC_REF(connp); 2367 mutex_exit(&connfp->connf_lock); 2368 2369 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr, 2370 ira); 2371 2372 mutex_enter(&connfp->connf_lock); 2373 /* Follow the next pointer before releasing the conn. */ 2374 next_connp = connp->conn_next; 2375 CONN_DEC_REF(connp); 2376 connp = next_connp; 2377 } 2378 2379 /* Last one. Send it upstream. */ 2380 mutex_exit(&connfp->connf_lock); 2381 2382 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira); 2383 2384 CONN_DEC_REF(connp); 2385 } 2386 2387 /* 2388 * Called when it is conceptually a ULP that would sent the packet 2389 * e.g., port unreachable and nexthdr unknown. Check that the packet 2390 * would have passed the IPsec global policy before sending the error. 2391 * 2392 * Send an ICMP error after patching up the packet appropriately. 2393 * Uses ip_drop_input and bumps the appropriate MIB. 2394 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use. 
2395 */ 2396 void 2397 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code, 2398 ip_recv_attr_t *ira) 2399 { 2400 ip6_t *ip6h; 2401 boolean_t secure; 2402 ill_t *ill = ira->ira_ill; 2403 ip_stack_t *ipst = ill->ill_ipst; 2404 netstack_t *ns = ipst->ips_netstack; 2405 ipsec_stack_t *ipss = ns->netstack_ipsec; 2406 2407 secure = ira->ira_flags & IRAF_IPSEC_SECURE; 2408 2409 /* 2410 * We are generating an icmp error for some inbound packet. 2411 * Called from all ip_fanout_(udp, tcp, proto) functions. 2412 * Before we generate an error, check with global policy 2413 * to see whether this is allowed to enter the system. As 2414 * there is no "conn", we are checking with global policy. 2415 */ 2416 ip6h = (ip6_t *)mp->b_rptr; 2417 if (secure || ipss->ipsec_inbound_v6_policy_present) { 2418 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns); 2419 if (mp == NULL) 2420 return; 2421 } 2422 2423 /* We never send errors for protocols that we do implement */ 2424 if (ira->ira_protocol == IPPROTO_ICMPV6) { 2425 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2426 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill); 2427 freemsg(mp); 2428 return; 2429 } 2430 2431 switch (icmp_type) { 2432 case ICMP6_DST_UNREACH: 2433 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT); 2434 2435 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts); 2436 ip_drop_input("ipIfStatsNoPorts", mp, ill); 2437 2438 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira); 2439 break; 2440 case ICMP6_PARAM_PROB: 2441 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER); 2442 2443 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos); 2444 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill); 2445 2446 /* Let the system determine the offset for this one */ 2447 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira); 2448 break; 2449 default: 2450 #ifdef DEBUG 2451 panic("ip_fanout_send_icmp_v6: wrong type"); 2452 /*NOTREACHED*/ 2453 #else 2454 freemsg(mp); 2455 break; 2456 #endif 2457 } 2458 } 2459 2460 /* 2461 * 
Fanout for UDP packets that are multicast or ICMP errors. 2462 * (Unicast fanout is handled in ip_input_v6.) 2463 * 2464 * If SO_REUSEADDR is set all multicast packets 2465 * will be delivered to all conns bound to the same port. 2466 * 2467 * Fanout for UDP packets. 2468 * The caller puts <fport, lport> in the ports parameter. 2469 * ire_type must be IRE_BROADCAST for multicast and broadcast packets. 2470 * 2471 * If SO_REUSEADDR is set all multicast and broadcast packets 2472 * will be delivered to all conns bound to the same port. 2473 * 2474 * Zones notes: 2475 * Earlier in ip_input on a system with multiple shared-IP zones we 2476 * duplicate the multicast and broadcast packets and send them up 2477 * with each explicit zoneid that exists on that ill. 2478 * This means that here we can match the zoneid with SO_ALLZONES being special. 2479 */ 2480 void 2481 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport, 2482 ip_recv_attr_t *ira) 2483 { 2484 in6_addr_t laddr; 2485 conn_t *connp; 2486 connf_t *connfp; 2487 in6_addr_t faddr; 2488 ill_t *ill = ira->ira_ill; 2489 ip_stack_t *ipst = ill->ill_ipst; 2490 2491 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR)); 2492 2493 laddr = ip6h->ip6_dst; 2494 faddr = ip6h->ip6_src; 2495 2496 /* Attempt to find a client stream based on destination port. 
*/ 2497 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 2498 mutex_enter(&connfp->connf_lock); 2499 connp = connfp->connf_head; 2500 while (connp != NULL) { 2501 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) && 2502 conn_wantpacket_v6(connp, ira, ip6h) && 2503 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2504 tsol_receive_local(mp, &laddr, IPV6_VERSION, ira, connp))) 2505 break; 2506 connp = connp->conn_next; 2507 } 2508 2509 if (connp == NULL) 2510 goto notfound; 2511 2512 CONN_INC_REF(connp); 2513 2514 if (connp->conn_reuseaddr) { 2515 conn_t *first_connp = connp; 2516 conn_t *next_connp; 2517 mblk_t *mp1; 2518 2519 connp = connp->conn_next; 2520 for (;;) { 2521 while (connp != NULL) { 2522 if (IPCL_UDP_MATCH_V6(connp, lport, laddr, 2523 fport, faddr) && 2524 conn_wantpacket_v6(connp, ira, ip6h) && 2525 (!(ira->ira_flags & IRAF_SYSTEM_LABELED) || 2526 tsol_receive_local(mp, &laddr, IPV6_VERSION, 2527 ira, connp))) 2528 break; 2529 connp = connp->conn_next; 2530 } 2531 if (connp == NULL) { 2532 /* No more interested clients */ 2533 connp = first_connp; 2534 break; 2535 } 2536 if (((mp1 = dupmsg(mp)) == NULL) && 2537 ((mp1 = copymsg(mp)) == NULL)) { 2538 /* Memory allocation failed */ 2539 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2540 ip_drop_input("ipIfStatsInDiscards", mp, ill); 2541 connp = first_connp; 2542 break; 2543 } 2544 2545 CONN_INC_REF(connp); 2546 mutex_exit(&connfp->connf_lock); 2547 2548 IP6_STAT(ipst, ip6_udp_fanmb); 2549 ip_fanout_udp_conn(connp, mp1, NULL, 2550 (ip6_t *)mp1->b_rptr, ira); 2551 2552 mutex_enter(&connfp->connf_lock); 2553 /* Follow the next pointer before releasing the conn. */ 2554 next_connp = connp->conn_next; 2555 IP6_STAT(ipst, ip6_udp_fanmb); 2556 CONN_DEC_REF(connp); 2557 connp = next_connp; 2558 } 2559 } 2560 2561 /* Last one. Send it upstream. 
*/ 2562 mutex_exit(&connfp->connf_lock); 2563 2564 IP6_STAT(ipst, ip6_udp_fanmb); 2565 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira); 2566 CONN_DEC_REF(connp); 2567 return; 2568 2569 notfound: 2570 mutex_exit(&connfp->connf_lock); 2571 /* 2572 * No one bound to this port. Is 2573 * there a client that wants all 2574 * unclaimed datagrams? 2575 */ 2576 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) { 2577 ASSERT(ira->ira_protocol == IPPROTO_UDP); 2578 ip_fanout_proto_v6(mp, ip6h, ira); 2579 } else { 2580 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH, 2581 ICMP6_DST_UNREACH_NOPORT, ira); 2582 } 2583 } 2584 2585 /* 2586 * int ip_find_hdr_v6() 2587 * 2588 * This routine is used by the upper layer protocols, iptun, and IPsec: 2589 * - Set extension header pointers to appropriate locations 2590 * - Determine IPv6 header length and return it 2591 * - Return a pointer to the last nexthdr value 2592 * 2593 * The caller must initialize ipp_fields. 2594 * The upper layer protocols normally set label_separate which makes the 2595 * routine put the TX label in ipp_label_v6. If this is not set then 2596 * the hop-by-hop options including the label are placed in ipp_hopopts. 2597 * 2598 * NOTE: If multiple extension headers of the same type are present, 2599 * ip_find_hdr_v6() will set the respective extension header pointers 2600 * to the first one that it encounters in the IPv6 header. It also 2601 * skips fragment headers. This routine deals with malformed packets 2602 * of various sorts in which case the returned length is up to the 2603 * malformed part. 
2604 */ 2605 int 2606 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, boolean_t label_separate, ip_pkt_t *ipp, 2607 uint8_t *nexthdrp) 2608 { 2609 uint_t length, ehdrlen; 2610 uint8_t nexthdr; 2611 uint8_t *whereptr, *endptr; 2612 ip6_dest_t *tmpdstopts; 2613 ip6_rthdr_t *tmprthdr; 2614 ip6_hbh_t *tmphopopts; 2615 ip6_frag_t *tmpfraghdr; 2616 2617 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR; 2618 ipp->ipp_hoplimit = ip6h->ip6_hops; 2619 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 2620 ipp->ipp_addr = ip6h->ip6_dst; 2621 2622 length = IPV6_HDR_LEN; 2623 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2624 endptr = mp->b_wptr; 2625 2626 nexthdr = ip6h->ip6_nxt; 2627 while (whereptr < endptr) { 2628 /* Is there enough left for len + nexthdr? */ 2629 if (whereptr + MIN_EHDR_LEN > endptr) 2630 goto done; 2631 2632 switch (nexthdr) { 2633 case IPPROTO_HOPOPTS: { 2634 /* We check for any CIPSO */ 2635 uchar_t *secopt; 2636 boolean_t hbh_needed; 2637 uchar_t *after_secopt; 2638 2639 tmphopopts = (ip6_hbh_t *)whereptr; 2640 ehdrlen = 8 * (tmphopopts->ip6h_len + 1); 2641 if ((uchar_t *)tmphopopts + ehdrlen > endptr) 2642 goto done; 2643 nexthdr = tmphopopts->ip6h_nxt; 2644 2645 if (!label_separate) { 2646 secopt = NULL; 2647 after_secopt = whereptr; 2648 } else { 2649 /* 2650 * We have dropped packets with bad options in 2651 * ip6_input. No need to check return value 2652 * here. 
2653 */ 2654 (void) tsol_find_secopt_v6(whereptr, ehdrlen, 2655 &secopt, &after_secopt, &hbh_needed); 2656 } 2657 if (secopt != NULL && after_secopt - whereptr > 0) { 2658 ipp->ipp_fields |= IPPF_LABEL_V6; 2659 ipp->ipp_label_v6 = secopt; 2660 ipp->ipp_label_len_v6 = after_secopt - whereptr; 2661 } else { 2662 ipp->ipp_label_len_v6 = 0; 2663 after_secopt = whereptr; 2664 hbh_needed = B_TRUE; 2665 } 2666 /* return only 1st hbh */ 2667 if (hbh_needed && !(ipp->ipp_fields & IPPF_HOPOPTS)) { 2668 ipp->ipp_fields |= IPPF_HOPOPTS; 2669 ipp->ipp_hopopts = (ip6_hbh_t *)after_secopt; 2670 ipp->ipp_hopoptslen = ehdrlen - 2671 ipp->ipp_label_len_v6; 2672 } 2673 break; 2674 } 2675 case IPPROTO_DSTOPTS: 2676 tmpdstopts = (ip6_dest_t *)whereptr; 2677 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1); 2678 if ((uchar_t *)tmpdstopts + ehdrlen > endptr) 2679 goto done; 2680 nexthdr = tmpdstopts->ip6d_nxt; 2681 /* 2682 * ipp_dstopts is set to the destination header after a 2683 * routing header. 2684 * Assume it is a post-rthdr destination header 2685 * and adjust when we find an rthdr. 2686 */ 2687 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2688 ipp->ipp_fields |= IPPF_DSTOPTS; 2689 ipp->ipp_dstopts = tmpdstopts; 2690 ipp->ipp_dstoptslen = ehdrlen; 2691 } 2692 break; 2693 case IPPROTO_ROUTING: 2694 tmprthdr = (ip6_rthdr_t *)whereptr; 2695 ehdrlen = 8 * (tmprthdr->ip6r_len + 1); 2696 if ((uchar_t *)tmprthdr + ehdrlen > endptr) 2697 goto done; 2698 nexthdr = tmprthdr->ip6r_nxt; 2699 /* return only 1st rthdr */ 2700 if (!(ipp->ipp_fields & IPPF_RTHDR)) { 2701 ipp->ipp_fields |= IPPF_RTHDR; 2702 ipp->ipp_rthdr = tmprthdr; 2703 ipp->ipp_rthdrlen = ehdrlen; 2704 } 2705 /* 2706 * Make any destination header we've seen be a 2707 * pre-rthdr destination header. 
2708 */ 2709 if (ipp->ipp_fields & IPPF_DSTOPTS) { 2710 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2711 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS; 2712 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts; 2713 ipp->ipp_dstopts = NULL; 2714 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen; 2715 ipp->ipp_dstoptslen = 0; 2716 } 2717 break; 2718 case IPPROTO_FRAGMENT: 2719 tmpfraghdr = (ip6_frag_t *)whereptr; 2720 ehdrlen = sizeof (ip6_frag_t); 2721 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr) 2722 goto done; 2723 nexthdr = tmpfraghdr->ip6f_nxt; 2724 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) { 2725 ipp->ipp_fields |= IPPF_FRAGHDR; 2726 ipp->ipp_fraghdr = tmpfraghdr; 2727 ipp->ipp_fraghdrlen = ehdrlen; 2728 } 2729 break; 2730 case IPPROTO_NONE: 2731 default: 2732 goto done; 2733 } 2734 length += ehdrlen; 2735 whereptr += ehdrlen; 2736 } 2737 done: 2738 if (nexthdrp != NULL) 2739 *nexthdrp = nexthdr; 2740 return (length); 2741 } 2742 2743 /* 2744 * Try to determine where and what are the IPv6 header length and 2745 * pointer to nexthdr value for the upper layer protocol (or an 2746 * unknown next hdr). 2747 * 2748 * Parameters returns a pointer to the nexthdr value; 2749 * Must handle malformed packets of various sorts. 2750 * Function returns failure for malformed cases. 2751 */ 2752 boolean_t 2753 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr, 2754 uint8_t **nexthdrpp) 2755 { 2756 uint16_t length; 2757 uint_t ehdrlen; 2758 uint8_t *nexthdrp; 2759 uint8_t *whereptr; 2760 uint8_t *endptr; 2761 ip6_dest_t *desthdr; 2762 ip6_rthdr_t *rthdr; 2763 ip6_frag_t *fraghdr; 2764 2765 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 2766 length = IPV6_HDR_LEN; 2767 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 2768 endptr = mp->b_wptr; 2769 2770 nexthdrp = &ip6h->ip6_nxt; 2771 while (whereptr < endptr) { 2772 /* Is there enough left for len + nexthdr? 
*/ 2773 if (whereptr + MIN_EHDR_LEN > endptr) 2774 break; 2775 2776 switch (*nexthdrp) { 2777 case IPPROTO_HOPOPTS: 2778 case IPPROTO_DSTOPTS: 2779 /* Assumes the headers are identical for hbh and dst */ 2780 desthdr = (ip6_dest_t *)whereptr; 2781 ehdrlen = 8 * (desthdr->ip6d_len + 1); 2782 if ((uchar_t *)desthdr + ehdrlen > endptr) 2783 return (B_FALSE); 2784 nexthdrp = &desthdr->ip6d_nxt; 2785 break; 2786 case IPPROTO_ROUTING: 2787 rthdr = (ip6_rthdr_t *)whereptr; 2788 ehdrlen = 8 * (rthdr->ip6r_len + 1); 2789 if ((uchar_t *)rthdr + ehdrlen > endptr) 2790 return (B_FALSE); 2791 nexthdrp = &rthdr->ip6r_nxt; 2792 break; 2793 case IPPROTO_FRAGMENT: 2794 fraghdr = (ip6_frag_t *)whereptr; 2795 ehdrlen = sizeof (ip6_frag_t); 2796 if ((uchar_t *)&fraghdr[1] > endptr) 2797 return (B_FALSE); 2798 nexthdrp = &fraghdr->ip6f_nxt; 2799 break; 2800 case IPPROTO_NONE: 2801 /* No next header means we're finished */ 2802 default: 2803 *hdr_length_ptr = length; 2804 *nexthdrpp = nexthdrp; 2805 return (B_TRUE); 2806 } 2807 length += ehdrlen; 2808 whereptr += ehdrlen; 2809 *hdr_length_ptr = length; 2810 *nexthdrpp = nexthdrp; 2811 } 2812 switch (*nexthdrp) { 2813 case IPPROTO_HOPOPTS: 2814 case IPPROTO_DSTOPTS: 2815 case IPPROTO_ROUTING: 2816 case IPPROTO_FRAGMENT: 2817 /* 2818 * If any know extension headers are still to be processed, 2819 * the packet's malformed (or at least all the IP header(s) are 2820 * not in the same mblk - and that should never happen. 2821 */ 2822 return (B_FALSE); 2823 2824 default: 2825 /* 2826 * If we get here, we know that all of the IP headers were in 2827 * the same mblk, even if the ULP header is in the next mblk. 2828 */ 2829 *hdr_length_ptr = length; 2830 *nexthdrpp = nexthdrp; 2831 return (B_TRUE); 2832 } 2833 } 2834 2835 /* 2836 * Return the length of the IPv6 related headers (including extension headers) 2837 * Returns a length even if the packet is malformed. 
2838 */ 2839 int 2840 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h) 2841 { 2842 uint16_t hdr_len; 2843 uint8_t *nexthdrp; 2844 2845 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp); 2846 return (hdr_len); 2847 } 2848 2849 /* 2850 * Parse and process any hop-by-hop or destination options. 2851 * 2852 * Assumes that q is an ill read queue so that ICMP errors for link-local 2853 * destinations are sent out the correct interface. 2854 * 2855 * Returns -1 if there was an error and mp has been consumed. 2856 * Returns 0 if no special action is needed. 2857 * Returns 1 if the packet contained a router alert option for this node 2858 * which is verified to be "interesting/known" for our implementation. 2859 * 2860 * XXX Note: In future as more hbh or dest options are defined, 2861 * it may be better to have different routines for hbh and dest 2862 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN 2863 * may have same value in different namespaces. Or is it same namespace ?? 2864 * Current code checks for each opt_type (other than pads) if it is in 2865 * the expected nexthdr (hbh or dest) 2866 */ 2867 int 2868 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h, 2869 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira) 2870 { 2871 uint8_t opt_type; 2872 uint_t optused; 2873 int ret = 0; 2874 const char *errtype; 2875 ill_t *ill = ira->ira_ill; 2876 ip_stack_t *ipst = ill->ill_ipst; 2877 2878 while (optlen != 0) { 2879 opt_type = *optptr; 2880 if (opt_type == IP6OPT_PAD1) { 2881 optused = 1; 2882 } else { 2883 if (optlen < 2) 2884 goto bad_opt; 2885 errtype = "malformed"; 2886 if (opt_type == ip6opt_ls) { 2887 optused = 2 + optptr[1]; 2888 if (optused > optlen) 2889 goto bad_opt; 2890 } else switch (opt_type) { 2891 case IP6OPT_PADN: 2892 /* 2893 * Note:We don't verify that (N-2) pad octets 2894 * are zero as required by spec. Adhere to 2895 * "be liberal in what you accept..." 
part of 2896 * implementation philosophy (RFC791,RFC1122) 2897 */ 2898 optused = 2 + optptr[1]; 2899 if (optused > optlen) 2900 goto bad_opt; 2901 break; 2902 2903 case IP6OPT_JUMBO: 2904 if (hdr_type != IPPROTO_HOPOPTS) 2905 goto opt_error; 2906 goto opt_error; /* XXX Not implemented! */ 2907 2908 case IP6OPT_ROUTER_ALERT: { 2909 struct ip6_opt_router *or; 2910 2911 if (hdr_type != IPPROTO_HOPOPTS) 2912 goto opt_error; 2913 optused = 2 + optptr[1]; 2914 if (optused > optlen) 2915 goto bad_opt; 2916 or = (struct ip6_opt_router *)optptr; 2917 /* Check total length and alignment */ 2918 if (optused != sizeof (*or) || 2919 ((uintptr_t)or->ip6or_value & 0x1) != 0) 2920 goto opt_error; 2921 /* Check value */ 2922 switch (*((uint16_t *)or->ip6or_value)) { 2923 case IP6_ALERT_MLD: 2924 case IP6_ALERT_RSVP: 2925 ret = 1; 2926 } 2927 break; 2928 } 2929 case IP6OPT_HOME_ADDRESS: { 2930 /* 2931 * Minimal support for the home address option 2932 * (which is required by all IPv6 nodes). 2933 * Implement by just swapping the home address 2934 * and source address. 2935 * XXX Note: this has IPsec implications since 2936 * AH needs to take this into account. 2937 * Also, when IPsec is used we need to ensure 2938 * that this is only processed once 2939 * in the received packet (to avoid swapping 2940 * back and forth). 2941 * NOTE:This option processing is considered 2942 * to be unsafe and prone to a denial of 2943 * service attack. 2944 * The current processing is not safe even with 2945 * IPsec secured IP packets. Since the home 2946 * address option processing requirement still 2947 * is in the IETF draft and in the process of 2948 * being redefined for its usage, it has been 2949 * decided to turn off the option by default. 2950 * If this section of code needs to be executed, 2951 * ndd variable ip6_ignore_home_address_opt 2952 * should be set to 0 at the user's own risk. 
2953 */ 2954 struct ip6_opt_home_address *oh; 2955 in6_addr_t tmp; 2956 2957 if (ipst->ips_ipv6_ignore_home_address_opt) 2958 goto opt_error; 2959 2960 if (hdr_type != IPPROTO_DSTOPTS) 2961 goto opt_error; 2962 optused = 2 + optptr[1]; 2963 if (optused > optlen) 2964 goto bad_opt; 2965 2966 /* 2967 * We did this dest. opt the first time 2968 * around (i.e. before AH processing). 2969 * If we've done AH... stop now. 2970 */ 2971 if ((ira->ira_flags & IRAF_IPSEC_SECURE) && 2972 ira->ira_ipsec_ah_sa != NULL) 2973 break; 2974 2975 oh = (struct ip6_opt_home_address *)optptr; 2976 /* Check total length and alignment */ 2977 if (optused < sizeof (*oh) || 2978 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0) 2979 goto opt_error; 2980 /* Swap ip6_src and the home address */ 2981 tmp = ip6h->ip6_src; 2982 /* XXX Note: only 8 byte alignment option */ 2983 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr; 2984 *(in6_addr_t *)oh->ip6oh_addr = tmp; 2985 break; 2986 } 2987 2988 case IP6OPT_TUNNEL_LIMIT: 2989 if (hdr_type != IPPROTO_DSTOPTS) { 2990 goto opt_error; 2991 } 2992 optused = 2 + optptr[1]; 2993 if (optused > optlen) { 2994 goto bad_opt; 2995 } 2996 if (optused != 3) { 2997 goto opt_error; 2998 } 2999 break; 3000 3001 default: 3002 errtype = "unknown"; 3003 /* FALLTHROUGH */ 3004 opt_error: 3005 /* Determine which zone should send error */ 3006 switch (IP6OPT_TYPE(opt_type)) { 3007 case IP6OPT_TYPE_SKIP: 3008 optused = 2 + optptr[1]; 3009 if (optused > optlen) 3010 goto bad_opt; 3011 ip1dbg(("ip_process_options_v6: %s " 3012 "opt 0x%x skipped\n", 3013 errtype, opt_type)); 3014 break; 3015 case IP6OPT_TYPE_DISCARD: 3016 ip1dbg(("ip_process_options_v6: %s " 3017 "opt 0x%x; packet dropped\n", 3018 errtype, opt_type)); 3019 BUMP_MIB(ill->ill_ip_mib, 3020 ipIfStatsInHdrErrors); 3021 ip_drop_input("ipIfStatsInHdrErrors", 3022 mp, ill); 3023 freemsg(mp); 3024 return (-1); 3025 case IP6OPT_TYPE_ICMP: 3026 BUMP_MIB(ill->ill_ip_mib, 3027 ipIfStatsInHdrErrors); 3028 
ip_drop_input("ipIfStatsInHdrErrors", 3029 mp, ill); 3030 icmp_param_problem_v6(mp, 3031 ICMP6_PARAMPROB_OPTION, 3032 (uint32_t)(optptr - 3033 (uint8_t *)ip6h), 3034 B_FALSE, ira); 3035 return (-1); 3036 case IP6OPT_TYPE_FORCEICMP: 3037 BUMP_MIB(ill->ill_ip_mib, 3038 ipIfStatsInHdrErrors); 3039 ip_drop_input("ipIfStatsInHdrErrors", 3040 mp, ill); 3041 icmp_param_problem_v6(mp, 3042 ICMP6_PARAMPROB_OPTION, 3043 (uint32_t)(optptr - 3044 (uint8_t *)ip6h), 3045 B_TRUE, ira); 3046 return (-1); 3047 default: 3048 ASSERT(0); 3049 } 3050 } 3051 } 3052 optlen -= optused; 3053 optptr += optused; 3054 } 3055 return (ret); 3056 3057 bad_opt: 3058 /* Determine which zone should send error */ 3059 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3060 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION, 3061 (uint32_t)(optptr - (uint8_t *)ip6h), 3062 B_FALSE, ira); 3063 return (-1); 3064 } 3065 3066 /* 3067 * Process a routing header that is not yet empty. 3068 * Because of RFC 5095, we now reject all route headers. 3069 */ 3070 void 3071 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth, 3072 ip_recv_attr_t *ira) 3073 { 3074 ill_t *ill = ira->ira_ill; 3075 ip_stack_t *ipst = ill->ill_ipst; 3076 3077 ASSERT(rth->ip6r_segleft != 0); 3078 3079 if (!ipst->ips_ipv6_forward_src_routed) { 3080 /* XXX Check for source routed out same interface? */ 3081 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 3082 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 3083 ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 3084 freemsg(mp); 3085 return; 3086 } 3087 3088 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3089 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3090 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h), 3091 B_FALSE, ira); 3092 } 3093 3094 /* 3095 * Read side put procedure for IPv6 module. 
3096 */ 3097 void 3098 ip_rput_v6(queue_t *q, mblk_t *mp) 3099 { 3100 ill_t *ill; 3101 3102 ill = (ill_t *)q->q_ptr; 3103 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) { 3104 union DL_primitives *dl; 3105 3106 dl = (union DL_primitives *)mp->b_rptr; 3107 /* 3108 * Things are opening or closing - only accept DLPI 3109 * ack messages. If the stream is closing and ip_wsrv 3110 * has completed, ip_close is out of the qwait, but has 3111 * not yet completed qprocsoff. Don't proceed any further 3112 * because the ill has been cleaned up and things hanging 3113 * off the ill have been freed. 3114 */ 3115 if ((mp->b_datap->db_type != M_PCPROTO) || 3116 (dl->dl_primitive == DL_UNITDATA_IND)) { 3117 inet_freemsg(mp); 3118 return; 3119 } 3120 } 3121 if (DB_TYPE(mp) == M_DATA) { 3122 struct mac_header_info_s mhi; 3123 3124 ip_mdata_to_mhi(ill, mp, &mhi); 3125 ip_input_v6(ill, NULL, mp, &mhi); 3126 } else { 3127 ip_rput_notdata(ill, mp); 3128 } 3129 } 3130 3131 /* 3132 * Walk through the IPv6 packet in mp and see if there's an AH header 3133 * in it. See if the AH header needs to get done before other headers in 3134 * the packet. (Worker function for ipsec_early_ah_v6().) 3135 */ 3136 #define IPSEC_HDR_DONT_PROCESS 0 3137 #define IPSEC_HDR_PROCESS 1 3138 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */ 3139 static int 3140 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr) 3141 { 3142 uint_t length; 3143 uint_t ehdrlen; 3144 uint8_t *whereptr; 3145 uint8_t *endptr; 3146 uint8_t *nexthdrp; 3147 ip6_dest_t *desthdr; 3148 ip6_rthdr_t *rthdr; 3149 ip6_t *ip6h; 3150 3151 /* 3152 * For now just pullup everything. In general, the less pullups, 3153 * the better, but there's so much squirrelling through anyway, 3154 * it's just easier this way. 
3155 */ 3156 if (!pullupmsg(mp, -1)) { 3157 return (IPSEC_MEMORY_ERROR); 3158 } 3159 3160 ip6h = (ip6_t *)mp->b_rptr; 3161 length = IPV6_HDR_LEN; 3162 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 3163 endptr = mp->b_wptr; 3164 3165 /* 3166 * We can't just use the argument nexthdr in the place 3167 * of nexthdrp becaue we don't dereference nexthdrp 3168 * till we confirm whether it is a valid address. 3169 */ 3170 nexthdrp = &ip6h->ip6_nxt; 3171 while (whereptr < endptr) { 3172 /* Is there enough left for len + nexthdr? */ 3173 if (whereptr + MIN_EHDR_LEN > endptr) 3174 return (IPSEC_MEMORY_ERROR); 3175 3176 switch (*nexthdrp) { 3177 case IPPROTO_HOPOPTS: 3178 case IPPROTO_DSTOPTS: 3179 /* Assumes the headers are identical for hbh and dst */ 3180 desthdr = (ip6_dest_t *)whereptr; 3181 ehdrlen = 8 * (desthdr->ip6d_len + 1); 3182 if ((uchar_t *)desthdr + ehdrlen > endptr) 3183 return (IPSEC_MEMORY_ERROR); 3184 /* 3185 * Return DONT_PROCESS because the destination 3186 * options header may be for each hop in a 3187 * routing-header, and we only want AH if we're 3188 * finished with routing headers. 3189 */ 3190 if (*nexthdrp == IPPROTO_DSTOPTS) 3191 return (IPSEC_HDR_DONT_PROCESS); 3192 nexthdrp = &desthdr->ip6d_nxt; 3193 break; 3194 case IPPROTO_ROUTING: 3195 rthdr = (ip6_rthdr_t *)whereptr; 3196 3197 /* 3198 * If there's more hops left on the routing header, 3199 * return now with DON'T PROCESS. 
3200 */ 3201 if (rthdr->ip6r_segleft > 0) 3202 return (IPSEC_HDR_DONT_PROCESS); 3203 3204 ehdrlen = 8 * (rthdr->ip6r_len + 1); 3205 if ((uchar_t *)rthdr + ehdrlen > endptr) 3206 return (IPSEC_MEMORY_ERROR); 3207 nexthdrp = &rthdr->ip6r_nxt; 3208 break; 3209 case IPPROTO_FRAGMENT: 3210 /* Wait for reassembly */ 3211 return (IPSEC_HDR_DONT_PROCESS); 3212 case IPPROTO_AH: 3213 *nexthdr = IPPROTO_AH; 3214 return (IPSEC_HDR_PROCESS); 3215 case IPPROTO_NONE: 3216 /* No next header means we're finished */ 3217 default: 3218 return (IPSEC_HDR_DONT_PROCESS); 3219 } 3220 length += ehdrlen; 3221 whereptr += ehdrlen; 3222 } 3223 /* 3224 * Malformed/truncated packet. 3225 */ 3226 return (IPSEC_MEMORY_ERROR); 3227 } 3228 3229 /* 3230 * Path for AH if options are present. 3231 * Returns NULL if the mblk was consumed. 3232 * 3233 * Sometimes AH needs to be done before other IPv6 headers for security 3234 * reasons. This function (and its ipsec_needs_processing_v6() above) 3235 * indicates if that is so, and fans out to the appropriate IPsec protocol 3236 * for the datagram passed in. 3237 */ 3238 mblk_t * 3239 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira) 3240 { 3241 uint8_t nexthdr; 3242 ah_t *ah; 3243 ill_t *ill = ira->ira_ill; 3244 ip_stack_t *ipst = ill->ill_ipst; 3245 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 3246 3247 switch (ipsec_needs_processing_v6(mp, &nexthdr)) { 3248 case IPSEC_MEMORY_ERROR: 3249 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3250 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3251 freemsg(mp); 3252 return (NULL); 3253 case IPSEC_HDR_DONT_PROCESS: 3254 return (mp); 3255 } 3256 3257 /* Default means send it to AH! 
*/ 3258 ASSERT(nexthdr == IPPROTO_AH); 3259 3260 if (!ipsec_loaded(ipss)) { 3261 ip_proto_not_sup(mp, ira); 3262 return (NULL); 3263 } 3264 3265 mp = ipsec_inbound_ah_sa(mp, ira, &ah); 3266 if (mp == NULL) 3267 return (NULL); 3268 ASSERT(ah != NULL); 3269 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3270 ASSERT(ira->ira_ipsec_ah_sa != NULL); 3271 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 3272 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira); 3273 3274 if (mp == NULL) { 3275 /* 3276 * Either it failed or is pending. In the former case 3277 * ipIfStatsInDiscards was increased. 3278 */ 3279 return (NULL); 3280 } 3281 3282 /* we're done with IPsec processing, send it up */ 3283 ip_input_post_ipsec(mp, ira); 3284 return (NULL); 3285 } 3286 3287 /* 3288 * Reassemble fragment. 3289 * When it returns a completed message the first mblk will only contain 3290 * the headers prior to the fragment header, with the nexthdr value updated 3291 * to be the header after the fragment header. 3292 */ 3293 mblk_t * 3294 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h, 3295 ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira) 3296 { 3297 uint32_t ident = ntohl(fraghdr->ip6f_ident); 3298 uint16_t offset; 3299 boolean_t more_frags; 3300 uint8_t nexthdr = fraghdr->ip6f_nxt; 3301 in6_addr_t *v6dst_ptr; 3302 in6_addr_t *v6src_ptr; 3303 uint_t end; 3304 uint_t hdr_length; 3305 size_t count; 3306 ipf_t *ipf; 3307 ipf_t **ipfp; 3308 ipfb_t *ipfb; 3309 mblk_t *mp1; 3310 uint8_t ecn_info = 0; 3311 size_t msg_len; 3312 mblk_t *tail_mp; 3313 mblk_t *t_mp; 3314 boolean_t pruned = B_FALSE; 3315 uint32_t sum_val; 3316 uint16_t sum_flags; 3317 ill_t *ill = ira->ira_ill; 3318 ip_stack_t *ipst = ill->ill_ipst; 3319 uint_t prev_nexthdr_offset; 3320 uint8_t prev_nexthdr; 3321 uint8_t *ptr; 3322 uint32_t packet_size; 3323 3324 /* 3325 * We utilize hardware computed checksum info only for UDP since 3326 * IP fragmentation is a normal occurence for the protocol. 
In 3327 * addition, checksum offload support for IP fragments carrying 3328 * UDP payload is commonly implemented across network adapters. 3329 */ 3330 ASSERT(ira->ira_rill != NULL); 3331 if (nexthdr == IPPROTO_UDP && dohwcksum && 3332 ILL_HCKSUM_CAPABLE(ira->ira_rill) && 3333 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) { 3334 mblk_t *mp1 = mp->b_cont; 3335 int32_t len; 3336 3337 /* Record checksum information from the packet */ 3338 sum_val = (uint32_t)DB_CKSUM16(mp); 3339 sum_flags = DB_CKSUMFLAGS(mp); 3340 3341 /* fragmented payload offset from beginning of mblk */ 3342 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr); 3343 3344 if ((sum_flags & HCK_PARTIALCKSUM) && 3345 (mp1 == NULL || mp1->b_cont == NULL) && 3346 offset >= DB_CKSUMSTART(mp) && 3347 ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) { 3348 uint32_t adj; 3349 /* 3350 * Partial checksum has been calculated by hardware 3351 * and attached to the packet; in addition, any 3352 * prepended extraneous data is even byte aligned. 3353 * If any such data exists, we adjust the checksum; 3354 * this would also handle any postpended data. 3355 */ 3356 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp), 3357 mp, mp1, len, adj); 3358 3359 /* One's complement subtract extraneous checksum */ 3360 if (adj >= sum_val) 3361 sum_val = ~(adj - sum_val) & 0xFFFF; 3362 else 3363 sum_val -= adj; 3364 } 3365 } else { 3366 sum_val = 0; 3367 sum_flags = 0; 3368 } 3369 3370 /* Clear hardware checksumming flag */ 3371 DB_CKSUMFLAGS(mp) = 0; 3372 3373 /* 3374 * Determine the offset (from the begining of the IP header) 3375 * of the nexthdr value which has IPPROTO_FRAGMENT. We use 3376 * this when removing the fragment header from the packet. 3377 * This packet consists of the IPv6 header, a potential 3378 * hop-by-hop options header, a potential pre-routing-header 3379 * destination options header, and a potential routing header. 
3380 */ 3381 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 3382 prev_nexthdr = ip6h->ip6_nxt; 3383 ptr = (uint8_t *)&ip6h[1]; 3384 3385 if (prev_nexthdr == IPPROTO_HOPOPTS) { 3386 ip6_hbh_t *hbh_hdr; 3387 uint_t hdr_len; 3388 3389 hbh_hdr = (ip6_hbh_t *)ptr; 3390 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 3391 prev_nexthdr = hbh_hdr->ip6h_nxt; 3392 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 3393 - (uint8_t *)ip6h; 3394 ptr += hdr_len; 3395 } 3396 if (prev_nexthdr == IPPROTO_DSTOPTS) { 3397 ip6_dest_t *dest_hdr; 3398 uint_t hdr_len; 3399 3400 dest_hdr = (ip6_dest_t *)ptr; 3401 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 3402 prev_nexthdr = dest_hdr->ip6d_nxt; 3403 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 3404 - (uint8_t *)ip6h; 3405 ptr += hdr_len; 3406 } 3407 if (prev_nexthdr == IPPROTO_ROUTING) { 3408 ip6_rthdr_t *rthdr; 3409 uint_t hdr_len; 3410 3411 rthdr = (ip6_rthdr_t *)ptr; 3412 prev_nexthdr = rthdr->ip6r_nxt; 3413 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 3414 - (uint8_t *)ip6h; 3415 hdr_len = 8 * (rthdr->ip6r_len + 1); 3416 ptr += hdr_len; 3417 } 3418 if (prev_nexthdr != IPPROTO_FRAGMENT) { 3419 /* Can't handle other headers before the fragment header */ 3420 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3421 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3422 freemsg(mp); 3423 return (NULL); 3424 } 3425 3426 /* 3427 * Note: Fragment offset in header is in 8-octet units. 3428 * Clearing least significant 3 bits not only extracts 3429 * it but also gets it in units of octets. 3430 */ 3431 offset = ntohs(fraghdr->ip6f_offlg) & ~7; 3432 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG); 3433 3434 /* 3435 * Is the more frags flag on and the payload length not a multiple 3436 * of eight? 
3437 */ 3438 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) { 3439 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill); 3440 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3441 (uint32_t)((char *)&ip6h->ip6_plen - 3442 (char *)ip6h), B_FALSE, ira); 3443 return (NULL); 3444 } 3445 3446 v6src_ptr = &ip6h->ip6_src; 3447 v6dst_ptr = &ip6h->ip6_dst; 3448 end = remlen; 3449 3450 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h); 3451 end += offset; 3452 3453 /* 3454 * Would fragment cause reassembled packet to have a payload length 3455 * greater than IP_MAXPACKET - the max payload size? 3456 */ 3457 if (end > IP_MAXPACKET) { 3458 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3459 ip_drop_input("Reassembled packet too large", mp, ill); 3460 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER, 3461 (uint32_t)((char *)&fraghdr->ip6f_offlg - 3462 (char *)ip6h), B_FALSE, ira); 3463 return (NULL); 3464 } 3465 3466 /* 3467 * This packet just has one fragment. Reassembly not 3468 * needed. 3469 */ 3470 if (!more_frags && offset == 0) { 3471 goto reass_done; 3472 } 3473 3474 /* 3475 * Drop the fragmented as early as possible, if 3476 * we don't have resource(s) to re-assemble. 3477 */ 3478 if (ipst->ips_ip_reass_queue_bytes == 0) { 3479 freemsg(mp); 3480 return (NULL); 3481 } 3482 3483 /* Record the ECN field info. */ 3484 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20); 3485 /* 3486 * If this is not the first fragment, dump the unfragmentable 3487 * portion of the packet. 3488 */ 3489 if (offset) 3490 mp->b_rptr = (uchar_t *)&fraghdr[1]; 3491 3492 /* 3493 * Fragmentation reassembly. Each ILL has a hash table for 3494 * queueing packets undergoing reassembly for all IPIFs 3495 * associated with the ILL. The hash is based on the packet 3496 * IP ident field. The ILL frag hash table was allocated 3497 * as a timer block at the time the ILL was created. Whenever 3498 * there is anything on the reassembly queue, the timer will 3499 * be running. 
3500 */ 3501 /* Handle vnic loopback of fragments */ 3502 if (mp->b_datap->db_ref > 2) 3503 msg_len = 0; 3504 else 3505 msg_len = MBLKSIZE(mp); 3506 3507 tail_mp = mp; 3508 while (tail_mp->b_cont != NULL) { 3509 tail_mp = tail_mp->b_cont; 3510 if (tail_mp->b_datap->db_ref <= 2) 3511 msg_len += MBLKSIZE(tail_mp); 3512 } 3513 /* 3514 * If the reassembly list for this ILL will get too big 3515 * prune it. 3516 */ 3517 3518 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >= 3519 ipst->ips_ip_reass_queue_bytes) { 3520 DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len, 3521 uint_t, ill->ill_frag_count, 3522 uint_t, ipst->ips_ip_reass_queue_bytes); 3523 ill_frag_prune(ill, 3524 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 : 3525 (ipst->ips_ip_reass_queue_bytes - msg_len)); 3526 pruned = B_TRUE; 3527 } 3528 3529 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)]; 3530 mutex_enter(&ipfb->ipfb_lock); 3531 3532 ipfp = &ipfb->ipfb_ipf; 3533 /* Try to find an existing fragment queue for this packet. */ 3534 for (;;) { 3535 ipf = ipfp[0]; 3536 if (ipf) { 3537 /* 3538 * It has to match on ident, source address, and 3539 * dest address. 3540 */ 3541 if (ipf->ipf_ident == ident && 3542 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) && 3543 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) { 3544 3545 /* 3546 * If we have received too many 3547 * duplicate fragments for this packet 3548 * free it. 3549 */ 3550 if (ipf->ipf_num_dups > ip_max_frag_dups) { 3551 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3552 freemsg(mp); 3553 mutex_exit(&ipfb->ipfb_lock); 3554 return (NULL); 3555 } 3556 3557 break; 3558 } 3559 ipfp = &ipf->ipf_hash_next; 3560 continue; 3561 } 3562 3563 3564 /* 3565 * If we pruned the list, do we want to store this new 3566 * fragment?. We apply an optimization here based on the 3567 * fact that most fragments will be received in order. 3568 * So if the offset of this incoming fragment is zero, 3569 * it is the first fragment of a new packet. 
We will 3570 * keep it. Otherwise drop the fragment, as we have 3571 * probably pruned the packet already (since the 3572 * packet cannot be found). 3573 */ 3574 3575 if (pruned && offset != 0) { 3576 mutex_exit(&ipfb->ipfb_lock); 3577 freemsg(mp); 3578 return (NULL); 3579 } 3580 3581 /* New guy. Allocate a frag message. */ 3582 mp1 = allocb(sizeof (*ipf), BPRI_MED); 3583 if (!mp1) { 3584 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3585 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3586 freemsg(mp); 3587 partial_reass_done: 3588 mutex_exit(&ipfb->ipfb_lock); 3589 return (NULL); 3590 } 3591 3592 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) { 3593 /* 3594 * Too many fragmented packets in this hash bucket. 3595 * Free the oldest. 3596 */ 3597 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1); 3598 } 3599 3600 mp1->b_cont = mp; 3601 3602 /* Initialize the fragment header. */ 3603 ipf = (ipf_t *)mp1->b_rptr; 3604 ipf->ipf_mp = mp1; 3605 ipf->ipf_ptphn = ipfp; 3606 ipfp[0] = ipf; 3607 ipf->ipf_hash_next = NULL; 3608 ipf->ipf_ident = ident; 3609 ipf->ipf_v6src = *v6src_ptr; 3610 ipf->ipf_v6dst = *v6dst_ptr; 3611 /* Record reassembly start time. */ 3612 ipf->ipf_timestamp = gethrestime_sec(); 3613 /* Record ipf generation and account for frag header */ 3614 ipf->ipf_gen = ill->ill_ipf_gen++; 3615 ipf->ipf_count = MBLKSIZE(mp1); 3616 ipf->ipf_protocol = nexthdr; 3617 ipf->ipf_nf_hdr_len = 0; 3618 ipf->ipf_prev_nexthdr_offset = 0; 3619 ipf->ipf_last_frag_seen = B_FALSE; 3620 ipf->ipf_ecn = ecn_info; 3621 ipf->ipf_num_dups = 0; 3622 ipfb->ipfb_frag_pkts++; 3623 ipf->ipf_checksum = 0; 3624 ipf->ipf_checksum_flags = 0; 3625 3626 /* Store checksum value in fragment header */ 3627 if (sum_flags != 0) { 3628 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3629 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3630 ipf->ipf_checksum = sum_val; 3631 ipf->ipf_checksum_flags = sum_flags; 3632 } 3633 3634 /* 3635 * We handle reassembly two ways. 
In the easy case, 3636 * where all the fragments show up in order, we do 3637 * minimal bookkeeping, and just clip new pieces on 3638 * the end. If we ever see a hole, then we go off 3639 * to ip_reassemble which has to mark the pieces and 3640 * keep track of the number of holes, etc. Obviously, 3641 * the point of having both mechanisms is so we can 3642 * handle the easy case as efficiently as possible. 3643 */ 3644 if (offset == 0) { 3645 /* Easy case, in-order reassembly so far. */ 3646 /* Update the byte count */ 3647 ipf->ipf_count += msg_len; 3648 ipf->ipf_tail_mp = tail_mp; 3649 /* 3650 * Keep track of next expected offset in 3651 * ipf_end. 3652 */ 3653 ipf->ipf_end = end; 3654 ipf->ipf_nf_hdr_len = hdr_length; 3655 ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset; 3656 } else { 3657 /* Hard case, hole at the beginning. */ 3658 ipf->ipf_tail_mp = NULL; 3659 /* 3660 * ipf_end == 0 means that we have given up 3661 * on easy reassembly. 3662 */ 3663 ipf->ipf_end = 0; 3664 3665 /* Forget checksum offload from now on */ 3666 ipf->ipf_checksum_flags = 0; 3667 3668 /* 3669 * ipf_hole_cnt is set by ip_reassemble. 3670 * ipf_count is updated by ip_reassemble. 3671 * No need to check for return value here 3672 * as we don't expect reassembly to complete or 3673 * fail for the first fragment itself. 3674 */ 3675 (void) ip_reassemble(mp, ipf, offset, more_frags, ill, 3676 msg_len); 3677 } 3678 /* Update per ipfb and ill byte counts */ 3679 ipfb->ipfb_count += ipf->ipf_count; 3680 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3681 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count); 3682 /* If the frag timer wasn't already going, start it. 
*/ 3683 mutex_enter(&ill->ill_lock); 3684 ill_frag_timer_start(ill); 3685 mutex_exit(&ill->ill_lock); 3686 goto partial_reass_done; 3687 } 3688 3689 /* 3690 * If the packet's flag has changed (it could be coming up 3691 * from an interface different than the previous, therefore 3692 * possibly different checksum capability), then forget about 3693 * any stored checksum states. Otherwise add the value to 3694 * the existing one stored in the fragment header. 3695 */ 3696 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) { 3697 sum_val += ipf->ipf_checksum; 3698 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3699 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16); 3700 ipf->ipf_checksum = sum_val; 3701 } else if (ipf->ipf_checksum_flags != 0) { 3702 /* Forget checksum offload from now on */ 3703 ipf->ipf_checksum_flags = 0; 3704 } 3705 3706 /* 3707 * We have a new piece of a datagram which is already being 3708 * reassembled. Update the ECN info if all IP fragments 3709 * are ECN capable. If there is one which is not, clear 3710 * all the info. If there is at least one which has CE 3711 * code point, IP needs to report that up to transport. 3712 */ 3713 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) { 3714 if (ecn_info == IPH_ECN_CE) 3715 ipf->ipf_ecn = IPH_ECN_CE; 3716 } else { 3717 ipf->ipf_ecn = IPH_ECN_NECT; 3718 } 3719 3720 if (offset && ipf->ipf_end == offset) { 3721 /* The new fragment fits at the end */ 3722 ipf->ipf_tail_mp->b_cont = mp; 3723 /* Update the byte count */ 3724 ipf->ipf_count += msg_len; 3725 /* Update per ipfb and ill byte counts */ 3726 ipfb->ipfb_count += msg_len; 3727 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3728 atomic_add_32(&ill->ill_frag_count, msg_len); 3729 if (more_frags) { 3730 /* More to come. */ 3731 ipf->ipf_end = end; 3732 ipf->ipf_tail_mp = tail_mp; 3733 goto partial_reass_done; 3734 } 3735 } else { 3736 /* 3737 * Go do the hard cases. 3738 * Call ip_reassemble(). 
3739 */ 3740 int ret; 3741 3742 if (offset == 0) { 3743 if (ipf->ipf_prev_nexthdr_offset == 0) { 3744 ipf->ipf_nf_hdr_len = hdr_length; 3745 ipf->ipf_prev_nexthdr_offset = 3746 prev_nexthdr_offset; 3747 } 3748 } 3749 /* Save current byte count */ 3750 count = ipf->ipf_count; 3751 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len); 3752 3753 /* Count of bytes added and subtracted (freeb()ed) */ 3754 count = ipf->ipf_count - count; 3755 if (count) { 3756 /* Update per ipfb and ill byte counts */ 3757 ipfb->ipfb_count += count; 3758 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */ 3759 atomic_add_32(&ill->ill_frag_count, count); 3760 } 3761 if (ret == IP_REASS_PARTIAL) { 3762 goto partial_reass_done; 3763 } else if (ret == IP_REASS_FAILED) { 3764 /* Reassembly failed. Free up all resources */ 3765 ill_frag_free_pkts(ill, ipfb, ipf, 1); 3766 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) { 3767 IP_REASS_SET_START(t_mp, 0); 3768 IP_REASS_SET_END(t_mp, 0); 3769 } 3770 freemsg(mp); 3771 goto partial_reass_done; 3772 } 3773 3774 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */ 3775 } 3776 /* 3777 * We have completed reassembly. Unhook the frag header from 3778 * the reassembly list. 3779 * 3780 * Grab the unfragmentable header length next header value out 3781 * of the first fragment 3782 */ 3783 ASSERT(ipf->ipf_nf_hdr_len != 0); 3784 hdr_length = ipf->ipf_nf_hdr_len; 3785 3786 /* 3787 * Before we free the frag header, record the ECN info 3788 * to report back to the transport. 
3789 */ 3790 ecn_info = ipf->ipf_ecn; 3791 3792 /* 3793 * Store the nextheader field in the header preceding the fragment 3794 * header 3795 */ 3796 nexthdr = ipf->ipf_protocol; 3797 prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset; 3798 ipfp = ipf->ipf_ptphn; 3799 3800 /* We need to supply these to caller */ 3801 if ((sum_flags = ipf->ipf_checksum_flags) != 0) 3802 sum_val = ipf->ipf_checksum; 3803 else 3804 sum_val = 0; 3805 3806 mp1 = ipf->ipf_mp; 3807 count = ipf->ipf_count; 3808 ipf = ipf->ipf_hash_next; 3809 if (ipf) 3810 ipf->ipf_ptphn = ipfp; 3811 ipfp[0] = ipf; 3812 atomic_add_32(&ill->ill_frag_count, -count); 3813 ASSERT(ipfb->ipfb_count >= count); 3814 ipfb->ipfb_count -= count; 3815 ipfb->ipfb_frag_pkts--; 3816 mutex_exit(&ipfb->ipfb_lock); 3817 /* Ditch the frag header. */ 3818 mp = mp1->b_cont; 3819 freeb(mp1); 3820 3821 /* 3822 * Make sure the packet is good by doing some sanity 3823 * check. If bad we can silentely drop the packet. 3824 */ 3825 reass_done: 3826 if (hdr_length < sizeof (ip6_frag_t)) { 3827 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 3828 ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 3829 ip1dbg(("ip_input_fragment_v6: bad packet\n")); 3830 freemsg(mp); 3831 return (NULL); 3832 } 3833 3834 /* 3835 * Remove the fragment header from the initial header by 3836 * splitting the mblk into the non-fragmentable header and 3837 * everthing after the fragment extension header. This has the 3838 * side effect of putting all the headers that need destination 3839 * processing into the b_cont block-- on return this fact is 3840 * used in order to avoid having to look at the extensions 3841 * already processed. 3842 * 3843 * Note that this code assumes that the unfragmentable portion 3844 * of the header is in the first mblk and increments 3845 * the read pointer past it. If this assumption is broken 3846 * this code fails badly. 
3847 */ 3848 if (mp->b_rptr + hdr_length != mp->b_wptr) { 3849 mblk_t *nmp; 3850 3851 if (!(nmp = dupb(mp))) { 3852 ip1dbg(("ip_input_fragment_v6: dupb failed\n")); 3853 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3854 ip_drop_input("ipIfStatsInDiscards", mp, ill); 3855 freemsg(mp); 3856 return (NULL); 3857 } 3858 nmp->b_cont = mp->b_cont; 3859 mp->b_cont = nmp; 3860 nmp->b_rptr += hdr_length; 3861 } 3862 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t); 3863 3864 ip6h = (ip6_t *)mp->b_rptr; 3865 ((char *)ip6h)[prev_nexthdr_offset] = nexthdr; 3866 3867 /* Restore original IP length in header. */ 3868 packet_size = msgdsize(mp); 3869 ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN)); 3870 /* Record the ECN info. */ 3871 ip6h->ip6_vcf &= htonl(0xFFCFFFFF); 3872 ip6h->ip6_vcf |= htonl(ecn_info << 20); 3873 3874 /* Update the receive attributes */ 3875 ira->ira_pktlen = packet_size; 3876 ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t); 3877 ira->ira_protocol = nexthdr; 3878 3879 /* Reassembly is successful; set checksum information in packet */ 3880 DB_CKSUM16(mp) = (uint16_t)sum_val; 3881 DB_CKSUMFLAGS(mp) = sum_flags; 3882 DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length; 3883 3884 return (mp); 3885 } 3886 3887 /* 3888 * Given an mblk and a ptr, find the destination address in an IPv6 routing 3889 * header. 
3890 */ 3891 static in6_addr_t 3892 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv) 3893 { 3894 ip6_rthdr0_t *rt0; 3895 int segleft, numaddr; 3896 in6_addr_t *ap, rv = oldrv; 3897 3898 rt0 = (ip6_rthdr0_t *)whereptr; 3899 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) { 3900 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp, 3901 uint8_t *, whereptr); 3902 return (rv); 3903 } 3904 segleft = rt0->ip6r0_segleft; 3905 numaddr = rt0->ip6r0_len / 2; 3906 3907 if ((rt0->ip6r0_len & 0x1) || 3908 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) || 3909 (segleft > rt0->ip6r0_len / 2)) { 3910 /* 3911 * Corrupt packet. Either the routing header length is odd 3912 * (can't happen) or mismatched compared to the packet, or the 3913 * number of addresses is. Return what we can. This will 3914 * only be a problem on forwarded packets that get squeezed 3915 * through an outbound tunnel enforcing IPsec Tunnel Mode. 3916 */ 3917 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *, 3918 whereptr); 3919 return (rv); 3920 } 3921 3922 if (segleft != 0) { 3923 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0)); 3924 rv = ap[numaddr - 1]; 3925 } 3926 3927 return (rv); 3928 } 3929 3930 /* 3931 * Walk through the options to see if there is a routing header. 3932 * If present get the destination which is the last address of 3933 * the option. 3934 * mp needs to be provided in cases when the extension headers might span 3935 * b_cont; mp is never modified by this function. 3936 */ 3937 in6_addr_t 3938 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment) 3939 { 3940 const mblk_t *current_mp = mp; 3941 uint8_t nexthdr; 3942 uint8_t *whereptr; 3943 int ehdrlen; 3944 in6_addr_t rv; 3945 3946 whereptr = (uint8_t *)ip6h; 3947 ehdrlen = sizeof (ip6_t); 3948 3949 /* We assume at least the IPv6 base header is within one mblk. 
*/ 3950 ASSERT(mp == NULL || 3951 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen)); 3952 3953 rv = ip6h->ip6_dst; 3954 nexthdr = ip6h->ip6_nxt; 3955 if (is_fragment != NULL) 3956 *is_fragment = B_FALSE; 3957 3958 /* 3959 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that 3960 * no extension headers will be split across mblks. 3961 */ 3962 3963 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS || 3964 nexthdr == IPPROTO_ROUTING) { 3965 if (nexthdr == IPPROTO_ROUTING) 3966 rv = pluck_out_dst(current_mp, whereptr, rv); 3967 3968 /* 3969 * All IPv6 extension headers have the next-header in byte 3970 * 0, and the (length - 8) in 8-byte-words. 3971 */ 3972 while (current_mp != NULL && 3973 whereptr + ehdrlen >= current_mp->b_wptr) { 3974 ehdrlen -= (current_mp->b_wptr - whereptr); 3975 current_mp = current_mp->b_cont; 3976 if (current_mp == NULL) { 3977 /* Bad packet. Return what we can. */ 3978 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *, 3979 mp, mblk_t *, current_mp, ip6_t *, ip6h); 3980 goto done; 3981 } 3982 whereptr = current_mp->b_rptr; 3983 } 3984 whereptr += ehdrlen; 3985 3986 nexthdr = *whereptr; 3987 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr); 3988 ehdrlen = (*(whereptr + 1) + 1) * 8; 3989 } 3990 3991 done: 3992 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL) 3993 *is_fragment = B_TRUE; 3994 return (rv); 3995 } 3996 3997 /* 3998 * ip_source_routed_v6: 3999 * This function is called by redirect code (called from ip_input_v6) to 4000 * know whether this packet is source routed through this node i.e 4001 * whether this node (router) is part of the journey. This 4002 * function is called under two cases : 4003 * 4004 * case 1 : Routing header was processed by this node and 4005 * ip_process_rthdr replaced ip6_dst with the next hop 4006 * and we are forwarding the packet to the next hop. 
4007 * 4008 * case 2 : Routing header was not processed by this node and we 4009 * are just forwarding the packet. 4010 * 4011 * For case (1) we don't want to send redirects. For case(2) we 4012 * want to send redirects. 4013 */ 4014 static boolean_t 4015 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst) 4016 { 4017 uint8_t nexthdr; 4018 in6_addr_t *addrptr; 4019 ip6_rthdr0_t *rthdr; 4020 uint8_t numaddr; 4021 ip6_hbh_t *hbhhdr; 4022 uint_t ehdrlen; 4023 uint8_t *byteptr; 4024 4025 ip2dbg(("ip_source_routed_v6\n")); 4026 nexthdr = ip6h->ip6_nxt; 4027 ehdrlen = IPV6_HDR_LEN; 4028 4029 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */ 4030 while (nexthdr == IPPROTO_HOPOPTS || 4031 nexthdr == IPPROTO_DSTOPTS) { 4032 byteptr = (uint8_t *)ip6h + ehdrlen; 4033 /* 4034 * Check if we have already processed 4035 * packets or we are just a forwarding 4036 * router which only pulled up msgs up 4037 * to IPV6HDR and one HBH ext header 4038 */ 4039 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4040 ip2dbg(("ip_source_routed_v6: Extension" 4041 " headers not processed\n")); 4042 return (B_FALSE); 4043 } 4044 hbhhdr = (ip6_hbh_t *)byteptr; 4045 nexthdr = hbhhdr->ip6h_nxt; 4046 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1); 4047 } 4048 switch (nexthdr) { 4049 case IPPROTO_ROUTING: 4050 byteptr = (uint8_t *)ip6h + ehdrlen; 4051 /* 4052 * If for some reason, we haven't pulled up 4053 * the routing hdr data mblk, then we must 4054 * not have processed it at all. So for sure 4055 * we are not part of the source routed journey. 4056 */ 4057 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) { 4058 ip2dbg(("ip_source_routed_v6: Routing" 4059 " header not processed\n")); 4060 return (B_FALSE); 4061 } 4062 rthdr = (ip6_rthdr0_t *)byteptr; 4063 /* 4064 * Either we are an intermediate router or the 4065 * last hop before destination and we have 4066 * already processed the routing header. 
4067 * If segment_left is greater than or equal to zero, 4068 * then we must be the (numaddr - segleft) entry 4069 * of the routing header. Although ip6r0_segleft 4070 * is a unit8_t variable, we still check for zero 4071 * or greater value, if in case the data type 4072 * is changed someday in future. 4073 */ 4074 if (rthdr->ip6r0_segleft > 0 || 4075 rthdr->ip6r0_segleft == 0) { 4076 numaddr = rthdr->ip6r0_len / 2; 4077 addrptr = (in6_addr_t *)((char *)rthdr + 4078 sizeof (*rthdr)); 4079 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1)); 4080 if (addrptr != NULL) { 4081 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL) 4082 return (B_TRUE); 4083 ip1dbg(("ip_source_routed_v6: Not local\n")); 4084 } 4085 } 4086 /* FALLTHRU */ 4087 default: 4088 ip2dbg(("ip_source_routed_v6: Not source routed here\n")); 4089 return (B_FALSE); 4090 } 4091 } 4092 4093 /* 4094 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation. 4095 * We have not optimized this in terms of number of mblks 4096 * allocated. For instance, for each fragment sent we always allocate a 4097 * mblk to hold the IPv6 header and fragment header. 4098 * 4099 * Assumes that all the extension headers are contained in the first mblk 4100 * and that the fragment header has has already been added by calling 4101 * ip_fraghdr_add_v6. 
4102 */ 4103 int 4104 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len, 4105 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 4106 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie) 4107 { 4108 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4109 ip6_t *fip6h; 4110 mblk_t *hmp; 4111 mblk_t *hmp0; 4112 mblk_t *dmp; 4113 ip6_frag_t *fraghdr; 4114 size_t unfragmentable_len; 4115 size_t mlen; 4116 size_t max_chunk; 4117 uint16_t off_flags; 4118 uint16_t offset = 0; 4119 ill_t *ill = nce->nce_ill; 4120 uint8_t nexthdr; 4121 uint8_t *ptr; 4122 ip_stack_t *ipst = ill->ill_ipst; 4123 uint_t priority = mp->b_band; 4124 int error = 0; 4125 4126 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds); 4127 if (max_frag == 0) { 4128 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4129 ip_drop_output("FragFails: zero max_frag", mp, ill); 4130 freemsg(mp); 4131 return (EINVAL); 4132 } 4133 4134 /* 4135 * Caller should have added fraghdr_t to pkt_len, and also 4136 * updated ip6_plen. 4137 */ 4138 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len); 4139 ASSERT(msgdsize(mp) == pkt_len); 4140 4141 /* 4142 * Determine the length of the unfragmentable portion of this 4143 * datagram. This consists of the IPv6 header, a potential 4144 * hop-by-hop options header, a potential pre-routing-header 4145 * destination options header, and a potential routing header. 
4146 */ 4147 nexthdr = ip6h->ip6_nxt; 4148 ptr = (uint8_t *)&ip6h[1]; 4149 4150 if (nexthdr == IPPROTO_HOPOPTS) { 4151 ip6_hbh_t *hbh_hdr; 4152 uint_t hdr_len; 4153 4154 hbh_hdr = (ip6_hbh_t *)ptr; 4155 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4156 nexthdr = hbh_hdr->ip6h_nxt; 4157 ptr += hdr_len; 4158 } 4159 if (nexthdr == IPPROTO_DSTOPTS) { 4160 ip6_dest_t *dest_hdr; 4161 uint_t hdr_len; 4162 4163 dest_hdr = (ip6_dest_t *)ptr; 4164 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4165 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4166 nexthdr = dest_hdr->ip6d_nxt; 4167 ptr += hdr_len; 4168 } 4169 } 4170 if (nexthdr == IPPROTO_ROUTING) { 4171 ip6_rthdr_t *rthdr; 4172 uint_t hdr_len; 4173 4174 rthdr = (ip6_rthdr_t *)ptr; 4175 nexthdr = rthdr->ip6r_nxt; 4176 hdr_len = 8 * (rthdr->ip6r_len + 1); 4177 ptr += hdr_len; 4178 } 4179 if (nexthdr != IPPROTO_FRAGMENT) { 4180 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4181 ip_drop_output("FragFails: bad nexthdr", mp, ill); 4182 freemsg(mp); 4183 return (EINVAL); 4184 } 4185 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4186 unfragmentable_len += sizeof (ip6_frag_t); 4187 4188 max_chunk = (max_frag - unfragmentable_len) & ~7; 4189 4190 /* 4191 * Allocate an mblk with enough room for the link-layer 4192 * header and the unfragmentable part of the datagram, which includes 4193 * the fragment header. This (or a copy) will be used as the 4194 * first mblk for each fragment we send. 4195 */ 4196 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp); 4197 if (hmp == NULL) { 4198 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4199 ip_drop_output("FragFails: no hmp", mp, ill); 4200 freemsg(mp); 4201 return (ENOBUFS); 4202 } 4203 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4204 hmp->b_wptr = hmp->b_rptr + unfragmentable_len; 4205 4206 fip6h = (ip6_t *)hmp->b_rptr; 4207 bcopy(ip6h, fip6h, unfragmentable_len); 4208 4209 /* 4210 * pkt_len is set to the total length of the fragmentable data in this 4211 * datagram. 
For each fragment sent, we will decrement pkt_len 4212 * by the amount of fragmentable data sent in that fragment 4213 * until len reaches zero. 4214 */ 4215 pkt_len -= unfragmentable_len; 4216 4217 /* 4218 * Move read ptr past unfragmentable portion, we don't want this part 4219 * of the data in our fragments. 4220 */ 4221 mp->b_rptr += unfragmentable_len; 4222 if (mp->b_rptr == mp->b_wptr) { 4223 mblk_t *mp1 = mp->b_cont; 4224 freeb(mp); 4225 mp = mp1; 4226 } 4227 4228 while (pkt_len != 0) { 4229 mlen = MIN(pkt_len, max_chunk); 4230 pkt_len -= mlen; 4231 if (pkt_len != 0) { 4232 /* Not last */ 4233 hmp0 = copyb(hmp); 4234 if (hmp0 == NULL) { 4235 BUMP_MIB(ill->ill_ip_mib, 4236 ipIfStatsOutFragFails); 4237 ip_drop_output("FragFails: copyb failed", 4238 mp, ill); 4239 freeb(hmp); 4240 freemsg(mp); 4241 ip1dbg(("ip_fragment_v6: copyb failed\n")); 4242 return (ENOBUFS); 4243 } 4244 off_flags = IP6F_MORE_FRAG; 4245 } else { 4246 /* Last fragment */ 4247 hmp0 = hmp; 4248 hmp = NULL; 4249 off_flags = 0; 4250 } 4251 fip6h = (ip6_t *)(hmp0->b_rptr); 4252 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len - 4253 sizeof (ip6_frag_t)); 4254 4255 fip6h->ip6_plen = htons((uint16_t)(mlen + 4256 unfragmentable_len - IPV6_HDR_LEN)); 4257 /* 4258 * Note: Optimization alert. 4259 * In IPv6 (and IPv4) protocol header, Fragment Offset 4260 * ("offset") is 13 bits wide and in 8-octet units. 4261 * In IPv6 protocol header (unlike IPv4) in a 16 bit field, 4262 * it occupies the most significant 13 bits. 4263 * (least significant 13 bits in IPv4). 4264 * We do not do any shifts here. Not shifting is same effect 4265 * as taking offset value in octet units, dividing by 8 and 4266 * then shifting 3 bits left to line it up in place in proper 4267 * place protocol header. 
4268 */ 4269 fraghdr->ip6f_offlg = htons(offset) | off_flags; 4270 4271 if (!(dmp = ip_carve_mp(&mp, mlen))) { 4272 /* mp has already been freed by ip_carve_mp() */ 4273 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4274 ip_drop_output("FragFails: could not carve mp", 4275 hmp0, ill); 4276 if (hmp != NULL) 4277 freeb(hmp); 4278 freeb(hmp0); 4279 ip1dbg(("ip_carve_mp: failed\n")); 4280 return (ENOBUFS); 4281 } 4282 hmp0->b_cont = dmp; 4283 /* Get the priority marking, if any */ 4284 hmp0->b_band = priority; 4285 4286 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates); 4287 4288 error = postfragfn(hmp0, nce, ixaflags, 4289 mlen + unfragmentable_len, xmit_hint, szone, nolzid, 4290 ixa_cookie); 4291 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) { 4292 /* No point in sending the other fragments */ 4293 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails); 4294 ip_drop_output("FragFails: postfragfn failed", 4295 hmp, ill); 4296 freeb(hmp); 4297 freemsg(mp); 4298 return (error); 4299 } 4300 /* No need to redo state machine in loop */ 4301 ixaflags &= ~IXAF_REACH_CONF; 4302 4303 offset += mlen; 4304 } 4305 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs); 4306 return (error); 4307 } 4308 4309 /* 4310 * Add a fragment header to an IPv6 packet. 4311 * Assumes that all the extension headers are contained in the first mblk. 4312 * 4313 * The fragment header is inserted after an hop-by-hop options header 4314 * and after [an optional destinations header followed by] a routing header. 4315 */ 4316 mblk_t * 4317 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa) 4318 { 4319 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 4320 ip6_t *fip6h; 4321 mblk_t *hmp; 4322 ip6_frag_t *fraghdr; 4323 size_t unfragmentable_len; 4324 uint8_t nexthdr; 4325 uint_t prev_nexthdr_offset; 4326 uint8_t *ptr; 4327 uint_t priority = mp->b_band; 4328 ip_stack_t *ipst = ixa->ixa_ipst; 4329 4330 /* 4331 * Determine the length of the unfragmentable portion of this 4332 * datagram. 
This consists of the IPv6 header, a potential 4333 * hop-by-hop options header, a potential pre-routing-header 4334 * destination options header, and a potential routing header. 4335 */ 4336 nexthdr = ip6h->ip6_nxt; 4337 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h; 4338 ptr = (uint8_t *)&ip6h[1]; 4339 4340 if (nexthdr == IPPROTO_HOPOPTS) { 4341 ip6_hbh_t *hbh_hdr; 4342 uint_t hdr_len; 4343 4344 hbh_hdr = (ip6_hbh_t *)ptr; 4345 hdr_len = 8 * (hbh_hdr->ip6h_len + 1); 4346 nexthdr = hbh_hdr->ip6h_nxt; 4347 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt 4348 - (uint8_t *)ip6h; 4349 ptr += hdr_len; 4350 } 4351 if (nexthdr == IPPROTO_DSTOPTS) { 4352 ip6_dest_t *dest_hdr; 4353 uint_t hdr_len; 4354 4355 dest_hdr = (ip6_dest_t *)ptr; 4356 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) { 4357 hdr_len = 8 * (dest_hdr->ip6d_len + 1); 4358 nexthdr = dest_hdr->ip6d_nxt; 4359 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt 4360 - (uint8_t *)ip6h; 4361 ptr += hdr_len; 4362 } 4363 } 4364 if (nexthdr == IPPROTO_ROUTING) { 4365 ip6_rthdr_t *rthdr; 4366 uint_t hdr_len; 4367 4368 rthdr = (ip6_rthdr_t *)ptr; 4369 nexthdr = rthdr->ip6r_nxt; 4370 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt 4371 - (uint8_t *)ip6h; 4372 hdr_len = 8 * (rthdr->ip6r_len + 1); 4373 ptr += hdr_len; 4374 } 4375 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h); 4376 4377 /* 4378 * Allocate an mblk with enough room for the link-layer 4379 * header, the unfragmentable part of the datagram, and the 4380 * fragment header. 
4381 */ 4382 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) + 4383 ipst->ips_ip_wroff_extra, mp); 4384 if (hmp == NULL) { 4385 ill_t *ill = ixa->ixa_nce->nce_ill; 4386 4387 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 4388 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill); 4389 freemsg(mp); 4390 return (NULL); 4391 } 4392 hmp->b_rptr += ipst->ips_ip_wroff_extra; 4393 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t); 4394 4395 fip6h = (ip6_t *)hmp->b_rptr; 4396 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len); 4397 4398 bcopy(ip6h, fip6h, unfragmentable_len); 4399 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t)); 4400 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT; 4401 4402 fraghdr->ip6f_nxt = nexthdr; 4403 fraghdr->ip6f_reserved = 0; 4404 fraghdr->ip6f_offlg = 0; 4405 fraghdr->ip6f_ident = htonl(ident); 4406 4407 /* Get the priority marking, if any */ 4408 hmp->b_band = priority; 4409 4410 /* 4411 * Move read ptr past unfragmentable portion, we don't want this part 4412 * of the data in our fragments. 4413 */ 4414 mp->b_rptr += unfragmentable_len; 4415 hmp->b_cont = mp; 4416 return (hmp); 4417 } 4418 4419 /* 4420 * Determine if the ill and multicast aspects of that packets 4421 * "matches" the conn. 4422 */ 4423 boolean_t 4424 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h) 4425 { 4426 ill_t *ill = ira->ira_rill; 4427 zoneid_t zoneid = ira->ira_zoneid; 4428 uint_t in_ifindex; 4429 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst; 4430 in6_addr_t *v6src_ptr = &ip6h->ip6_src; 4431 4432 /* 4433 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local 4434 * scopeid. This is used to limit 4435 * unicast and multicast reception to conn_incoming_ifindex. 4436 * conn_wantpacket_v6 is called both for unicast and 4437 * multicast packets. 
4438 */ 4439 in_ifindex = connp->conn_incoming_ifindex; 4440 4441 /* mpathd can bind to the under IPMP interface, which we allow */ 4442 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) { 4443 if (!IS_UNDER_IPMP(ill)) 4444 return (B_FALSE); 4445 4446 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill)) 4447 return (B_FALSE); 4448 } 4449 4450 if (!IPCL_ZONE_MATCH(connp, zoneid)) 4451 return (B_FALSE); 4452 4453 if (!(ira->ira_flags & IRAF_MULTICAST)) 4454 return (B_TRUE); 4455 4456 if (connp->conn_multi_router) 4457 return (B_TRUE); 4458 4459 if (ira->ira_protocol == IPPROTO_RSVP) 4460 return (B_TRUE); 4461 4462 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr, 4463 ira->ira_ill)); 4464 } 4465 4466 /* 4467 * pr_addr_dbg function provides the needed buffer space to call 4468 * inet_ntop() function's 3rd argument. This function should be 4469 * used by any kernel routine which wants to save INET6_ADDRSTRLEN 4470 * stack buffer space in it's own stack frame. This function uses 4471 * a buffer from it's own stack and prints the information. 4472 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr) 4473 * 4474 * Note: This function can call inet_ntop() once. 4475 */ 4476 void 4477 pr_addr_dbg(char *fmt1, int af, const void *addr) 4478 { 4479 char buf[INET6_ADDRSTRLEN]; 4480 4481 if (fmt1 == NULL) { 4482 ip0dbg(("pr_addr_dbg: Wrong arguments\n")); 4483 return; 4484 } 4485 4486 /* 4487 * This does not compare debug level and just prints 4488 * out. Thus it is the responsibility of the caller 4489 * to check the appropriate debug-level before calling 4490 * this function. 4491 */ 4492 if (ip_debug > 0) { 4493 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf))); 4494 } 4495 4496 4497 } 4498 4499 4500 /* 4501 * Return the length in bytes of the IPv6 headers (base header 4502 * extension headers) that will be needed based on the 4503 * ip_pkt_t structure passed by the caller. 
4504 * 4505 * The returned length does not include the length of the upper level 4506 * protocol (ULP) header. 4507 */ 4508 int 4509 ip_total_hdrs_len_v6(const ip_pkt_t *ipp) 4510 { 4511 int len; 4512 4513 len = IPV6_HDR_LEN; 4514 4515 /* 4516 * If there's a security label here, then we ignore any hop-by-hop 4517 * options the user may try to set. 4518 */ 4519 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4520 uint_t hopoptslen; 4521 /* 4522 * Note that ipp_label_len_v6 is just the option - not 4523 * the hopopts extension header. It also needs to be padded 4524 * to a multiple of 8 bytes. 4525 */ 4526 ASSERT(ipp->ipp_label_len_v6 != 0); 4527 hopoptslen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4528 hopoptslen = (hopoptslen + 7)/8 * 8; 4529 len += hopoptslen; 4530 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4531 ASSERT(ipp->ipp_hopoptslen != 0); 4532 len += ipp->ipp_hopoptslen; 4533 } 4534 4535 /* 4536 * En-route destination options 4537 * Only do them if there's a routing header as well 4538 */ 4539 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4540 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4541 ASSERT(ipp->ipp_rthdrdstoptslen != 0); 4542 len += ipp->ipp_rthdrdstoptslen; 4543 } 4544 if (ipp->ipp_fields & IPPF_RTHDR) { 4545 ASSERT(ipp->ipp_rthdrlen != 0); 4546 len += ipp->ipp_rthdrlen; 4547 } 4548 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4549 ASSERT(ipp->ipp_dstoptslen != 0); 4550 len += ipp->ipp_dstoptslen; 4551 } 4552 return (len); 4553 } 4554 4555 /* 4556 * All-purpose routine to build a header chain of an IPv6 header 4557 * followed by any required extension headers and a proto header. 4558 * 4559 * The caller has to set the source and destination address as well as 4560 * ip6_plen. The caller has to massage any routing header and compensate 4561 * for the ULP pseudo-header checksum due to the source route. 4562 * 4563 * The extension headers will all be fully filled in. 
4564 */ 4565 void 4566 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp, 4567 uint8_t protocol, uint32_t flowinfo) 4568 { 4569 uint8_t *nxthdr_ptr; 4570 uint8_t *cp; 4571 ip6_t *ip6h = (ip6_t *)buf; 4572 4573 /* Initialize IPv6 header */ 4574 ip6h->ip6_vcf = 4575 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4576 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4577 4578 if (ipp->ipp_fields & IPPF_TCLASS) { 4579 /* Overrides the class part of flowinfo */ 4580 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4581 ipp->ipp_tclass); 4582 } 4583 4584 if (ipp->ipp_fields & IPPF_HOPLIMIT) 4585 ip6h->ip6_hops = ipp->ipp_hoplimit; 4586 else 4587 ip6h->ip6_hops = ipp->ipp_unicast_hops; 4588 4589 if ((ipp->ipp_fields & IPPF_ADDR) && 4590 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4591 ip6h->ip6_src = ipp->ipp_addr; 4592 4593 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4594 cp = (uint8_t *)&ip6h[1]; 4595 /* 4596 * Here's where we have to start stringing together 4597 * any extension headers in the right order: 4598 * Hop-by-hop, destination, routing, and final destination opts. 4599 */ 4600 /* 4601 * If there's a security label here, then we ignore any hop-by-hop 4602 * options the user may try to set. 4603 */ 4604 if (ipp->ipp_fields & IPPF_LABEL_V6) { 4605 /* 4606 * Hop-by-hop options with the label. 4607 * Note that ipp_label_v6 is just the option - not 4608 * the hopopts extension header. It also needs to be padded 4609 * to a multiple of 8 bytes. 
4610 */ 4611 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4612 uint_t hopoptslen; 4613 uint_t padlen; 4614 4615 padlen = ipp->ipp_label_len_v6 + sizeof (ip6_hbh_t); 4616 hopoptslen = (padlen + 7)/8 * 8; 4617 padlen = hopoptslen - padlen; 4618 4619 *nxthdr_ptr = IPPROTO_HOPOPTS; 4620 nxthdr_ptr = &hbh->ip6h_nxt; 4621 hbh->ip6h_len = hopoptslen/8 - 1; 4622 cp += sizeof (ip6_hbh_t); 4623 bcopy(ipp->ipp_label_v6, cp, ipp->ipp_label_len_v6); 4624 cp += ipp->ipp_label_len_v6; 4625 4626 ASSERT(padlen <= 7); 4627 switch (padlen) { 4628 case 0: 4629 break; 4630 case 1: 4631 cp[0] = IP6OPT_PAD1; 4632 break; 4633 default: 4634 cp[0] = IP6OPT_PADN; 4635 cp[1] = padlen - 2; 4636 bzero(&cp[2], padlen - 2); 4637 break; 4638 } 4639 cp += padlen; 4640 } else if (ipp->ipp_fields & IPPF_HOPOPTS) { 4641 /* Hop-by-hop options */ 4642 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4643 4644 *nxthdr_ptr = IPPROTO_HOPOPTS; 4645 nxthdr_ptr = &hbh->ip6h_nxt; 4646 4647 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen); 4648 cp += ipp->ipp_hopoptslen; 4649 } 4650 /* 4651 * En-route destination options 4652 * Only do them if there's a routing header as well 4653 */ 4654 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) == 4655 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) { 4656 ip6_dest_t *dst = (ip6_dest_t *)cp; 4657 4658 *nxthdr_ptr = IPPROTO_DSTOPTS; 4659 nxthdr_ptr = &dst->ip6d_nxt; 4660 4661 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen); 4662 cp += ipp->ipp_rthdrdstoptslen; 4663 } 4664 /* 4665 * Routing header next 4666 */ 4667 if (ipp->ipp_fields & IPPF_RTHDR) { 4668 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4669 4670 *nxthdr_ptr = IPPROTO_ROUTING; 4671 nxthdr_ptr = &rt->ip6r_nxt; 4672 4673 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen); 4674 cp += ipp->ipp_rthdrlen; 4675 } 4676 /* 4677 * Do ultimate destination options 4678 */ 4679 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4680 ip6_dest_t *dest = (ip6_dest_t *)cp; 4681 4682 *nxthdr_ptr = IPPROTO_DSTOPTS; 4683 nxthdr_ptr = &dest->ip6d_nxt; 4684 4685 
bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen); 4686 cp += ipp->ipp_dstoptslen; 4687 } 4688 /* 4689 * Now set the last header pointer to the proto passed in 4690 */ 4691 *nxthdr_ptr = protocol; 4692 ASSERT((int)(cp - buf) == buf_len); 4693 } 4694 4695 /* 4696 * Return a pointer to the routing header extension header 4697 * in the IPv6 header(s) chain passed in. 4698 * If none found, return NULL 4699 * Assumes that all extension headers are in same mblk as the v6 header 4700 */ 4701 ip6_rthdr_t * 4702 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr) 4703 { 4704 ip6_dest_t *desthdr; 4705 ip6_frag_t *fraghdr; 4706 uint_t hdrlen; 4707 uint8_t nexthdr; 4708 uint8_t *ptr = (uint8_t *)&ip6h[1]; 4709 4710 if (ip6h->ip6_nxt == IPPROTO_ROUTING) 4711 return ((ip6_rthdr_t *)ptr); 4712 4713 /* 4714 * The routing header will precede all extension headers 4715 * other than the hop-by-hop and destination options 4716 * extension headers, so if we see anything other than those, 4717 * we're done and didn't find it. 4718 * We could see a destination options header alone but no 4719 * routing header, in which case we'll return NULL as soon as 4720 * we see anything after that. 4721 * Hop-by-hop and destination option headers are identical, 4722 * so we can use either one we want as a template. 4723 */ 4724 nexthdr = ip6h->ip6_nxt; 4725 while (ptr < endptr) { 4726 /* Is there enough left for len + nexthdr? 
*/ 4727 if (ptr + MIN_EHDR_LEN > endptr) 4728 return (NULL); 4729 4730 switch (nexthdr) { 4731 case IPPROTO_HOPOPTS: 4732 case IPPROTO_DSTOPTS: 4733 /* Assumes the headers are identical for hbh and dst */ 4734 desthdr = (ip6_dest_t *)ptr; 4735 hdrlen = 8 * (desthdr->ip6d_len + 1); 4736 nexthdr = desthdr->ip6d_nxt; 4737 break; 4738 4739 case IPPROTO_ROUTING: 4740 return ((ip6_rthdr_t *)ptr); 4741 4742 case IPPROTO_FRAGMENT: 4743 fraghdr = (ip6_frag_t *)ptr; 4744 hdrlen = sizeof (ip6_frag_t); 4745 nexthdr = fraghdr->ip6f_nxt; 4746 break; 4747 4748 default: 4749 return (NULL); 4750 } 4751 ptr += hdrlen; 4752 } 4753 return (NULL); 4754 } 4755 4756 /* 4757 * Called for source-routed packets originating on this node. 4758 * Manipulates the original routing header by moving every entry up 4759 * one slot, placing the first entry in the v6 header's v6_dst field, 4760 * and placing the ultimate destination in the routing header's last 4761 * slot. 4762 * 4763 * Returns the checksum diference between the ultimate destination 4764 * (last hop in the routing header when the packet is sent) and 4765 * the first hop (ip6_dst when the packet is sent) 4766 */ 4767 /* ARGSUSED2 */ 4768 uint32_t 4769 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns) 4770 { 4771 uint_t numaddr; 4772 uint_t i; 4773 in6_addr_t *addrptr; 4774 in6_addr_t tmp; 4775 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth; 4776 uint32_t cksm; 4777 uint32_t addrsum = 0; 4778 uint16_t *ptr; 4779 4780 /* 4781 * Perform any processing needed for source routing. 4782 * We know that all extension headers will be in the same mblk 4783 * as the IPv6 header. 4784 */ 4785 4786 /* 4787 * If no segments left in header, or the header length field is zero, 4788 * don't move hop addresses around; 4789 * Checksum difference is zero. 
4790 */ 4791 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0)) 4792 return (0); 4793 4794 ptr = (uint16_t *)&ip6h->ip6_dst; 4795 cksm = 0; 4796 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4797 cksm += ptr[i]; 4798 } 4799 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4800 4801 /* 4802 * Here's where the fun begins - we have to 4803 * move all addresses up one spot, take the 4804 * first hop and make it our first ip6_dst, 4805 * and place the ultimate destination in the 4806 * newly-opened last slot. 4807 */ 4808 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr)); 4809 numaddr = rthdr->ip6r0_len / 2; 4810 tmp = *addrptr; 4811 for (i = 0; i < (numaddr - 1); addrptr++, i++) { 4812 *addrptr = addrptr[1]; 4813 } 4814 *addrptr = ip6h->ip6_dst; 4815 ip6h->ip6_dst = tmp; 4816 4817 /* 4818 * From the checksummed ultimate destination subtract the checksummed 4819 * current ip6_dst (the first hop address). Return that number. 4820 * (In the v4 case, the second part of this is done in each routine 4821 * that calls ip_massage_options(). We do it all in this one place 4822 * for v6). 
4823 */ 4824 ptr = (uint16_t *)&ip6h->ip6_dst; 4825 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) { 4826 addrsum += ptr[i]; 4827 } 4828 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF)); 4829 if ((int)cksm < 0) 4830 cksm--; 4831 cksm = (cksm & 0xFFFF) + (cksm >> 16); 4832 4833 return (cksm); 4834 } 4835 4836 void 4837 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp) 4838 { 4839 kstat_t *ksp; 4840 4841 ip6_stat_t template = { 4842 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 }, 4843 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 }, 4844 { "ip6_recv_pullup", KSTAT_DATA_UINT64 }, 4845 { "ip6_db_ref", KSTAT_DATA_UINT64 }, 4846 { "ip6_notaligned", KSTAT_DATA_UINT64 }, 4847 { "ip6_multimblk", KSTAT_DATA_UINT64 }, 4848 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 }, 4849 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 }, 4850 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 4851 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 }, 4852 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4853 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4854 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4855 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 }, 4856 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 }, 4857 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 }, 4858 }; 4859 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net", 4860 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4861 KSTAT_FLAG_VIRTUAL, stackid); 4862 4863 if (ksp == NULL) 4864 return (NULL); 4865 4866 bcopy(&template, ip6_statisticsp, sizeof (template)); 4867 ksp->ks_data = (void *)ip6_statisticsp; 4868 ksp->ks_private = (void *)(uintptr_t)stackid; 4869 4870 kstat_install(ksp); 4871 return (ksp); 4872 } 4873 4874 void 4875 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4876 { 4877 if (ksp != NULL) { 4878 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4879 kstat_delete_netstack(ksp, stackid); 4880 } 4881 } 4882 4883 /* 4884 * The following two functions set and get 
the value for the 4885 * IPV6_SRC_PREFERENCES socket option. 4886 */ 4887 int 4888 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs) 4889 { 4890 /* 4891 * We only support preferences that are covered by 4892 * IPV6_PREFER_SRC_MASK. 4893 */ 4894 if (prefs & ~IPV6_PREFER_SRC_MASK) 4895 return (EINVAL); 4896 4897 /* 4898 * Look for conflicting preferences or default preferences. If 4899 * both bits of a related pair are clear, the application wants the 4900 * system's default value for that pair. Both bits in a pair can't 4901 * be set. 4902 */ 4903 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) { 4904 prefs |= IPV6_PREFER_SRC_MIPDEFAULT; 4905 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 4906 IPV6_PREFER_SRC_MIPMASK) { 4907 return (EINVAL); 4908 } 4909 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) { 4910 prefs |= IPV6_PREFER_SRC_TMPDEFAULT; 4911 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 4912 IPV6_PREFER_SRC_TMPMASK) { 4913 return (EINVAL); 4914 } 4915 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) { 4916 prefs |= IPV6_PREFER_SRC_CGADEFAULT; 4917 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 4918 IPV6_PREFER_SRC_CGAMASK) { 4919 return (EINVAL); 4920 } 4921 4922 ixa->ixa_src_preferences = prefs; 4923 return (0); 4924 } 4925 4926 size_t 4927 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val) 4928 { 4929 *val = ixa->ixa_src_preferences; 4930 return (sizeof (ixa->ixa_src_preferences)); 4931 } 4932 4933 /* 4934 * Get the size of the IP options (including the IP headers size) 4935 * without including the AH header's size. If till_ah is B_FALSE, 4936 * and if AH header is present, dest options beyond AH header will 4937 * also be included in the returned size. 
4938 */ 4939 int 4940 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah) 4941 { 4942 ip6_t *ip6h; 4943 uint8_t nexthdr; 4944 uint8_t *whereptr; 4945 ip6_hbh_t *hbhhdr; 4946 ip6_dest_t *dsthdr; 4947 ip6_rthdr_t *rthdr; 4948 int ehdrlen; 4949 int size; 4950 ah_t *ah; 4951 4952 ip6h = (ip6_t *)mp->b_rptr; 4953 size = IPV6_HDR_LEN; 4954 nexthdr = ip6h->ip6_nxt; 4955 whereptr = (uint8_t *)&ip6h[1]; 4956 for (;;) { 4957 /* Assume IP has already stripped it */ 4958 ASSERT(nexthdr != IPPROTO_FRAGMENT); 4959 switch (nexthdr) { 4960 case IPPROTO_HOPOPTS: 4961 hbhhdr = (ip6_hbh_t *)whereptr; 4962 nexthdr = hbhhdr->ip6h_nxt; 4963 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 4964 break; 4965 case IPPROTO_DSTOPTS: 4966 dsthdr = (ip6_dest_t *)whereptr; 4967 nexthdr = dsthdr->ip6d_nxt; 4968 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 4969 break; 4970 case IPPROTO_ROUTING: 4971 rthdr = (ip6_rthdr_t *)whereptr; 4972 nexthdr = rthdr->ip6r_nxt; 4973 ehdrlen = 8 * (rthdr->ip6r_len + 1); 4974 break; 4975 default : 4976 if (till_ah) { 4977 ASSERT(nexthdr == IPPROTO_AH); 4978 return (size); 4979 } 4980 /* 4981 * If we don't have a AH header to traverse, 4982 * return now. This happens normally for 4983 * outbound datagrams where we have not inserted 4984 * the AH header. 4985 */ 4986 if (nexthdr != IPPROTO_AH) { 4987 return (size); 4988 } 4989 4990 /* 4991 * We don't include the AH header's size 4992 * to be symmetrical with other cases where 4993 * we either don't have a AH header (outbound) 4994 * or peek into the AH header yet (inbound and 4995 * not pulled up yet). 4996 */ 4997 ah = (ah_t *)whereptr; 4998 nexthdr = ah->ah_nexthdr; 4999 ehdrlen = (ah->ah_length << 2) + 8; 5000 5001 if (nexthdr == IPPROTO_DSTOPTS) { 5002 if (whereptr + ehdrlen >= mp->b_wptr) { 5003 /* 5004 * The destination options header 5005 * is not part of the first mblk. 
5006 */ 5007 whereptr = mp->b_cont->b_rptr; 5008 } else { 5009 whereptr += ehdrlen; 5010 } 5011 5012 dsthdr = (ip6_dest_t *)whereptr; 5013 ehdrlen = 8 * (dsthdr->ip6d_len + 1); 5014 size += ehdrlen; 5015 } 5016 return (size); 5017 } 5018 whereptr += ehdrlen; 5019 size += ehdrlen; 5020 } 5021 } 5022 5023 /* 5024 * Utility routine that checks if `v6srcp' is a valid address on underlying 5025 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif 5026 * associated with `v6srcp' on success. NOTE: if this is not called from 5027 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the 5028 * group during or after this lookup. 5029 */ 5030 boolean_t 5031 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp) 5032 { 5033 ipif_t *ipif; 5034 5035 5036 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst); 5037 if (ipif != NULL) { 5038 if (ipifp != NULL) 5039 *ipifp = ipif; 5040 else 5041 ipif_refrele(ipif); 5042 return (B_TRUE); 5043 } 5044 5045 if (ip_debug > 2) { 5046 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for " 5047 "src %s\n", AF_INET6, v6srcp); 5048 } 5049 return (B_FALSE); 5050 }