1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  25  */
  26 /* Copyright (c) 1990 Mentat Inc. */
  27 
  28 #include <sys/types.h>
  29 #include <sys/stream.h>
  30 #include <sys/strsun.h>
  31 #define _SUN_TPI_VERSION 2
  32 #include <sys/tihdr.h>
  33 #include <sys/xti_inet.h>
  34 #include <sys/ucred.h>
  35 #include <sys/zone.h>
  36 #include <sys/ddi.h>
  37 #include <sys/sunddi.h>
  38 #include <sys/cmn_err.h>
  39 #include <sys/debug.h>
  40 #include <sys/atomic.h>
  41 #include <sys/policy.h>
  42 
  43 #include <sys/systm.h>
  44 #include <sys/param.h>
  45 #include <sys/kmem.h>
  46 #include <sys/sdt.h>
  47 #include <sys/socket.h>
  48 #include <sys/ethernet.h>
  49 #include <sys/mac.h>
  50 #include <net/if.h>
  51 #include <net/if_types.h>
  52 #include <net/if_arp.h>
  53 #include <net/route.h>
  54 #include <sys/sockio.h>
  55 #include <netinet/in.h>
  56 #include <net/if_dl.h>
  57 
  58 #include <inet/common.h>
  59 #include <inet/mi.h>
  60 #include <inet/mib2.h>
  61 #include <inet/nd.h>
  62 #include <inet/arp.h>
  63 #include <inet/snmpcom.h>
  64 #include <inet/kstatcom.h>
  65 
  66 #include <netinet/igmp_var.h>
  67 #include <netinet/ip6.h>
  68 #include <netinet/icmp6.h>
  69 #include <netinet/sctp.h>
  70 
  71 #include <inet/ip.h>
  72 #include <inet/ip_impl.h>
  73 #include <inet/ip6.h>
  74 #include <inet/ip6_asp.h>
  75 #include <inet/tcp.h>
  76 #include <inet/ip_multi.h>
  77 #include <inet/ip_if.h>
  78 #include <inet/ip_ire.h>
  79 #include <inet/ip_ftable.h>
  80 #include <inet/ip_rts.h>
  81 #include <inet/optcom.h>
  82 #include <inet/ip_ndp.h>
  83 #include <inet/ip_listutils.h>
  84 #include <netinet/igmp.h>
  85 #include <netinet/ip_mroute.h>
  86 #include <netinet/udp.h>
  87 #include <inet/ipp_common.h>
  88 
  89 #include <net/pfkeyv2.h>
  90 #include <inet/sadb.h>
  91 #include <inet/ipsec_impl.h>
  92 #include <inet/ipdrop.h>
  93 #include <inet/ip_netinfo.h>
  94 
  95 #include <inet/ipclassifier.h>
  96 #include <inet/sctp_ip.h>
  97 #include <inet/sctp/sctp_impl.h>
  98 #include <inet/udp_impl.h>
  99 #include <sys/sunddi.h>
 100 
 101 #include <sys/tsol/label.h>
 102 #include <sys/tsol/tnet.h>
 103 
 104 /*
 105  * Return how much size is needed for the different ancillary data items
 106  */
 107 uint_t
 108 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
 109     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
 110 {
 111         uint_t          ancil_size;
 112         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
 113 
 114         /*
 115          * If IP_RECVDSTADDR is set we include the destination IP
 116          * address as an option. With IP_RECVOPTS we include all
 117          * the IP options.
 118          */
 119         ancil_size = 0;
 120         if (recv_ancillary.crb_recvdstaddr &&
 121             (ira->ira_flags & IRAF_IS_IPV4)) {
 122                 ancil_size += sizeof (struct T_opthdr) +
 123                     sizeof (struct in_addr);
 124                 IP_STAT(ipst, conn_in_recvdstaddr);
 125         }
 126 
 127         /*
 128          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 129          * are different
 130          */
 131         if (recv_ancillary.crb_ip_recvpktinfo &&
 132             connp->conn_family == AF_INET) {
 133                 ancil_size += sizeof (struct T_opthdr) +
 134                     sizeof (struct in_pktinfo);
 135                 IP_STAT(ipst, conn_in_recvpktinfo);
 136         }
 137 
 138         if ((recv_ancillary.crb_recvopts) &&
 139             (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 140                 ancil_size += sizeof (struct T_opthdr) +
 141                     ipp->ipp_ipv4_options_len;
 142                 IP_STAT(ipst, conn_in_recvopts);
 143         }
 144 
 145         if (recv_ancillary.crb_recvslla) {
 146                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 147                 ill_t *ill;
 148 
 149                 /* Make sure ira_l2src is setup if not already */
 150                 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
 151                         ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
 152                             ipst);
 153                         if (ill != NULL) {
 154                                 ip_setl2src(mp, ira, ill);
 155                                 ill_refrele(ill);
 156                         }
 157                 }
 158                 ancil_size += sizeof (struct T_opthdr) +
 159                     sizeof (struct sockaddr_dl);
 160                 IP_STAT(ipst, conn_in_recvslla);
 161         }
 162 
 163         if (recv_ancillary.crb_recvif) {
 164                 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
 165                 IP_STAT(ipst, conn_in_recvif);
 166         }
 167 
 168         /*
 169          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 170          * are different
 171          */
 172         if (recv_ancillary.crb_ip_recvpktinfo &&
 173             connp->conn_family == AF_INET6) {
 174                 ancil_size += sizeof (struct T_opthdr) +
 175                     sizeof (struct in6_pktinfo);
 176                 IP_STAT(ipst, conn_in_recvpktinfo);
 177         }
 178 
 179         if (recv_ancillary.crb_ipv6_recvhoplimit) {
 180                 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 181                 IP_STAT(ipst, conn_in_recvhoplimit);
 182         }
 183 
 184         if (recv_ancillary.crb_ipv6_recvtclass) {
 185                 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 186                 IP_STAT(ipst, conn_in_recvtclass);
 187         }
 188 
 189         if (recv_ancillary.crb_ipv6_recvhopopts &&
 190             (ipp->ipp_fields & IPPF_HOPOPTS)) {
 191                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 192                 IP_STAT(ipst, conn_in_recvhopopts);
 193         }
 194         /*
 195          * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 196          * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 197          * options that appear before a routing header.
 198          * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
 199          */
 200         if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 201                 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 202                     (recv_ancillary.crb_ipv6_recvdstopts &&
 203                     recv_ancillary.crb_ipv6_recvrthdr)) {
 204                         ancil_size += sizeof (struct T_opthdr) +
 205                             ipp->ipp_rthdrdstoptslen;
 206                         IP_STAT(ipst, conn_in_recvrthdrdstopts);
 207                 }
 208         }
 209         if ((recv_ancillary.crb_ipv6_recvrthdr) &&
 210             (ipp->ipp_fields & IPPF_RTHDR)) {
 211                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 212                 IP_STAT(ipst, conn_in_recvrthdr);
 213         }
 214         if ((recv_ancillary.crb_ipv6_recvdstopts ||
 215             recv_ancillary.crb_old_ipv6_recvdstopts) &&
 216             (ipp->ipp_fields & IPPF_DSTOPTS)) {
 217                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 218                 IP_STAT(ipst, conn_in_recvdstopts);
 219         }
 220         if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 221                 ancil_size += sizeof (struct T_opthdr) +
 222                     ucredminsize(ira->ira_cred);
 223                 IP_STAT(ipst, conn_in_recvucred);
 224         }
 225 
 226         /*
 227          * If SO_TIMESTAMP is set allocate the appropriate sized
 228          * buffer. Since gethrestime() expects a pointer aligned
 229          * argument, we allocate space necessary for extra
 230          * alignment (even though it might not be used).
 231          */
 232         if (recv_ancillary.crb_timestamp) {
 233                 ancil_size += sizeof (struct T_opthdr) +
 234                     sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 235                 IP_STAT(ipst, conn_in_timestamp);
 236         }
 237 
 238         /*
 239          * If IP_RECVTOS is set allocate the appropriately sized buffer
 240          */
 241         if (recv_ancillary.crb_recvtos &&
 242             (ira->ira_flags & IRAF_IS_IPV4)) {
 243                 ancil_size += sizeof (struct T_opthdr) +
 244                     P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
 245                 IP_STAT(ipst, conn_in_recvtos);
 246         }
 247 
 248         /*
 249          * If IP_RECVTTL is set allocate the appropriate sized buffer
 250          */
 251         if (recv_ancillary.crb_recvttl &&
 252             (ira->ira_flags & IRAF_IS_IPV4)) {
 253                 ancil_size += sizeof (struct T_opthdr) +
 254                     P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
 255                 IP_STAT(ipst, conn_in_recvttl);
 256         }
 257 
 258         return (ancil_size);
 259 }
 260 
 261 /*
 262  * Lay down the ancillary data items at "ancil_buf".
 263  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
 264  * large buffer - ancil_size.
 265  */
 266 void
 267 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
 268     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
 269 {
 270         /*
 271          * Copy in destination address before options to avoid
 272          * any padding issues.
 273          */
 274         if (recv_ancillary.crb_recvdstaddr &&
 275             (ira->ira_flags & IRAF_IS_IPV4)) {
 276                 struct T_opthdr *toh;
 277                 ipaddr_t *dstptr;
 278 
 279                 toh = (struct T_opthdr *)ancil_buf;
 280                 toh->level = IPPROTO_IP;
 281                 toh->name = IP_RECVDSTADDR;
 282                 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
 283                 toh->status = 0;
 284                 ancil_buf += sizeof (struct T_opthdr);
 285                 dstptr = (ipaddr_t *)ancil_buf;
 286                 *dstptr = ipp->ipp_addr_v4;
 287                 ancil_buf += sizeof (ipaddr_t);
 288                 ancil_size -= toh->len;
 289         }
 290 
 291         /*
 292          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 293          * are different
 294          */
 295         if (recv_ancillary.crb_ip_recvpktinfo &&
 296             connp->conn_family == AF_INET) {
 297                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 298                 struct T_opthdr *toh;
 299                 struct in_pktinfo *pktinfop;
 300                 ill_t *ill;
 301                 ipif_t *ipif;
 302 
 303                 toh = (struct T_opthdr *)ancil_buf;
 304                 toh->level = IPPROTO_IP;
 305                 toh->name = IP_PKTINFO;
 306                 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
 307                 toh->status = 0;
 308                 ancil_buf += sizeof (struct T_opthdr);
 309                 pktinfop = (struct in_pktinfo *)ancil_buf;
 310 
 311                 pktinfop->ipi_ifindex = ira->ira_ruifindex;
 312                 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
 313 
 314                 /* Find a good address to report */
 315                 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
 316                 if (ill != NULL) {
 317                         ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
 318                         if (ipif != NULL) {
 319                                 pktinfop->ipi_spec_dst.s_addr =
 320                                     ipif->ipif_lcl_addr;
 321                                 ipif_refrele(ipif);
 322                         }
 323                         ill_refrele(ill);
 324                 }
 325                 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
 326                 ancil_buf += sizeof (struct in_pktinfo);
 327                 ancil_size -= toh->len;
 328         }
 329 
 330         if ((recv_ancillary.crb_recvopts) &&
 331             (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 332                 struct T_opthdr *toh;
 333 
 334                 toh = (struct T_opthdr *)ancil_buf;
 335                 toh->level = IPPROTO_IP;
 336                 toh->name = IP_RECVOPTS;
 337                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
 338                 toh->status = 0;
 339                 ancil_buf += sizeof (struct T_opthdr);
 340                 bcopy(ipp->ipp_ipv4_options, ancil_buf,
 341                     ipp->ipp_ipv4_options_len);
 342                 ancil_buf += ipp->ipp_ipv4_options_len;
 343                 ancil_size -= toh->len;
 344         }
 345 
 346         if (recv_ancillary.crb_recvslla) {
 347                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 348                 struct T_opthdr *toh;
 349                 struct sockaddr_dl *dstptr;
 350                 ill_t *ill;
 351                 int alen = 0;
 352 
 353                 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
 354                 if (ill != NULL)
 355                         alen = ill->ill_phys_addr_length;
 356 
 357                 /*
 358                  * For loopback multicast and broadcast the packet arrives
 359                  * with ira_ruifdex being the physical interface, but
 360                  * ira_l2src is all zero since ip_postfrag_loopback doesn't
 361                  * know our l2src. We don't report the address in that case.
 362                  */
 363                 if (ira->ira_flags & IRAF_LOOPBACK)
 364                         alen = 0;
 365 
 366                 toh = (struct T_opthdr *)ancil_buf;
 367                 toh->level = IPPROTO_IP;
 368                 toh->name = IP_RECVSLLA;
 369                 toh->len = sizeof (struct T_opthdr) +
 370                     sizeof (struct sockaddr_dl);
 371                 toh->status = 0;
 372                 ancil_buf += sizeof (struct T_opthdr);
 373                 dstptr = (struct sockaddr_dl *)ancil_buf;
 374                 dstptr->sdl_family = AF_LINK;
 375                 dstptr->sdl_index = ira->ira_ruifindex;
 376                 if (ill != NULL)
 377                         dstptr->sdl_type = ill->ill_type;
 378                 else
 379                         dstptr->sdl_type = 0;
 380                 dstptr->sdl_nlen = 0;
 381                 dstptr->sdl_alen = alen;
 382                 dstptr->sdl_slen = 0;
 383                 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
 384                 ancil_buf += sizeof (struct sockaddr_dl);
 385                 ancil_size -= toh->len;
 386                 if (ill != NULL)
 387                         ill_refrele(ill);
 388         }
 389 
 390         if (recv_ancillary.crb_recvif) {
 391                 struct T_opthdr *toh;
 392                 uint_t          *dstptr;
 393 
 394                 toh = (struct T_opthdr *)ancil_buf;
 395                 toh->level = IPPROTO_IP;
 396                 toh->name = IP_RECVIF;
 397                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 398                 toh->status = 0;
 399                 ancil_buf += sizeof (struct T_opthdr);
 400                 dstptr = (uint_t *)ancil_buf;
 401                 *dstptr = ira->ira_ruifindex;
 402                 ancil_buf += sizeof (uint_t);
 403                 ancil_size -= toh->len;
 404         }
 405 
 406         /*
 407          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 408          * are different
 409          */
 410         if (recv_ancillary.crb_ip_recvpktinfo &&
 411             connp->conn_family == AF_INET6) {
 412                 struct T_opthdr *toh;
 413                 struct in6_pktinfo *pkti;
 414 
 415                 toh = (struct T_opthdr *)ancil_buf;
 416                 toh->level = IPPROTO_IPV6;
 417                 toh->name = IPV6_PKTINFO;
 418                 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
 419                 toh->status = 0;
 420                 ancil_buf += sizeof (struct T_opthdr);
 421                 pkti = (struct in6_pktinfo *)ancil_buf;
 422                 if (ira->ira_flags & IRAF_IS_IPV4) {
 423                         IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
 424                             &pkti->ipi6_addr);
 425                 } else {
 426                         pkti->ipi6_addr = ipp->ipp_addr;
 427                 }
 428                 pkti->ipi6_ifindex = ira->ira_ruifindex;
 429 
 430                 ancil_buf += sizeof (*pkti);
 431                 ancil_size -= toh->len;
 432         }
 433         if (recv_ancillary.crb_ipv6_recvhoplimit) {
 434                 struct T_opthdr *toh;
 435 
 436                 toh = (struct T_opthdr *)ancil_buf;
 437                 toh->level = IPPROTO_IPV6;
 438                 toh->name = IPV6_HOPLIMIT;
 439                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 440                 toh->status = 0;
 441                 ancil_buf += sizeof (struct T_opthdr);
 442                 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
 443                 ancil_buf += sizeof (uint_t);
 444                 ancil_size -= toh->len;
 445         }
 446         if (recv_ancillary.crb_ipv6_recvtclass) {
 447                 struct T_opthdr *toh;
 448 
 449                 toh = (struct T_opthdr *)ancil_buf;
 450                 toh->level = IPPROTO_IPV6;
 451                 toh->name = IPV6_TCLASS;
 452                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 453                 toh->status = 0;
 454                 ancil_buf += sizeof (struct T_opthdr);
 455 
 456                 if (ira->ira_flags & IRAF_IS_IPV4)
 457                         *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
 458                 else
 459                         *(uint_t *)ancil_buf = ipp->ipp_tclass;
 460                 ancil_buf += sizeof (uint_t);
 461                 ancil_size -= toh->len;
 462         }
 463         if (recv_ancillary.crb_ipv6_recvhopopts &&
 464             (ipp->ipp_fields & IPPF_HOPOPTS)) {
 465                 struct T_opthdr *toh;
 466 
 467                 toh = (struct T_opthdr *)ancil_buf;
 468                 toh->level = IPPROTO_IPV6;
 469                 toh->name = IPV6_HOPOPTS;
 470                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 471                 toh->status = 0;
 472                 ancil_buf += sizeof (struct T_opthdr);
 473                 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
 474                 ancil_buf += ipp->ipp_hopoptslen;
 475                 ancil_size -= toh->len;
 476         }
 477         /*
 478          * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 479          * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 480          * options that appear before a routing header.
 481          * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
 482          */
 483         if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 484                 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 485                     (recv_ancillary.crb_ipv6_recvdstopts &&
 486                     recv_ancillary.crb_ipv6_recvrthdr)) {
 487                         struct T_opthdr *toh;
 488 
 489                         toh = (struct T_opthdr *)ancil_buf;
 490                         toh->level = IPPROTO_IPV6;
 491                         toh->name = IPV6_DSTOPTS;
 492                         toh->len = sizeof (struct T_opthdr) +
 493                             ipp->ipp_rthdrdstoptslen;
 494                         toh->status = 0;
 495                         ancil_buf += sizeof (struct T_opthdr);
 496                         bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
 497                             ipp->ipp_rthdrdstoptslen);
 498                         ancil_buf += ipp->ipp_rthdrdstoptslen;
 499                         ancil_size -= toh->len;
 500                 }
 501         }
 502         if (recv_ancillary.crb_ipv6_recvrthdr &&
 503             (ipp->ipp_fields & IPPF_RTHDR)) {
 504                 struct T_opthdr *toh;
 505 
 506                 toh = (struct T_opthdr *)ancil_buf;
 507                 toh->level = IPPROTO_IPV6;
 508                 toh->name = IPV6_RTHDR;
 509                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 510                 toh->status = 0;
 511                 ancil_buf += sizeof (struct T_opthdr);
 512                 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
 513                 ancil_buf += ipp->ipp_rthdrlen;
 514                 ancil_size -= toh->len;
 515         }
 516         if ((recv_ancillary.crb_ipv6_recvdstopts ||
 517             recv_ancillary.crb_old_ipv6_recvdstopts) &&
 518             (ipp->ipp_fields & IPPF_DSTOPTS)) {
 519                 struct T_opthdr *toh;
 520 
 521                 toh = (struct T_opthdr *)ancil_buf;
 522                 toh->level = IPPROTO_IPV6;
 523                 toh->name = IPV6_DSTOPTS;
 524                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 525                 toh->status = 0;
 526                 ancil_buf += sizeof (struct T_opthdr);
 527                 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
 528                 ancil_buf += ipp->ipp_dstoptslen;
 529                 ancil_size -= toh->len;
 530         }
 531 
 532         if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 533                 struct T_opthdr *toh;
 534                 cred_t          *rcr = connp->conn_cred;
 535 
 536                 toh = (struct T_opthdr *)ancil_buf;
 537                 toh->level = SOL_SOCKET;
 538                 toh->name = SCM_UCRED;
 539                 toh->len = sizeof (struct T_opthdr) +
 540                     ucredminsize(ira->ira_cred);
 541                 toh->status = 0;
 542                 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
 543                 ancil_buf += toh->len;
 544                 ancil_size -= toh->len;
 545         }
 546         if (recv_ancillary.crb_timestamp) {
 547                 struct  T_opthdr *toh;
 548 
 549                 toh = (struct T_opthdr *)ancil_buf;
 550                 toh->level = SOL_SOCKET;
 551                 toh->name = SCM_TIMESTAMP;
 552                 toh->len = sizeof (struct T_opthdr) +
 553                     sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 554                 toh->status = 0;
 555                 ancil_buf += sizeof (struct T_opthdr);
 556                 /* Align for gethrestime() */
 557                 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
 558                     sizeof (intptr_t));
 559                 gethrestime((timestruc_t *)ancil_buf);
 560                 ancil_buf = (uchar_t *)toh + toh->len;
 561                 ancil_size -= toh->len;
 562         }
 563 
 564         if (recv_ancillary.crb_recvtos &&
 565             (ira->ira_flags & IRAF_IS_IPV4)) {
 566                 struct  T_opthdr *toh;
 567                 uint8_t *dstptr;
 568 
 569                 toh = (struct T_opthdr *)ancil_buf;
 570                 toh->level = IPPROTO_IP;
 571                 toh->name = IP_RECVTOS;
 572                 toh->len = sizeof (struct T_opthdr) +
 573                     P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
 574                 toh->status = 0;
 575                 ancil_buf += sizeof (struct T_opthdr);
 576                 dstptr = (uint8_t *)ancil_buf;
 577                 *dstptr = ipp->ipp_type_of_service;
 578                 ancil_buf = (uchar_t *)toh + toh->len;
 579                 ancil_size -= toh->len;
 580                 ASSERT(__TPI_TOPT_ISALIGNED(toh));
 581         }
 582 
 583         if (recv_ancillary.crb_recvttl &&
 584             (ira->ira_flags & IRAF_IS_IPV4)) {
 585                 struct  T_opthdr *toh;
 586                 uint8_t *dstptr;
 587 
 588                 toh = (struct T_opthdr *)ancil_buf;
 589                 toh->level = IPPROTO_IP;
 590                 toh->name = IP_RECVTTL;
 591                 toh->len = sizeof (struct T_opthdr) +
 592                     P2ROUNDUP(sizeof (uint8_t), __TPI_ALIGN_SIZE);
 593                 toh->status = 0;
 594                 ancil_buf += sizeof (struct T_opthdr);
 595                 dstptr = (uint8_t *)ancil_buf;
 596                 *dstptr = ipp->ipp_hoplimit;
 597                 ancil_buf = (uchar_t *)toh + toh->len;
 598                 ancil_size -= toh->len;
 599                 ASSERT(__TPI_TOPT_ISALIGNED(toh));
 600         }
 601 
 602         /* Consumed all of allocated space */
 603         ASSERT(ancil_size == 0);
 604 
 605 }
 606 
 607 /*
 608  * This routine retrieves the current status of socket options.
 609  * It returns the size of the option retrieved, or -1.
 610  */
 611 int
 612 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
 613     uchar_t *ptr)
 614 {
 615         int             *i1 = (int *)ptr;
 616         conn_t          *connp = coa->coa_connp;
 617         ip_xmit_attr_t  *ixa = coa->coa_ixa;
 618         ip_pkt_t        *ipp = coa->coa_ipp;
 619         ip_stack_t      *ipst = ixa->ixa_ipst;
 620         uint_t          len;
 621 
 622         ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
 623 
 624         switch (level) {
 625         case SOL_SOCKET:
 626                 switch (name) {
 627                 case SO_DEBUG:
 628                         *i1 = connp->conn_debug ? SO_DEBUG : 0;
 629                         break;  /* goto sizeof (int) option return */
 630                 case SO_KEEPALIVE:
 631                         *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
 632                         break;
 633                 case SO_LINGER: {
 634                         struct linger *lgr = (struct linger *)ptr;
 635 
 636                         lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
 637                         lgr->l_linger = connp->conn_lingertime;
 638                         }
 639                         return (sizeof (struct linger));
 640 
 641                 case SO_OOBINLINE:
 642                         *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
 643                         break;
 644                 case SO_REUSEADDR:
 645                         *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
 646                         break;  /* goto sizeof (int) option return */
 647                 case SO_TYPE:
 648                         *i1 = connp->conn_so_type;
 649                         break;  /* goto sizeof (int) option return */
 650                 case SO_DONTROUTE:
 651                         *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
 652                             SO_DONTROUTE : 0;
 653                         break;  /* goto sizeof (int) option return */
 654                 case SO_USELOOPBACK:
 655                         *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
 656                         break;  /* goto sizeof (int) option return */
 657                 case SO_BROADCAST:
 658                         *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
 659                         break;  /* goto sizeof (int) option return */
 660 
 661                 case SO_SNDBUF:
 662                         *i1 = connp->conn_sndbuf;
 663                         break;  /* goto sizeof (int) option return */
 664                 case SO_RCVBUF:
 665                         *i1 = connp->conn_rcvbuf;
 666                         break;  /* goto sizeof (int) option return */
 667                 case SO_RCVTIMEO:
 668                 case SO_SNDTIMEO:
                        /*
                         * These two options are accepted for the benefit
                         * of third-party protocols. Here we simply return
                         * zero.
                         */
 673                         *i1 = 0;
 674                         break;
 675                 case SO_DGRAM_ERRIND:
 676                         *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
 677                         break;  /* goto sizeof (int) option return */
 678                 case SO_RECVUCRED:
 679                         *i1 = connp->conn_recv_ancillary.crb_recvucred;
 680                         break;  /* goto sizeof (int) option return */
 681                 case SO_TIMESTAMP:
 682                         *i1 = connp->conn_recv_ancillary.crb_timestamp;
 683                         break;  /* goto sizeof (int) option return */
 684                 case SO_VRRP:
 685                         *i1 = connp->conn_isvrrp;
 686                         break;  /* goto sizeof (int) option return */
 687                 case SO_ANON_MLP:
 688                         *i1 = connp->conn_anon_mlp;
 689                         break;  /* goto sizeof (int) option return */
 690                 case SO_MAC_EXEMPT:
 691                         *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
 692                         break;  /* goto sizeof (int) option return */
 693                 case SO_MAC_IMPLICIT:
 694                         *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
 695                         break;  /* goto sizeof (int) option return */
 696                 case SO_ALLZONES:
 697                         *i1 = connp->conn_allzones;
 698                         break;  /* goto sizeof (int) option return */
 699                 case SO_EXCLBIND:
 700                         *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
 701                         break;
 702                 case SO_PROTOTYPE:
 703                         *i1 = connp->conn_proto;
 704                         break;
 705 
 706                 case SO_DOMAIN:
 707                         *i1 = connp->conn_family;
 708                         break;
 709                 default:
 710                         return (-1);
 711                 }
 712                 break;
 713         case IPPROTO_IP:
 714                 if (connp->conn_family != AF_INET)
 715                         return (-1);
 716                 switch (name) {
 717                 case IP_OPTIONS:
 718                 case T_IP_OPTIONS:
 719                         if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
 720                                 return (0);
 721 
 722                         len = ipp->ipp_ipv4_options_len;
 723                         if (len > 0) {
 724                                 bcopy(ipp->ipp_ipv4_options, ptr, len);
 725                         }
 726                         return (len);
 727 
 728                 case IP_PKTINFO: {
 729                         /*
 730                          * This also handles IP_RECVPKTINFO.
 731                          * IP_PKTINFO and IP_RECVPKTINFO have same value.
 732                          * Differentiation is based on the size of the
 733                          * argument passed in.
 734                          */
 735                         struct in_pktinfo *pktinfo;
 736 
 737 #ifdef notdef
 738                         /* optcom doesn't provide a length with "get" */
 739                         if (inlen == sizeof (int)) {
 740                                 /* This is IP_RECVPKTINFO option. */
 741                                 *i1 = connp->conn_recv_ancillary.
 742                                     crb_ip_recvpktinfo;
 743                                 return (sizeof (int));
 744                         }
 745 #endif
 746                         /* XXX assumes that caller has room for max size! */
 747 
 748                         pktinfo = (struct in_pktinfo *)ptr;
 749                         pktinfo->ipi_ifindex = ixa->ixa_ifindex;
 750                         if (ipp->ipp_fields & IPPF_ADDR)
 751                                 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
 752                         else
 753                                 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
 754                         return (sizeof (struct in_pktinfo));
 755                 }
 756                 case IP_DONTFRAG:
 757                         *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 758                         return (sizeof (int));
 759                 case IP_TOS:
 760                 case T_IP_TOS:
 761                         *i1 = (int)ipp->ipp_type_of_service;
 762                         break;  /* goto sizeof (int) option return */
 763                 case IP_TTL:
 764                         *i1 = (int)ipp->ipp_unicast_hops;
 765                         break;  /* goto sizeof (int) option return */
 766                 case IP_DHCPINIT_IF:
 767                         return (-1);
 768                 case IP_NEXTHOP:
 769                         if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
 770                                 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
 771                                 return (sizeof (ipaddr_t));
 772                         } else {
 773                                 return (0);
 774                         }
 775 
 776                 case IP_MULTICAST_IF:
 777                         /* 0 address if not set */
 778                         *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
 779                         return (sizeof (ipaddr_t));
 780                 case IP_MULTICAST_TTL:
 781                         *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
 782                         return (sizeof (uchar_t));
 783                 case IP_MULTICAST_LOOP:
 784                         *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 785                         return (sizeof (uint8_t));
 786                 case IP_RECVOPTS:
 787                         *i1 = connp->conn_recv_ancillary.crb_recvopts;
 788                         break;  /* goto sizeof (int) option return */
 789                 case IP_RECVDSTADDR:
 790                         *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
 791                         break;  /* goto sizeof (int) option return */
 792                 case IP_RECVIF:
 793                         *i1 = connp->conn_recv_ancillary.crb_recvif;
 794                         break;  /* goto sizeof (int) option return */
 795                 case IP_RECVSLLA:
 796                         *i1 = connp->conn_recv_ancillary.crb_recvslla;
 797                         break;  /* goto sizeof (int) option return */
 798                 case IP_RECVTTL:
 799                         *i1 = connp->conn_recv_ancillary.crb_recvttl;
 800                         break;  /* goto sizeof (int) option return */
 801                 case IP_RECVTOS:
 802                         *i1 = connp->conn_recv_ancillary.crb_recvtos;
 803                         break;  /* goto sizeof (int) option return */
 804                 case IP_ADD_MEMBERSHIP:
 805                 case IP_DROP_MEMBERSHIP:
 806                 case MCAST_JOIN_GROUP:
 807                 case MCAST_LEAVE_GROUP:
 808                 case IP_BLOCK_SOURCE:
 809                 case IP_UNBLOCK_SOURCE:
 810                 case IP_ADD_SOURCE_MEMBERSHIP:
 811                 case IP_DROP_SOURCE_MEMBERSHIP:
 812                 case MCAST_BLOCK_SOURCE:
 813                 case MCAST_UNBLOCK_SOURCE:
 814                 case MCAST_JOIN_SOURCE_GROUP:
 815                 case MCAST_LEAVE_SOURCE_GROUP:
 816                 case MRT_INIT:
 817                 case MRT_DONE:
 818                 case MRT_ADD_VIF:
 819                 case MRT_DEL_VIF:
 820                 case MRT_ADD_MFC:
 821                 case MRT_DEL_MFC:
 822                         /* cannot "get" the value for these */
 823                         return (-1);
 824                 case MRT_VERSION:
 825                 case MRT_ASSERT:
 826                         (void) ip_mrouter_get(name, connp, ptr);
 827                         return (sizeof (int));
 828                 case IP_SEC_OPT:
 829                         return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 830                             IPSEC_AF_V4));
 831                 case IP_BOUND_IF:
 832                         /* Zero if not set */
 833                         *i1 = connp->conn_bound_if;
 834                         break;  /* goto sizeof (int) option return */
 835                 case IP_UNSPEC_SRC:
 836                         *i1 = connp->conn_unspec_src;
 837                         break;  /* goto sizeof (int) option return */
 838                 case IP_BROADCAST_TTL:
 839                         if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
 840                                 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
 841                         else
 842                                 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
 843                         return (sizeof (uchar_t));
 844                 default:
 845                         return (-1);
 846                 }
 847                 break;
 848         case IPPROTO_IPV6:
 849                 if (connp->conn_family != AF_INET6)
 850                         return (-1);
 851                 switch (name) {
 852                 case IPV6_UNICAST_HOPS:
 853                         *i1 = (int)ipp->ipp_unicast_hops;
 854                         break;  /* goto sizeof (int) option return */
 855                 case IPV6_MULTICAST_IF:
 856                         /* 0 index if not set */
 857                         *i1 = ixa->ixa_multicast_ifindex;
 858                         break;  /* goto sizeof (int) option return */
 859                 case IPV6_MULTICAST_HOPS:
 860                         *i1 = ixa->ixa_multicast_ttl;
 861                         break;  /* goto sizeof (int) option return */
 862                 case IPV6_MULTICAST_LOOP:
 863                         *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 864                         break;  /* goto sizeof (int) option return */
 865                 case IPV6_JOIN_GROUP:
 866                 case IPV6_LEAVE_GROUP:
 867                 case MCAST_JOIN_GROUP:
 868                 case MCAST_LEAVE_GROUP:
 869                 case MCAST_BLOCK_SOURCE:
 870                 case MCAST_UNBLOCK_SOURCE:
 871                 case MCAST_JOIN_SOURCE_GROUP:
 872                 case MCAST_LEAVE_SOURCE_GROUP:
 873                         /* cannot "get" the value for these */
 874                         return (-1);
 875                 case IPV6_BOUND_IF:
 876                         /* Zero if not set */
 877                         *i1 = connp->conn_bound_if;
 878                         break;  /* goto sizeof (int) option return */
 879                 case IPV6_UNSPEC_SRC:
 880                         *i1 = connp->conn_unspec_src;
 881                         break;  /* goto sizeof (int) option return */
 882                 case IPV6_RECVPKTINFO:
 883                         *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
 884                         break;  /* goto sizeof (int) option return */
 885                 case IPV6_RECVTCLASS:
 886                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
 887                         break;  /* goto sizeof (int) option return */
 888                 case IPV6_RECVPATHMTU:
 889                         *i1 = connp->conn_ipv6_recvpathmtu;
 890                         break;  /* goto sizeof (int) option return */
 891                 case IPV6_RECVHOPLIMIT:
 892                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
 893                         break;  /* goto sizeof (int) option return */
 894                 case IPV6_RECVHOPOPTS:
 895                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
 896                         break;  /* goto sizeof (int) option return */
 897                 case IPV6_RECVDSTOPTS:
 898                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
 899                         break;  /* goto sizeof (int) option return */
 900                 case _OLD_IPV6_RECVDSTOPTS:
 901                         *i1 =
 902                             connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
 903                         break;  /* goto sizeof (int) option return */
 904                 case IPV6_RECVRTHDRDSTOPTS:
 905                         *i1 = connp->conn_recv_ancillary.
 906                             crb_ipv6_recvrthdrdstopts;
 907                         break;  /* goto sizeof (int) option return */
 908                 case IPV6_RECVRTHDR:
 909                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
 910                         break;  /* goto sizeof (int) option return */
 911                 case IPV6_PKTINFO: {
 912                         /* XXX assumes that caller has room for max size! */
 913                         struct in6_pktinfo *pkti;
 914 
 915                         pkti = (struct in6_pktinfo *)ptr;
 916                         pkti->ipi6_ifindex = ixa->ixa_ifindex;
 917                         if (ipp->ipp_fields & IPPF_ADDR)
 918                                 pkti->ipi6_addr = ipp->ipp_addr;
 919                         else
 920                                 pkti->ipi6_addr = ipv6_all_zeros;
 921                         return (sizeof (struct in6_pktinfo));
 922                 }
 923                 case IPV6_TCLASS:
 924                         *i1 = ipp->ipp_tclass;
 925                         break;  /* goto sizeof (int) option return */
 926                 case IPV6_NEXTHOP: {
 927                         sin6_t *sin6 = (sin6_t *)ptr;
 928 
 929                         if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
 930                                 return (0);
 931 
 932                         *sin6 = sin6_null;
 933                         sin6->sin6_family = AF_INET6;
 934                         sin6->sin6_addr = ixa->ixa_nexthop_v6;
 935 
 936                         return (sizeof (sin6_t));
 937                 }
 938                 case IPV6_HOPOPTS:
 939                         if (!(ipp->ipp_fields & IPPF_HOPOPTS))
 940                                 return (0);
 941                         bcopy(ipp->ipp_hopopts, ptr,
 942                             ipp->ipp_hopoptslen);
 943                         return (ipp->ipp_hopoptslen);
 944                 case IPV6_RTHDRDSTOPTS:
 945                         if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
 946                                 return (0);
 947                         bcopy(ipp->ipp_rthdrdstopts, ptr,
 948                             ipp->ipp_rthdrdstoptslen);
 949                         return (ipp->ipp_rthdrdstoptslen);
 950                 case IPV6_RTHDR:
 951                         if (!(ipp->ipp_fields & IPPF_RTHDR))
 952                                 return (0);
 953                         bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
 954                         return (ipp->ipp_rthdrlen);
 955                 case IPV6_DSTOPTS:
 956                         if (!(ipp->ipp_fields & IPPF_DSTOPTS))
 957                                 return (0);
 958                         bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
 959                         return (ipp->ipp_dstoptslen);
 960                 case IPV6_PATHMTU:
 961                         return (ip_fill_mtuinfo(connp, ixa,
 962                             (struct ip6_mtuinfo *)ptr));
 963                 case IPV6_SEC_OPT:
 964                         return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 965                             IPSEC_AF_V6));
 966                 case IPV6_SRC_PREFERENCES:
 967                         return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
 968                 case IPV6_DONTFRAG:
 969                         *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 970                         return (sizeof (int));
 971                 case IPV6_USE_MIN_MTU:
 972                         if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
 973                                 *i1 = ixa->ixa_use_min_mtu;
 974                         else
 975                                 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
 976                         break;
 977                 case IPV6_V6ONLY:
 978                         *i1 = connp->conn_ipv6_v6only;
 979                         return (sizeof (int));
 980                 default:
 981                         return (-1);
 982                 }
 983                 break;
 984         case IPPROTO_UDP:
 985                 switch (name) {
 986                 case UDP_ANONPRIVBIND:
 987                         *i1 = connp->conn_anon_priv_bind;
 988                         break;
 989                 case UDP_EXCLBIND:
 990                         *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
 991                         break;
 992                 default:
 993                         return (-1);
 994                 }
 995                 break;
 996         case IPPROTO_TCP:
 997                 switch (name) {
 998                 case TCP_RECVDSTADDR:
 999                         *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
1000                         break;
1001                 case TCP_ANONPRIVBIND:
1002                         *i1 = connp->conn_anon_priv_bind;
1003                         break;
1004                 case TCP_EXCLBIND:
1005                         *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
1006                         break;
1007                 default:
1008                         return (-1);
1009                 }
1010                 break;
1011         default:
1012                 return (-1);
1013         }
1014         return (sizeof (int));
1015 }
1016 
1017 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
1018     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1019 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
1020     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1021 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
1022     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1023 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
1024     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1025 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
1026     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1027 
1028 /*
1029  * This routine sets the most common socket options including some
1030  * that are transport/ULP specific.
1031  * It returns errno or zero.
1032  *
1033  * For fixed length options, there is no sanity check
1034  * of passed in length is done. It is assumed *_optcom_req()
1035  * routines do the right thing.
1036  */
1037 int
1038 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1039     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1040 {
1041         ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1042 
1043         /* We have different functions for different levels */
1044         switch (level) {
1045         case SOL_SOCKET:
1046                 return (conn_opt_set_socket(coa, name, inlen, invalp,
1047                     checkonly, cr));
1048         case IPPROTO_IP:
1049                 return (conn_opt_set_ip(coa, name, inlen, invalp,
1050                     checkonly, cr));
1051         case IPPROTO_IPV6:
1052                 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1053                     checkonly, cr));
1054         case IPPROTO_UDP:
1055                 return (conn_opt_set_udp(coa, name, inlen, invalp,
1056                     checkonly, cr));
1057         case IPPROTO_TCP:
1058                 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1059                     checkonly, cr));
1060         default:
1061                 return (0);
1062         }
1063 }
1064 
1065 /*
1066  * Handle SOL_SOCKET
1067  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1068  * it implement their own checks and setting of conn_proto.
1069  */
1070 /* ARGSUSED1 */
1071 static int
1072 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1073     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1074 {
1075         conn_t          *connp = coa->coa_connp;
1076         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1077         int             *i1 = (int *)invalp;
1078         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1079 
1080         switch (name) {
1081         case SO_ALLZONES:
1082                 if (IPCL_IS_BOUND(connp))
1083                         return (EINVAL);
1084                 break;
1085         case SO_VRRP:
1086                 if (secpolicy_ip_config(cr, checkonly) != 0)
1087                         return (EACCES);
1088                 break;
1089         case SO_MAC_EXEMPT:
1090                 if (secpolicy_net_mac_aware(cr) != 0)
1091                         return (EACCES);
1092                 if (IPCL_IS_BOUND(connp))
1093                         return (EINVAL);
1094                 break;
1095         case SO_MAC_IMPLICIT:
1096                 if (secpolicy_net_mac_implicit(cr) != 0)
1097                         return (EACCES);
1098                 break;
1099         }
1100         if (checkonly)
1101                 return (0);
1102 
1103         mutex_enter(&connp->conn_lock);
1104         /* Here we set the actual option value */
1105         switch (name) {
1106         case SO_DEBUG:
1107                 connp->conn_debug = onoff;
1108                 break;
1109         case SO_KEEPALIVE:
1110                 connp->conn_keepalive = onoff;
1111                 break;
1112         case SO_LINGER: {
1113                 struct linger *lgr = (struct linger *)invalp;
1114 
1115                 if (lgr->l_onoff) {
1116                         connp->conn_linger = 1;
1117                         connp->conn_lingertime = lgr->l_linger;
1118                 } else {
1119                         connp->conn_linger = 0;
1120                         connp->conn_lingertime = 0;
1121                 }
1122                 break;
1123         }
1124         case SO_OOBINLINE:
1125                 connp->conn_oobinline = onoff;
1126                 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1127                 break;
1128         case SO_REUSEADDR:
1129                 connp->conn_reuseaddr = onoff;
1130                 break;
1131         case SO_DONTROUTE:
1132                 if (onoff)
1133                         ixa->ixa_flags |= IXAF_DONTROUTE;
1134                 else
1135                         ixa->ixa_flags &= ~IXAF_DONTROUTE;
1136                 coa->coa_changed |= COA_ROUTE_CHANGED;
1137                 break;
1138         case SO_USELOOPBACK:
1139                 connp->conn_useloopback = onoff;
1140                 break;
1141         case SO_BROADCAST:
1142                 connp->conn_broadcast = onoff;
1143                 break;
1144         case SO_SNDBUF:
1145                 /* ULP has range checked the value */
1146                 connp->conn_sndbuf = *i1;
1147                 coa->coa_changed |= COA_SNDBUF_CHANGED;
1148                 break;
1149         case SO_RCVBUF:
1150                 /* ULP has range checked the value */
1151                 connp->conn_rcvbuf = *i1;
1152                 coa->coa_changed |= COA_RCVBUF_CHANGED;
1153                 break;
1154         case SO_RCVTIMEO:
1155         case SO_SNDTIMEO:
1156                 /*
1157                  * Pass these two options in order for third part
1158                  * protocol usage.
1159                  */
1160                 break;
1161         case SO_DGRAM_ERRIND:
1162                 connp->conn_dgram_errind = onoff;
1163                 break;
1164         case SO_RECVUCRED:
1165                 connp->conn_recv_ancillary.crb_recvucred = onoff;
1166                 break;
1167         case SO_ALLZONES:
1168                 connp->conn_allzones = onoff;
1169                 coa->coa_changed |= COA_ROUTE_CHANGED;
1170                 if (onoff)
1171                         ixa->ixa_zoneid = ALL_ZONES;
1172                 else
1173                         ixa->ixa_zoneid = connp->conn_zoneid;
1174                 break;
1175         case SO_TIMESTAMP:
1176                 connp->conn_recv_ancillary.crb_timestamp = onoff;
1177                 break;
1178         case SO_VRRP:
1179                 connp->conn_isvrrp = onoff;
1180                 break;
1181         case SO_ANON_MLP:
1182                 connp->conn_anon_mlp = onoff;
1183                 break;
1184         case SO_MAC_EXEMPT:
1185                 connp->conn_mac_mode = onoff ?
1186                     CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1187                 break;
1188         case SO_MAC_IMPLICIT:
1189                 connp->conn_mac_mode = onoff ?
1190                     CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1191                 break;
1192         case SO_EXCLBIND:
1193                 connp->conn_exclbind = onoff;
1194                 break;
1195         }
1196         mutex_exit(&connp->conn_lock);
1197         return (0);
1198 }
1199 
1200 /* Handle IPPROTO_IP */
1201 static int
1202 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1203     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1204 {
1205         conn_t          *connp = coa->coa_connp;
1206         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1207         ip_pkt_t        *ipp = coa->coa_ipp;
1208         int             *i1 = (int *)invalp;
1209         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1210         ipaddr_t        addr = (ipaddr_t)*i1;
1211         uint_t          ifindex;
1212         zoneid_t        zoneid = IPCL_ZONEID(connp);
1213         ipif_t          *ipif;
1214         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1215         int             error;
1216 
1217         if (connp->conn_family != AF_INET)
1218                 return (EINVAL);
1219 
1220         ifindex = UINT_MAX;
1221         switch (name) {
1222         case IP_TTL:
1223                 /* Don't allow zero */
1224                 if (*i1 < 1 || *i1 > 255)
1225                         return (EINVAL);
1226                 break;
1227         case IP_MULTICAST_IF:
1228                 if (addr == INADDR_ANY) {
1229                         /* Clear */
1230                         ifindex = 0;
1231                         break;
1232                 }
1233                 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1234                 if (ipif == NULL)
1235                         return (EHOSTUNREACH);
1236                 /* not supported by the virtual network iface */
1237                 if (IS_VNI(ipif->ipif_ill)) {
1238                         ipif_refrele(ipif);
1239                         return (EINVAL);
1240                 }
1241                 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1242                 ipif_refrele(ipif);
1243                 break;
1244         case IP_NEXTHOP: {
1245                 ire_t   *ire;
1246 
1247                 if (addr == INADDR_ANY) {
1248                         /* Clear */
1249                         break;
1250                 }
1251                 /* Verify that the next-hop is on-link */
1252                 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1253                     NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1254                 if (ire == NULL)
1255                         return (EHOSTUNREACH);
1256                 ire_refrele(ire);
1257                 break;
1258         }
1259         case IP_OPTIONS:
1260         case T_IP_OPTIONS: {
1261                 uint_t newlen;
1262 
1263                 if (ipp->ipp_fields & IPPF_LABEL_V4)
1264                         newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1265                 else
1266                         newlen = inlen;
1267                 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1268                         return (EINVAL);
1269                 }
1270                 break;
1271         }
1272         case IP_PKTINFO: {
1273                 struct in_pktinfo *pktinfo;
1274 
1275                 /* Two different valid lengths */
1276                 if (inlen != sizeof (int) &&
1277                     inlen != sizeof (struct in_pktinfo))
1278                         return (EINVAL);
1279                 if (inlen == sizeof (int))
1280                         break;
1281 
1282                 pktinfo = (struct in_pktinfo *)invalp;
1283                 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1284                         switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1285                             zoneid, ipst, B_FALSE)) {
1286                         case IPVL_UNICAST_UP:
1287                         case IPVL_UNICAST_DOWN:
1288                                 break;
1289                         default:
1290                                 return (EADDRNOTAVAIL);
1291                         }
1292                 }
1293                 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1294                     B_FALSE, ipst))
1295                         return (ENXIO);
1296                 break;
1297         }
1298         case IP_BOUND_IF:
1299                 ifindex = *(uint_t *)i1;
1300 
1301                 /* Just check it is ok. */
1302                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1303                         return (ENXIO);
1304                 break;
1305         }
1306         if (checkonly)
1307                 return (0);
1308 
1309         /* Here we set the actual option value */
1310         /*
1311          * conn_lock protects the bitfields, and is used to
1312          * set the fields atomically. Not needed for ixa settings since
1313          * the caller has an exclusive copy of the ixa.
1314          * We can not hold conn_lock across the multicast options though.
1315          */
1316         switch (name) {
1317         case IP_OPTIONS:
1318         case T_IP_OPTIONS:
1319                 /* Save options for use by IP. */
1320                 mutex_enter(&connp->conn_lock);
1321                 error = optcom_pkt_set(invalp, inlen,
1322                     (uchar_t **)&ipp->ipp_ipv4_options,
1323                     &ipp->ipp_ipv4_options_len);
1324                 if (error != 0) {
1325                         mutex_exit(&connp->conn_lock);
1326                         return (error);
1327                 }
1328                 if (ipp->ipp_ipv4_options_len == 0) {
1329                         ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1330                 } else {
1331                         ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1332                 }
1333                 mutex_exit(&connp->conn_lock);
1334                 coa->coa_changed |= COA_HEADER_CHANGED;
1335                 coa->coa_changed |= COA_WROFF_CHANGED;
1336                 break;
1337 
1338         case IP_TTL:
1339                 mutex_enter(&connp->conn_lock);
1340                 ipp->ipp_unicast_hops = *i1;
1341                 mutex_exit(&connp->conn_lock);
1342                 coa->coa_changed |= COA_HEADER_CHANGED;
1343                 break;
1344         case IP_TOS:
1345         case T_IP_TOS:
1346                 mutex_enter(&connp->conn_lock);
1347                 if (*i1 == -1) {
1348                         ipp->ipp_type_of_service = 0;
1349                 } else {
1350                         ipp->ipp_type_of_service = *i1;
1351                 }
1352                 mutex_exit(&connp->conn_lock);
1353                 coa->coa_changed |= COA_HEADER_CHANGED;
1354                 break;
1355         case IP_MULTICAST_IF:
1356                 ixa->ixa_multicast_ifindex = ifindex;
1357                 ixa->ixa_multicast_ifaddr = addr;
1358                 coa->coa_changed |= COA_ROUTE_CHANGED;
1359                 break;
1360         case IP_MULTICAST_TTL:
1361                 ixa->ixa_multicast_ttl = *invalp;
1362                 /* Handled automatically by ip_output */
1363                 break;
1364         case IP_MULTICAST_LOOP:
1365                 if (*invalp != 0)
1366                         ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1367                 else
1368                         ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1369                 /* Handled automatically by ip_output */
1370                 break;
1371         case IP_RECVOPTS:
1372                 mutex_enter(&connp->conn_lock);
1373                 connp->conn_recv_ancillary.crb_recvopts = onoff;
1374                 mutex_exit(&connp->conn_lock);
1375                 break;
1376         case IP_RECVDSTADDR:
1377                 mutex_enter(&connp->conn_lock);
1378                 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1379                 mutex_exit(&connp->conn_lock);
1380                 break;
1381         case IP_RECVIF:
1382                 mutex_enter(&connp->conn_lock);
1383                 connp->conn_recv_ancillary.crb_recvif = onoff;
1384                 mutex_exit(&connp->conn_lock);
1385                 break;
1386         case IP_RECVSLLA:
1387                 mutex_enter(&connp->conn_lock);
1388                 connp->conn_recv_ancillary.crb_recvslla = onoff;
1389                 mutex_exit(&connp->conn_lock);
1390                 break;
1391         case IP_RECVTTL:
1392                 mutex_enter(&connp->conn_lock);
1393                 connp->conn_recv_ancillary.crb_recvttl = onoff;
1394                 mutex_exit(&connp->conn_lock);
1395                 break;
1396         case IP_RECVTOS:
1397                 mutex_enter(&connp->conn_lock);
1398                 connp->conn_recv_ancillary.crb_recvtos = onoff;
1399                 mutex_exit(&connp->conn_lock);
1400                 break;
1401         case IP_PKTINFO: {
1402                 /*
1403                  * This also handles IP_RECVPKTINFO.
1404                  * IP_PKTINFO and IP_RECVPKTINFO have same value.
1405                  * Differentiation is based on the size of the
1406                  * argument passed in.
1407                  */
1408                 struct in_pktinfo *pktinfo;
1409 
1410                 if (inlen == sizeof (int)) {
1411                         /* This is IP_RECVPKTINFO option. */
1412                         mutex_enter(&connp->conn_lock);
1413                         connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1414                             onoff;
1415                         mutex_exit(&connp->conn_lock);
1416                         break;
1417                 }
1418 
1419                 /* This is IP_PKTINFO option. */
1420                 mutex_enter(&connp->conn_lock);
1421                 pktinfo = (struct in_pktinfo *)invalp;
1422                 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1423                         ipp->ipp_fields |= IPPF_ADDR;
1424                         IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1425                             &ipp->ipp_addr);
1426                 } else {
1427                         ipp->ipp_fields &= ~IPPF_ADDR;
1428                         ipp->ipp_addr = ipv6_all_zeros;
1429                 }
1430                 mutex_exit(&connp->conn_lock);
1431                 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1432                 coa->coa_changed |= COA_ROUTE_CHANGED;
1433                 coa->coa_changed |= COA_HEADER_CHANGED;
1434                 break;
1435         }
1436         case IP_DONTFRAG:
1437                 if (onoff) {
1438                         ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1439                         ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1440                 } else {
1441                         ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1442                         ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1443                 }
1444                 /* Need to redo ip_attr_connect */
1445                 coa->coa_changed |= COA_ROUTE_CHANGED;
1446                 break;
1447         case IP_ADD_MEMBERSHIP:
1448         case IP_DROP_MEMBERSHIP:
1449         case MCAST_JOIN_GROUP:
1450         case MCAST_LEAVE_GROUP:
1451                 return (ip_opt_set_multicast_group(connp, name,
1452                     invalp, B_FALSE, checkonly));
1453 
1454         case IP_BLOCK_SOURCE:
1455         case IP_UNBLOCK_SOURCE:
1456         case IP_ADD_SOURCE_MEMBERSHIP:
1457         case IP_DROP_SOURCE_MEMBERSHIP:
1458         case MCAST_BLOCK_SOURCE:
1459         case MCAST_UNBLOCK_SOURCE:
1460         case MCAST_JOIN_SOURCE_GROUP:
1461         case MCAST_LEAVE_SOURCE_GROUP:
1462                 return (ip_opt_set_multicast_sources(connp, name,
1463                     invalp, B_FALSE, checkonly));
1464 
1465         case IP_SEC_OPT:
1466                 mutex_enter(&connp->conn_lock);
1467                 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1468                 mutex_exit(&connp->conn_lock);
1469                 if (error != 0) {
1470                         return (error);
1471                 }
1472                 /* This is an IPsec policy change - redo ip_attr_connect */
1473                 coa->coa_changed |= COA_ROUTE_CHANGED;
1474                 break;
1475         case IP_NEXTHOP:
1476                 ixa->ixa_nexthop_v4 = addr;
1477                 if (addr != INADDR_ANY)
1478                         ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1479                 else
1480                         ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1481                 coa->coa_changed |= COA_ROUTE_CHANGED;
1482                 break;
1483 
1484         case IP_BOUND_IF:
1485                 ixa->ixa_ifindex = ifindex;          /* Send */
1486                 mutex_enter(&connp->conn_lock);
1487                 connp->conn_incoming_ifindex = ifindex;      /* Receive */
1488                 connp->conn_bound_if = ifindex;              /* getsockopt */
1489                 mutex_exit(&connp->conn_lock);
1490                 coa->coa_changed |= COA_ROUTE_CHANGED;
1491                 break;
1492         case IP_UNSPEC_SRC:
1493                 mutex_enter(&connp->conn_lock);
1494                 connp->conn_unspec_src = onoff;
1495                 if (onoff)
1496                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1497                 else
1498                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1499 
1500                 mutex_exit(&connp->conn_lock);
1501                 break;
1502         case IP_BROADCAST_TTL:
1503                 ixa->ixa_broadcast_ttl = *invalp;
1504                 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1505                 /* Handled automatically by ip_output */
1506                 break;
1507         case MRT_INIT:
1508         case MRT_DONE:
1509         case MRT_ADD_VIF:
1510         case MRT_DEL_VIF:
1511         case MRT_ADD_MFC:
1512         case MRT_DEL_MFC:
1513         case MRT_ASSERT:
1514                 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1515                         return (error);
1516                 }
1517                 error = ip_mrouter_set((int)name, connp, checkonly,
1518                     (uchar_t *)invalp, inlen);
1519                 if (error) {
1520                         return (error);
1521                 }
1522                 return (0);
1523 
1524         }
1525         return (0);
1526 }
1527 
1528 /* Handle IPPROTO_IPV6 */
1529 static int
1530 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1531     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1532 {
1533         conn_t          *connp = coa->coa_connp;
1534         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1535         ip_pkt_t        *ipp = coa->coa_ipp;
1536         int             *i1 = (int *)invalp;
1537         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1538         uint_t          ifindex;
1539         zoneid_t        zoneid = IPCL_ZONEID(connp);
1540         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1541         int             error;
1542 
1543         if (connp->conn_family != AF_INET6)
1544                 return (EINVAL);
1545 
1546         ifindex = UINT_MAX;
1547         switch (name) {
1548         case IPV6_MULTICAST_IF:
1549                 /*
1550                  * The only possible error is EINVAL.
1551                  * We call this option on both V4 and V6
1552                  * If both fail, then this call returns
1553                  * EINVAL. If at least one of them succeeds we
1554                  * return success.
1555                  */
1556                 ifindex = *(uint_t *)i1;
1557 
1558                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1559                     !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1560                         return (EINVAL);
1561                 break;
1562         case IPV6_UNICAST_HOPS:
1563                 /* Don't allow zero. -1 means to use default */
1564                 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1565                         return (EINVAL);
1566                 break;
1567         case IPV6_MULTICAST_HOPS:
1568                 /* -1 means use default */
1569                 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1570                         return (EINVAL);
1571                 break;
1572         case IPV6_MULTICAST_LOOP:
1573                 if (*i1 != 0 && *i1 != 1)
1574                         return (EINVAL);
1575                 break;
1576         case IPV6_BOUND_IF:
1577                 ifindex = *(uint_t *)i1;
1578 
1579                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1580                         return (ENXIO);
1581                 break;
1582         case IPV6_PKTINFO: {
1583                 struct in6_pktinfo *pkti;
1584                 boolean_t isv6;
1585 
1586                 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1587                         return (EINVAL);
1588                 if (inlen == 0)
1589                         break;  /* Clear values below */
1590 
1591                 /*
1592                  * Verify the source address and ifindex. Privileged users
1593                  * can use any source address.
1594                  */
1595                 pkti = (struct in6_pktinfo *)invalp;
1596 
1597                 /*
1598                  * For link-local addresses we use the ipi6_ifindex when
1599                  * we verify the local address.
1600                  * If net_rawaccess then any source address can be used.
1601                  */
1602                 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1603                     secpolicy_net_rawaccess(cr) != 0) {
1604                         uint_t scopeid = 0;
1605                         in6_addr_t *v6src = &pkti->ipi6_addr;
1606                         ipaddr_t v4src;
1607                         ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1608 
1609                         if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1610                                 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1611                                 if (v4src != INADDR_ANY) {
1612                                         laddr_type = ip_laddr_verify_v4(v4src,
1613                                             zoneid, ipst, B_FALSE);
1614                                 }
1615                         } else {
1616                                 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1617                                         scopeid = pkti->ipi6_ifindex;
1618 
1619                                 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1620                                     ipst, B_FALSE, scopeid);
1621                         }
1622                         switch (laddr_type) {
1623                         case IPVL_UNICAST_UP:
1624                         case IPVL_UNICAST_DOWN:
1625                                 break;
1626                         default:
1627                                 return (EADDRNOTAVAIL);
1628                         }
1629                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1630                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1631                         /* Allow any source */
1632                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1633                 }
1634                 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1635                 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1636                     ipst))
1637                         return (ENXIO);
1638                 break;
1639         }
1640         case IPV6_HOPLIMIT:
1641                 /* It is only allowed as ancilary data */
1642                 if (!coa->coa_ancillary)
1643                         return (EINVAL);
1644 
1645                 if (inlen != 0 && inlen != sizeof (int))
1646                         return (EINVAL);
1647                 if (inlen == sizeof (int)) {
1648                         if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1649                                 return (EINVAL);
1650                 }
1651                 break;
1652         case IPV6_TCLASS:
1653                 if (inlen != 0 && inlen != sizeof (int))
1654                         return (EINVAL);
1655                 if (inlen == sizeof (int)) {
1656                         if (*i1 > 255 || *i1 < -1)
1657                                 return (EINVAL);
1658                 }
1659                 break;
1660         case IPV6_NEXTHOP:
1661                 if (inlen != 0 && inlen != sizeof (sin6_t))
1662                         return (EINVAL);
1663                 if (inlen == sizeof (sin6_t)) {
1664                         sin6_t *sin6 = (sin6_t *)invalp;
1665                         ire_t   *ire;
1666 
1667                         if (sin6->sin6_family != AF_INET6)
1668                                 return (EAFNOSUPPORT);
1669                         if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1670                                 return (EADDRNOTAVAIL);
1671 
1672                         /* Verify that the next-hop is on-link */
1673                         ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1674                             0, 0, IRE_ONLINK, NULL, zoneid,
1675                             NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1676                         if (ire == NULL)
1677                                 return (EHOSTUNREACH);
1678                         ire_refrele(ire);
1679                         break;
1680                 }
1681                 break;
1682         case IPV6_RTHDR:
1683         case IPV6_DSTOPTS:
1684         case IPV6_RTHDRDSTOPTS:
1685         case IPV6_HOPOPTS: {
1686                 /* All have the length field in the same place */
1687                 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1688                 /*
1689                  * Sanity checks - minimum size, size a multiple of
1690                  * eight bytes, and matching size passed in.
1691                  */
1692                 if (inlen != 0 &&
1693                     inlen != (8 * (hopts->ip6h_len + 1)))
1694                         return (EINVAL);
1695                 break;
1696         }
1697         case IPV6_PATHMTU:
1698                 /* Can't be set */
1699                 return (EINVAL);
1700 
1701         case IPV6_USE_MIN_MTU:
1702                 if (inlen != sizeof (int))
1703                         return (EINVAL);
1704                 if (*i1 < -1 || *i1 > 1)
1705                         return (EINVAL);
1706                 break;
1707         case IPV6_SRC_PREFERENCES:
1708                 if (inlen != sizeof (uint32_t))
1709                         return (EINVAL);
1710                 break;
1711         case IPV6_V6ONLY:
1712                 if (*i1 < 0 || *i1 > 1) {
1713                         return (EINVAL);
1714                 }
1715                 break;
1716         }
1717         if (checkonly)
1718                 return (0);
1719 
1720         /* Here we set the actual option value */
1721         /*
1722          * conn_lock protects the bitfields, and is used to
1723          * set the fields atomically. Not needed for ixa settings since
1724          * the caller has an exclusive copy of the ixa.
1725          * We can not hold conn_lock across the multicast options though.
1726          */
1727         ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1728         switch (name) {
1729         case IPV6_MULTICAST_IF:
1730                 ixa->ixa_multicast_ifindex = ifindex;
1731                 /* Need to redo ip_attr_connect */
1732                 coa->coa_changed |= COA_ROUTE_CHANGED;
1733                 break;
1734         case IPV6_UNICAST_HOPS:
1735                 /* -1 means use default */
1736                 mutex_enter(&connp->conn_lock);
1737                 if (*i1 == -1) {
1738                         ipp->ipp_unicast_hops = connp->conn_default_ttl;
1739                 } else {
1740                         ipp->ipp_unicast_hops = (uint8_t)*i1;
1741                 }
1742                 mutex_exit(&connp->conn_lock);
1743                 coa->coa_changed |= COA_HEADER_CHANGED;
1744                 break;
1745         case IPV6_MULTICAST_HOPS:
1746                 /* -1 means use default */
1747                 if (*i1 == -1) {
1748                         ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1749                 } else {
1750                         ixa->ixa_multicast_ttl = (uint8_t)*i1;
1751                 }
1752                 /* Handled automatically by ip_output */
1753                 break;
1754         case IPV6_MULTICAST_LOOP:
1755                 if (*i1 != 0)
1756                         ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1757                 else
1758                         ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1759                 /* Handled automatically by ip_output */
1760                 break;
1761         case IPV6_JOIN_GROUP:
1762         case IPV6_LEAVE_GROUP:
1763         case MCAST_JOIN_GROUP:
1764         case MCAST_LEAVE_GROUP:
1765                 return (ip_opt_set_multicast_group(connp, name,
1766                     invalp, B_TRUE, checkonly));
1767 
1768         case MCAST_BLOCK_SOURCE:
1769         case MCAST_UNBLOCK_SOURCE:
1770         case MCAST_JOIN_SOURCE_GROUP:
1771         case MCAST_LEAVE_SOURCE_GROUP:
1772                 return (ip_opt_set_multicast_sources(connp, name,
1773                     invalp, B_TRUE, checkonly));
1774 
1775         case IPV6_BOUND_IF:
1776                 ixa->ixa_ifindex = ifindex;          /* Send */
1777                 mutex_enter(&connp->conn_lock);
1778                 connp->conn_incoming_ifindex = ifindex;      /* Receive */
1779                 connp->conn_bound_if = ifindex;              /* getsockopt */
1780                 mutex_exit(&connp->conn_lock);
1781                 coa->coa_changed |= COA_ROUTE_CHANGED;
1782                 break;
1783         case IPV6_UNSPEC_SRC:
1784                 mutex_enter(&connp->conn_lock);
1785                 connp->conn_unspec_src = onoff;
1786                 if (onoff)
1787                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1788                 else
1789                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1790                 mutex_exit(&connp->conn_lock);
1791                 break;
1792         case IPV6_RECVPKTINFO:
1793                 mutex_enter(&connp->conn_lock);
1794                 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1795                 mutex_exit(&connp->conn_lock);
1796                 break;
1797         case IPV6_RECVTCLASS:
1798                 mutex_enter(&connp->conn_lock);
1799                 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1800                 mutex_exit(&connp->conn_lock);
1801                 break;
1802         case IPV6_RECVPATHMTU:
1803                 mutex_enter(&connp->conn_lock);
1804                 connp->conn_ipv6_recvpathmtu = onoff;
1805                 mutex_exit(&connp->conn_lock);
1806                 break;
1807         case IPV6_RECVHOPLIMIT:
1808                 mutex_enter(&connp->conn_lock);
1809                 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1810                     onoff;
1811                 mutex_exit(&connp->conn_lock);
1812                 break;
1813         case IPV6_RECVHOPOPTS:
1814                 mutex_enter(&connp->conn_lock);
1815                 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1816                 mutex_exit(&connp->conn_lock);
1817                 break;
1818         case IPV6_RECVDSTOPTS:
1819                 mutex_enter(&connp->conn_lock);
1820                 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1821                 mutex_exit(&connp->conn_lock);
1822                 break;
1823         case _OLD_IPV6_RECVDSTOPTS:
1824                 mutex_enter(&connp->conn_lock);
1825                 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1826                     onoff;
1827                 mutex_exit(&connp->conn_lock);
1828                 break;
1829         case IPV6_RECVRTHDRDSTOPTS:
1830                 mutex_enter(&connp->conn_lock);
1831                 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1832                     onoff;
1833                 mutex_exit(&connp->conn_lock);
1834                 break;
1835         case IPV6_RECVRTHDR:
1836                 mutex_enter(&connp->conn_lock);
1837                 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1838                 mutex_exit(&connp->conn_lock);
1839                 break;
1840         case IPV6_PKTINFO:
1841                 mutex_enter(&connp->conn_lock);
1842                 if (inlen == 0) {
1843                         ipp->ipp_fields &= ~IPPF_ADDR;
1844                         ipp->ipp_addr = ipv6_all_zeros;
1845                         ixa->ixa_ifindex = 0;
1846                 } else {
1847                         struct in6_pktinfo *pkti;
1848 
1849                         pkti = (struct in6_pktinfo *)invalp;
1850                         ipp->ipp_addr = pkti->ipi6_addr;
1851                         if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1852                                 ipp->ipp_fields |= IPPF_ADDR;
1853                         else
1854                                 ipp->ipp_fields &= ~IPPF_ADDR;
1855                         ixa->ixa_ifindex = pkti->ipi6_ifindex;
1856                 }
1857                 mutex_exit(&connp->conn_lock);
1858                 /* Source and ifindex might have changed */
1859                 coa->coa_changed |= COA_HEADER_CHANGED;
1860                 coa->coa_changed |= COA_ROUTE_CHANGED;
1861                 break;
1862         case IPV6_HOPLIMIT:
1863                 mutex_enter(&connp->conn_lock);
1864                 if (inlen == 0 || *i1 == -1) {
1865                         /* Revert to default */
1866                         ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1867                         ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1868                 } else {
1869                         ipp->ipp_hoplimit = *i1;
1870                         ipp->ipp_fields |= IPPF_HOPLIMIT;
1871                         /* Ensure that it sticks for multicast packets */
1872                         ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1873                 }
1874                 mutex_exit(&connp->conn_lock);
1875                 coa->coa_changed |= COA_HEADER_CHANGED;
1876                 break;
1877         case IPV6_TCLASS:
1878                 /*
1879                  * IPV6_TCLASS accepts -1 as use kernel default
1880                  * and [0, 255] as the actualy traffic class.
1881                  */
1882                 mutex_enter(&connp->conn_lock);
1883                 if (inlen == 0 || *i1 == -1) {
1884                         ipp->ipp_tclass = 0;
1885                         ipp->ipp_fields &= ~IPPF_TCLASS;
1886                 } else {
1887                         ipp->ipp_tclass = *i1;
1888                         ipp->ipp_fields |= IPPF_TCLASS;
1889                 }
1890                 mutex_exit(&connp->conn_lock);
1891                 coa->coa_changed |= COA_HEADER_CHANGED;
1892                 break;
1893         case IPV6_NEXTHOP:
1894                 if (inlen == 0) {
1895                         ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1896                 } else {
1897                         sin6_t *sin6 = (sin6_t *)invalp;
1898 
1899                         ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1900                         if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1901                                 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1902                         else
1903                                 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1904                 }
1905                 coa->coa_changed |= COA_ROUTE_CHANGED;
1906                 break;
1907         case IPV6_HOPOPTS:
1908                 mutex_enter(&connp->conn_lock);
1909                 error = optcom_pkt_set(invalp, inlen,
1910                     (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1911                 if (error != 0) {
1912                         mutex_exit(&connp->conn_lock);
1913                         return (error);
1914                 }
1915                 if (ipp->ipp_hopoptslen == 0) {
1916                         ipp->ipp_fields &= ~IPPF_HOPOPTS;
1917                 } else {
1918                         ipp->ipp_fields |= IPPF_HOPOPTS;
1919                 }
1920                 mutex_exit(&connp->conn_lock);
1921                 coa->coa_changed |= COA_HEADER_CHANGED;
1922                 coa->coa_changed |= COA_WROFF_CHANGED;
1923                 break;
1924         case IPV6_RTHDRDSTOPTS:
1925                 mutex_enter(&connp->conn_lock);
1926                 error = optcom_pkt_set(invalp, inlen,
1927                     (uchar_t **)&ipp->ipp_rthdrdstopts,
1928                     &ipp->ipp_rthdrdstoptslen);
1929                 if (error != 0) {
1930                         mutex_exit(&connp->conn_lock);
1931                         return (error);
1932                 }
1933                 if (ipp->ipp_rthdrdstoptslen == 0) {
1934                         ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1935                 } else {
1936                         ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1937                 }
1938                 mutex_exit(&connp->conn_lock);
1939                 coa->coa_changed |= COA_HEADER_CHANGED;
1940                 coa->coa_changed |= COA_WROFF_CHANGED;
1941                 break;
1942         case IPV6_DSTOPTS:
1943                 mutex_enter(&connp->conn_lock);
1944                 error = optcom_pkt_set(invalp, inlen,
1945                     (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1946                 if (error != 0) {
1947                         mutex_exit(&connp->conn_lock);
1948                         return (error);
1949                 }
1950                 if (ipp->ipp_dstoptslen == 0) {
1951                         ipp->ipp_fields &= ~IPPF_DSTOPTS;
1952                 } else {
1953                         ipp->ipp_fields |= IPPF_DSTOPTS;
1954                 }
1955                 mutex_exit(&connp->conn_lock);
1956                 coa->coa_changed |= COA_HEADER_CHANGED;
1957                 coa->coa_changed |= COA_WROFF_CHANGED;
1958                 break;
1959         case IPV6_RTHDR:
1960                 mutex_enter(&connp->conn_lock);
1961                 error = optcom_pkt_set(invalp, inlen,
1962                     (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1963                 if (error != 0) {
1964                         mutex_exit(&connp->conn_lock);
1965                         return (error);
1966                 }
1967                 if (ipp->ipp_rthdrlen == 0) {
1968                         ipp->ipp_fields &= ~IPPF_RTHDR;
1969                 } else {
1970                         ipp->ipp_fields |= IPPF_RTHDR;
1971                 }
1972                 mutex_exit(&connp->conn_lock);
1973                 coa->coa_changed |= COA_HEADER_CHANGED;
1974                 coa->coa_changed |= COA_WROFF_CHANGED;
1975                 break;
1976 
1977         case IPV6_DONTFRAG:
1978                 if (onoff) {
1979                         ixa->ixa_flags |= IXAF_DONTFRAG;
1980                         ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1981                 } else {
1982                         ixa->ixa_flags &= ~IXAF_DONTFRAG;
1983                         ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1984                 }
1985                 /* Need to redo ip_attr_connect */
1986                 coa->coa_changed |= COA_ROUTE_CHANGED;
1987                 break;
1988 
1989         case IPV6_USE_MIN_MTU:
1990                 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1991                 ixa->ixa_use_min_mtu = *i1;
1992                 /* Need to redo ip_attr_connect */
1993                 coa->coa_changed |= COA_ROUTE_CHANGED;
1994                 break;
1995 
1996         case IPV6_SEC_OPT:
1997                 mutex_enter(&connp->conn_lock);
1998                 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1999                 mutex_exit(&connp->conn_lock);
2000                 if (error != 0) {
2001                         return (error);
2002                 }
2003                 /* This is an IPsec policy change - redo ip_attr_connect */
2004                 coa->coa_changed |= COA_ROUTE_CHANGED;
2005                 break;
2006         case IPV6_SRC_PREFERENCES:
2007                 /*
2008                  * This socket option only affects connected
2009                  * sockets that haven't already bound to a specific
2010                  * IPv6 address.  In other words, sockets that
2011                  * don't call bind() with an address other than the
2012                  * unspecified address and that call connect().
2013                  * ip_set_destination_v6() passes these preferences
2014                  * to the ipif_select_source_v6() function.
2015                  */
2016                 mutex_enter(&connp->conn_lock);
2017                 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
2018                 mutex_exit(&connp->conn_lock);
2019                 if (error != 0) {
2020                         return (error);
2021                 }
2022                 break;
2023         case IPV6_V6ONLY:
2024                 mutex_enter(&connp->conn_lock);
2025                 connp->conn_ipv6_v6only = onoff;
2026                 mutex_exit(&connp->conn_lock);
2027                 break;
2028         }
2029         return (0);
2030 }
2031 
2032 /* Handle IPPROTO_UDP */
2033 /* ARGSUSED1 */
2034 static int
2035 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2036     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2037 {
2038         conn_t          *connp = coa->coa_connp;
2039         int             *i1 = (int *)invalp;
2040         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2041         int             error;
2042 
2043         switch (name) {
2044         case UDP_ANONPRIVBIND:
2045                 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2046                         return (error);
2047                 }
2048                 break;
2049         }
2050         if (checkonly)
2051                 return (0);
2052 
2053         /* Here we set the actual option value */
2054         mutex_enter(&connp->conn_lock);
2055         switch (name) {
2056         case UDP_ANONPRIVBIND:
2057                 connp->conn_anon_priv_bind = onoff;
2058                 break;
2059         case UDP_EXCLBIND:
2060                 connp->conn_exclbind = onoff;
2061                 break;
2062         }
2063         mutex_exit(&connp->conn_lock);
2064         return (0);
2065 }
2066 
2067 /* Handle IPPROTO_TCP */
2068 /* ARGSUSED1 */
2069 static int
2070 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2071     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2072 {
2073         conn_t          *connp = coa->coa_connp;
2074         int             *i1 = (int *)invalp;
2075         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2076         int             error;
2077 
2078         switch (name) {
2079         case TCP_ANONPRIVBIND:
2080                 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2081                         return (error);
2082                 }
2083                 break;
2084         }
2085         if (checkonly)
2086                 return (0);
2087 
2088         /* Here we set the actual option value */
2089         mutex_enter(&connp->conn_lock);
2090         switch (name) {
2091         case TCP_ANONPRIVBIND:
2092                 connp->conn_anon_priv_bind = onoff;
2093                 break;
2094         case TCP_EXCLBIND:
2095                 connp->conn_exclbind = onoff;
2096                 break;
2097         case TCP_RECVDSTADDR:
2098                 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2099                 break;
2100         }
2101         mutex_exit(&connp->conn_lock);
2102         return (0);
2103 }
2104 
2105 int
2106 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2107 {
2108         sin_t           *sin;
2109         sin6_t          *sin6;
2110 
2111         if (connp->conn_family == AF_INET) {
2112                 if (*salenp < sizeof (sin_t))
2113                         return (EINVAL);
2114 
2115                 *salenp = sizeof (sin_t);
2116                 /* Fill zeroes and then initialize non-zero fields */
2117                 sin = (sin_t *)sa;
2118                 *sin = sin_null;
2119                 sin->sin_family = AF_INET;
2120                 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2121                     !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2122                         sin->sin_addr.s_addr = connp->conn_saddr_v4;
2123                 } else {
2124                         /*
2125                          * INADDR_ANY
2126                          * conn_saddr is not set, we might be bound to
2127                          * broadcast/multicast. Use conn_bound_addr as
2128                          * local address instead (that could
2129                          * also still be INADDR_ANY)
2130                          */
2131                         sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2132                 }
2133                 sin->sin_port = connp->conn_lport;
2134         } else {
2135                 if (*salenp < sizeof (sin6_t))
2136                         return (EINVAL);
2137 
2138                 *salenp = sizeof (sin6_t);
2139                 /* Fill zeroes and then initialize non-zero fields */
2140                 sin6 = (sin6_t *)sa;
2141                 *sin6 = sin6_null;
2142                 sin6->sin6_family = AF_INET6;
2143                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2144                         sin6->sin6_addr = connp->conn_saddr_v6;
2145                 } else {
2146                         /*
2147                          * conn_saddr is not set, we might be bound to
2148                          * broadcast/multicast. Use conn_bound_addr as
2149                          * local address instead (which could
2150                          * also still be unspecified)
2151                          */
2152                         sin6->sin6_addr = connp->conn_bound_addr_v6;
2153                 }
2154                 sin6->sin6_port = connp->conn_lport;
2155                 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2156                     (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2157                         sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2158         }
2159         return (0);
2160 }
2161 
2162 int
2163 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2164 {
2165         struct sockaddr_in      *sin;
2166         struct sockaddr_in6     *sin6;
2167 
2168         if (connp->conn_family == AF_INET) {
2169                 if (*salenp < sizeof (sin_t))
2170                         return (EINVAL);
2171 
2172                 *salenp = sizeof (sin_t);
2173                 /* initialize */
2174                 sin = (sin_t *)sa;
2175                 *sin = sin_null;
2176                 sin->sin_family = AF_INET;
2177                 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2178                 sin->sin_port = connp->conn_fport;
2179         } else {
2180                 if (*salenp < sizeof (sin6_t))
2181                         return (EINVAL);
2182 
2183                 *salenp = sizeof (sin6_t);
2184                 /* initialize */
2185                 sin6 = (sin6_t *)sa;
2186                 *sin6 = sin6_null;
2187                 sin6->sin6_family = AF_INET6;
2188                 sin6->sin6_addr = connp->conn_faddr_v6;
2189                 sin6->sin6_port =  connp->conn_fport;
2190                 sin6->sin6_flowinfo = connp->conn_flowinfo;
2191                 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2192                     (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2193                         sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2194         }
2195         return (0);
2196 }
2197 
2198 static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2199 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2200 
2201 /*
2202  * Allocate and fill in conn_ht_iphc based on the current information
2203  * in the conn.
2204  * Normally used when we bind() and connect().
2205  * Returns failure if can't allocate memory, or if there is a problem
2206  * with a routing header/option.
2207  *
2208  * We allocate space for the transport header (ulp_hdr_len + extra) and
2209  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2210  * The extra is there for transports that want some spare room for future
2211  * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2212  * excludes the extra part.
2213  *
2214  * We massage an routing option/header and store the ckecksum difference
2215  * in conn_sum.
2216  *
2217  * Caller needs to update conn_wroff if desired.
2218  */
2219 int
2220 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2221     const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2222 {
2223         ip_xmit_attr_t  *ixa = connp->conn_ixa;
2224         ip_pkt_t        *ipp = &connp->conn_xmit_ipp;
2225         uint_t          ip_hdr_length;
2226         uchar_t         *hdrs;
2227         uint_t          hdrs_len;
2228 
2229         ASSERT(MUTEX_HELD(&connp->conn_lock));
2230 
2231         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2232                 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2233                 /* In case of TX label and IP options it can be too much */
2234                 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2235                         /* Preserves existing TX errno for this */
2236                         return (EHOSTUNREACH);
2237                 }
2238         } else {
2239                 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2240         }
2241         ixa->ixa_ip_hdr_length = ip_hdr_length;
2242         hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2243         ASSERT(hdrs_len != 0);
2244 
2245         if (hdrs_len != connp->conn_ht_iphc_allocated) {
2246                 /* Allocate new before we free any old */
2247                 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2248                 if (hdrs == NULL)
2249                         return (ENOMEM);
2250 
2251                 if (connp->conn_ht_iphc != NULL) {
2252                         kmem_free(connp->conn_ht_iphc,
2253                             connp->conn_ht_iphc_allocated);
2254                 }
2255                 connp->conn_ht_iphc = hdrs;
2256                 connp->conn_ht_iphc_allocated = hdrs_len;
2257         } else {
2258                 hdrs = connp->conn_ht_iphc;
2259         }
2260         hdrs_len -= extra;
2261         connp->conn_ht_iphc_len = hdrs_len;
2262 
2263         connp->conn_ht_ulp = hdrs + ip_hdr_length;
2264         connp->conn_ht_ulp_len = ulp_hdr_length;
2265 
2266         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2267                 ipha_t  *ipha = (ipha_t *)hdrs;
2268 
2269                 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2270                 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2271                 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2272                 ipha->ipha_length = htons(hdrs_len);
2273                 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2274                         ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2275                 else
2276                         ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2277 
2278                 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2279                         connp->conn_sum = cksum_massage_options_v4(ipha,
2280                             connp->conn_netstack);
2281                 } else {
2282                         connp->conn_sum = 0;
2283                 }
2284         } else {
2285                 ip6_t   *ip6h = (ip6_t *)hdrs;
2286 
2287                 ip6h->ip6_src = *v6src;
2288                 ip6h->ip6_dst = *v6dst;
2289                 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2290                     flowinfo);
2291                 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2292 
2293                 if (ipp->ipp_fields & IPPF_RTHDR) {
2294                         connp->conn_sum = cksum_massage_options_v6(ip6h,
2295                             ip_hdr_length, connp->conn_netstack);
2296 
2297                         /*
2298                          * Verify that the first hop isn't a mapped address.
2299                          * Routers along the path need to do this verification
2300                          * for subsequent hops.
2301                          */
2302                         if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2303                                 return (EADDRNOTAVAIL);
2304 
2305                 } else {
2306                         connp->conn_sum = 0;
2307                 }
2308         }
2309         return (0);
2310 }
2311 
2312 /*
2313  * Prepend a header template to data_mp based on the ip_pkt_t
2314  * and the passed in source, destination and protocol.
2315  *
2316  * Returns failure if can't allocate memory, in which case data_mp is freed.
2317  * We allocate space for the transport header (ulp_hdr_len) and
2318  * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2319  *
2320  * We massage an routing option/header and return the ckecksum difference
2321  * in *sump. This is in host byte order.
2322  *
2323  * Caller needs to update conn_wroff if desired.
2324  */
2325 mblk_t *
2326 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2327     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2328     uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2329     uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2330 {
2331         uint_t          ip_hdr_length;
2332         uchar_t         *hdrs;
2333         uint_t          hdrs_len;
2334         mblk_t          *mp;
2335 
2336         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2337                 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2338                 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2339         } else {
2340                 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2341         }
2342         hdrs_len = ip_hdr_length + ulp_hdr_length;
2343         ASSERT(hdrs_len != 0);
2344 
2345         ixa->ixa_ip_hdr_length = ip_hdr_length;
2346 
2347         /* Can we prepend to data_mp? */
2348         if (data_mp != NULL &&
2349             data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2350             data_mp->b_datap->db_ref == 1) {
2351                 hdrs = data_mp->b_rptr - hdrs_len;
2352                 data_mp->b_rptr = hdrs;
2353                 mp = data_mp;
2354         } else {
2355                 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2356                 if (mp == NULL) {
2357                         freemsg(data_mp);
2358                         *errorp = ENOMEM;
2359                         return (NULL);
2360                 }
2361                 mp->b_wptr = mp->b_datap->db_lim;
2362                 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2363                 mp->b_cont = data_mp;
2364         }
2365 
2366         /*
2367          * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2368          * if PKTINFO (aka IPPF_ADDR) was set.
2369          */
2370         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2371                 ipha_t *ipha = (ipha_t *)hdrs;
2372 
2373                 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2374                 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2375                 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2376                 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2377                 ipha->ipha_length = htons(hdrs_len + data_length);
2378                 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2379                         ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2380                 else
2381                         ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2382 
2383                 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2384                         *sump = cksum_massage_options_v4(ipha,
2385                             ixa->ixa_ipst->ips_netstack);
2386                 } else {
2387                         *sump = 0;
2388                 }
2389         } else {
2390                 ip6_t *ip6h = (ip6_t *)hdrs;
2391 
2392                 ip6h->ip6_src = *v6src;
2393                 ip6h->ip6_dst = *v6dst;
2394                 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2395                 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2396 
2397                 if (ipp->ipp_fields & IPPF_RTHDR) {
2398                         *sump = cksum_massage_options_v6(ip6h,
2399                             ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2400 
2401                         /*
2402                          * Verify that the first hop isn't a mapped address.
2403                          * Routers along the path need to do this verification
2404                          * for subsequent hops.
2405                          */
2406                         if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2407                                 *errorp = EADDRNOTAVAIL;
2408                                 freemsg(mp);
2409                                 return (NULL);
2410                         }
2411                 } else {
2412                         *sump = 0;
2413                 }
2414         }
2415         return (mp);
2416 }
2417 
2418 /*
2419  * Massage a source route if any putting the first hop
2420  * in ipha_dst. Compute a starting value for the checksum which
2421  * takes into account that the original ipha_dst should be
2422  * included in the checksum but that IP will include the
2423  * first hop from the source route in the tcp checksum.
2424  */
2425 static uint32_t
2426 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2427 {
2428         in_addr_t       dst;
2429         uint32_t        cksum;
2430 
2431         /* Get last hop then diff against first hop */
2432         cksum = ip_massage_options(ipha, ns);
2433         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2434         dst = ipha->ipha_dst;
2435         cksum -= ((dst >> 16) + (dst & 0xffff));
2436         if ((int)cksum < 0)
2437                 cksum--;
2438         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2439         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2440         ASSERT(cksum < 0x10000);
2441         return (ntohs(cksum));
2442 }
2443 
2444 static uint32_t
2445 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2446 {
2447         uint8_t         *end;
2448         ip6_rthdr_t     *rth;
2449         uint32_t        cksum;
2450 
2451         end = (uint8_t *)ip6h + ip_hdr_len;
2452         rth = ip_find_rthdr_v6(ip6h, end);
2453         if (rth == NULL)
2454                 return (0);
2455 
2456         cksum = ip_massage_options_v6(ip6h, rth, ns);
2457         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2458         ASSERT(cksum < 0x10000);
2459         return (ntohs(cksum));
2460 }
2461 
2462 /*
2463  * ULPs that change the destination address need to call this for each
2464  * change to discard any state about a previous destination that might
2465  * have been multicast or multirt.
2466  */
2467 void
2468 ip_attr_newdst(ip_xmit_attr_t *ixa)
2469 {
2470         ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2471             IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2472             IXAF_NO_LOOP_ZONEID_SET);
2473 }
2474 
2475 /*
2476  * Determine the nexthop which will be used.
2477  * Normally this is just the destination, but if a IPv4 source route, or
2478  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2479  * there.
2480  */
2481 void
2482 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2483     const in6_addr_t *dst, in6_addr_t *nexthop)
2484 {
2485         if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2486                 *nexthop = *dst;
2487                 return;
2488         }
2489         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2490                 ipaddr_t v4dst;
2491                 ipaddr_t v4nexthop;
2492 
2493                 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2494                 v4nexthop = ip_pkt_source_route_v4(ipp);
2495                 if (v4nexthop == INADDR_ANY)
2496                         v4nexthop = v4dst;
2497 
2498                 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2499         } else {
2500                 const in6_addr_t *v6nexthop;
2501 
2502                 v6nexthop = ip_pkt_source_route_v6(ipp);
2503                 if (v6nexthop == NULL)
2504                         v6nexthop = dst;
2505 
2506                 *nexthop = *v6nexthop;
2507         }
2508 }
2509 
2510 /*
2511  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2512  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2513  * case (connected latching is done in conn_connect).
2514  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2515  * set, but doesn't otherwise use the conn_t.
2516  *
2517  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2518  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2519  *
2520  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2521  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2522  *
2523  * Updates laddrp and uinfo if they are non-NULL.
2524  *
2525  * TSOL notes: The callers if ip_attr_connect must check if the destination
2526  * is different than before and in that case redo conn_update_label.
2527  * The callers of conn_connect do not need that since conn_connect
2528  * performs the conn_update_label.
2529  */
2530 int
2531 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2532     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2533     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2534     iulp_t *uinfo, uint32_t flags)
2535 {
2536         in6_addr_t              laddr = *v6src;
2537         int                     error;
2538 
2539         ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2540 
2541         if (connp->conn_zone_is_global)
2542                 flags |= IPDF_ZONE_IS_GLOBAL;
2543         else
2544                 flags &= ~IPDF_ZONE_IS_GLOBAL;
2545 
2546         /*
2547          * Lookup the route to determine a source address and the uinfo.
2548          * If the ULP has a source route option then the caller will
2549          * have set v6nexthop to be the first hop.
2550          */
2551         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2552                 ipaddr_t v4dst;
2553                 ipaddr_t v4src, v4nexthop;
2554 
2555                 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2556                 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2557                 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2558 
2559                 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2560                         flags &= ~IPDF_SELECT_SRC;
2561                 else
2562                         flags |= IPDF_SELECT_SRC;
2563 
2564                 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2565                     uinfo, flags, connp->conn_mac_mode);
2566                 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2567         } else {
2568                 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2569                         flags &= ~IPDF_SELECT_SRC;
2570                 else
2571                         flags |= IPDF_SELECT_SRC;
2572 
2573                 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2574                     uinfo, flags, connp->conn_mac_mode);
2575         }
2576         /* Pass out some address even if we hit a RTF_REJECT etc */
2577         if (laddrp != NULL)
2578                 *laddrp = laddr;
2579 
2580         if (error != 0)
2581                 return (error);
2582 
2583         if (flags & IPDF_IPSEC) {
2584                 /*
2585                  * Set any IPsec policy in ixa. Routine also looks at ULP
2586                  * ports.
2587                  */
2588                 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2589         }
2590         return (0);
2591 }
2592 
2593 /*
2594  * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2595  * Assumes that conn_faddr and conn_fport are already set. As such it is not
2596  * usable for SCTP, since SCTP has multiple faddrs.
2597  *
2598  * Caller must hold conn_lock to provide atomic constency between the
2599  * conn_t's addresses and the ixa.
2600  * NOTE: this function drops and reaquires conn_lock since it can't be
2601  * held across ip_attr_connect/ip_set_destination.
2602  *
2603  * The caller needs to handle inserting in the receive-side fanout when
2604  * appropriate after conn_connect returns.
2605  */
2606 int
2607 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2608 {
2609         ip_xmit_attr_t  *ixa = connp->conn_ixa;
2610         in6_addr_t      nexthop;
2611         in6_addr_t      saddr, faddr;
2612         in_port_t       fport;
2613         int             error;
2614 
2615         ASSERT(MUTEX_HELD(&connp->conn_lock));
2616 
2617         if (connp->conn_ipversion == IPV4_VERSION)
2618                 ixa->ixa_flags |= IXAF_IS_IPV4;
2619         else
2620                 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2621 
2622         /* We do IPsec latching below - hence no caching in ip_attr_connect */
2623         flags &= ~IPDF_IPSEC;
2624 
2625         /* In case we had previously done an ip_attr_connect */
2626         ip_attr_newdst(ixa);
2627 
2628         /*
2629          * Determine the nexthop and copy the addresses before dropping
2630          * conn_lock.
2631          */
2632         ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2633             &connp->conn_faddr_v6, &nexthop);
2634         saddr = connp->conn_saddr_v6;
2635         faddr = connp->conn_faddr_v6;
2636         fport = connp->conn_fport;
2637 
2638         mutex_exit(&connp->conn_lock);
2639         error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2640             &saddr, uinfo, flags | IPDF_VERIFY_DST);
2641         mutex_enter(&connp->conn_lock);
2642 
2643         /* Could have changed even if an error */
2644         connp->conn_saddr_v6 = saddr;
2645         if (error != 0)
2646                 return (error);
2647 
2648         /*
2649          * Check whether Trusted Solaris policy allows communication with this
2650          * host, and pretend that the destination is unreachable if not.
2651          * Compute any needed label and place it in ipp_label_v4/v6.
2652          *
2653          * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2654          * the packet.
2655          *
2656          * TSOL Note: Any concurrent threads would pick a different ixa
2657          * (and ipp if they are to change the ipp)  so we
2658          * don't have to worry about concurrent threads.
2659          */
2660         if (is_system_labeled()) {
2661                 if (connp->conn_mlp_type != mlptSingle)
2662                         return (ECONNREFUSED);
2663 
2664                 /*
2665                  * conn_update_label will set ipp_label* which will later
2666                  * be used by conn_build_hdr_template.
2667                  */
2668                 error = conn_update_label(connp, ixa,
2669                     &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2670                 if (error != 0)
2671                         return (error);
2672         }
2673 
2674         /*
2675          * Ensure that we match on the selected local address.
2676          * This overrides conn_laddr in the case we had earlier bound to a
2677          * multicast or broadcast address.
2678          */
2679         connp->conn_laddr_v6 = connp->conn_saddr_v6;
2680 
2681         /*
2682          * Allow setting new policies.
2683          * The addresses/ports are already set, thus the IPsec policy calls
2684          * can handle their passed-in conn's.
2685          */
2686         connp->conn_policy_cached = B_FALSE;
2687 
2688         /*
2689          * Cache IPsec policy in this conn.  If we have per-socket policy,
2690          * we'll cache that.  If we don't, we'll inherit global policy.
2691          *
2692          * This is done before the caller inserts in the receive-side fanout.
2693          * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2694          * for connections where we don't have a policy. This is to prevent
2695          * global policy lookups in the inbound path.
2696          *
2697          * If we insert before we set conn_policy_cached,
2698          * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2699          * because global policy cound be non-empty. We normally call
2700          * ipsec_check_policy() for conn_policy_cached connections only if
2701          * conn_in_enforce_policy is set. But in this case,
2702          * conn_policy_cached can get set anytime since we made the
2703          * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2704          * called, which will make the above assumption false.  Thus, we
2705          * need to insert after we set conn_policy_cached.
2706          */
2707         error = ipsec_conn_cache_policy(connp,
2708             connp->conn_ipversion == IPV4_VERSION);
2709         if (error != 0)
2710                 return (error);
2711 
2712         /*
2713          * We defer to do LSO check until here since now we have better idea
2714          * whether IPsec is present. If the underlying ill is LSO capable,
2715          * copy its capability in so the ULP can decide whether to enable LSO
2716          * on this connection. So far, only TCP/IPv4 is implemented, so won't
2717          * claim LSO for IPv6.
2718          *
2719          * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2720          * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2721          */
2722         ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2723 
2724         ASSERT(ixa->ixa_ire != NULL);
2725         if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2726             !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2727             !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2728             !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2729             (ixa->ixa_nce != NULL) &&
2730             ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2731             ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2732             ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2733                 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2734                 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2735         }
2736 
2737         /* Check whether ZEROCOPY capability is usable for this connection. */
2738         ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2739 
2740         if ((flags & IPDF_ZCOPY) &&
2741             !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2742             !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2743             !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2744             (ixa->ixa_nce != NULL) &&
2745             ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2746                 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2747         }
2748         return (0);
2749 }
2750 
2751 /*
2752  * Predicates to check if the addresses match conn_last*
2753  */
2754 
2755 /*
2756  * Compare the conn against an address.
2757  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2758  */
2759 boolean_t
2760 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2761 {
2762         ASSERT(connp->conn_family == AF_INET);
2763         return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2764             sin->sin_port == connp->conn_lastdstport);
2765 }
2766 
2767 /*
2768  * Compare, including for mapped addresses
2769  */
2770 boolean_t
2771 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2772 {
2773         return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2774             sin6->sin6_port == connp->conn_lastdstport &&
2775             sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2776             sin6->sin6_scope_id == connp->conn_lastscopeid);
2777 }
2778 
2779 /*
2780  * Compute a label and place it in the ip_packet_t.
2781  * Handles IPv4 and IPv6.
2782  * The caller should have a correct ixa_tsl and ixa_zoneid and have
2783  * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2784  * has been called.
2785  */
2786 int
2787 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2788     const in6_addr_t *v6dst, ip_pkt_t *ipp)
2789 {
2790         int             err;
2791         ipaddr_t        v4dst;
2792 
2793         if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2794                 uchar_t         opt_storage[IP_MAX_OPT_LENGTH];
2795 
2796                 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2797 
2798                 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2799                     v4dst, opt_storage, ixa->ixa_ipst);
2800                 if (err == 0) {
2801                         /* Length contained in opt_storage[IPOPT_OLEN] */
2802                         err = optcom_pkt_set(opt_storage,
2803                             opt_storage[IPOPT_OLEN],
2804                             (uchar_t **)&ipp->ipp_label_v4,
2805                             &ipp->ipp_label_len_v4);
2806                 }
2807                 if (err != 0) {
2808                         DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2809                             char *, "conn(1) failed to update options(2) "
2810                             "on ixa(3)",
2811                             conn_t *, connp, char *, opt_storage,
2812                             ip_xmit_attr_t *, ixa);
2813                 }
2814                 if (ipp->ipp_label_len_v4 != 0)
2815                         ipp->ipp_fields |= IPPF_LABEL_V4;
2816                 else
2817                         ipp->ipp_fields &= ~IPPF_LABEL_V4;
2818         } else {
2819                 uchar_t         opt_storage[TSOL_MAX_IPV6_OPTION];
2820                 uint_t          optlen;
2821 
2822                 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2823                     v6dst, opt_storage, ixa->ixa_ipst);
2824                 if (err == 0) {
2825                         /*
2826                          * Note that ipp_label_v6 is just the option - not
2827                          * the hopopts extension header.
2828                          *
2829                          * Length contained in opt_storage[IPOPT_OLEN], but
2830                          * that doesn't include the two byte options header.
2831                          */
2832                         optlen = opt_storage[IPOPT_OLEN];
2833                         if (optlen != 0)
2834                                 optlen += 2;
2835 
2836                         err = optcom_pkt_set(opt_storage, optlen,
2837                             (uchar_t **)&ipp->ipp_label_v6,
2838                             &ipp->ipp_label_len_v6);
2839                 }
2840                 if (err != 0) {
2841                         DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2842                             char *, "conn(1) failed to update options(2) "
2843                             "on ixa(3)",
2844                             conn_t *, connp, char *, opt_storage,
2845                             ip_xmit_attr_t *, ixa);
2846                 }
2847                 if (ipp->ipp_label_len_v6 != 0)
2848                         ipp->ipp_fields |= IPPF_LABEL_V6;
2849                 else
2850                         ipp->ipp_fields &= ~IPPF_LABEL_V6;
2851         }
2852         return (err);
2853 }
2854 
2855 /*
2856  * Inherit all options settings from the parent/listener to the eager.
2857  * Returns zero on success; ENOMEM if memory allocation failed.
2858  *
2859  * We assume that the eager has not had any work done i.e., the conn_ixa
2860  * and conn_xmit_ipp are all zero.
2861  * Furthermore we assume that no other thread can access the eager (because
2862  * it isn't inserted in any fanout list).
2863  */
2864 int
2865 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2866 {
2867         cred_t  *credp;
2868         int     err;
2869         void    *notify_cookie;
2870         uint32_t xmit_hint;
2871 
2872         econnp->conn_family = lconnp->conn_family;
2873         econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2874         econnp->conn_wq = lconnp->conn_wq;
2875         econnp->conn_rq = lconnp->conn_rq;
2876 
2877         /*
2878          * Make a safe copy of the transmit attributes.
2879          * conn_connect will later be used by the caller to setup the ire etc.
2880          */
2881         ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2882         ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2883         ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2884         ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2885 
2886         /* Preserve ixa_notify_cookie and xmit_hint */
2887         notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2888         xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2889         ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2890         econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2891         econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2892 
2893         econnp->conn_bound_if = lconnp->conn_bound_if;
2894         econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2895 
2896         /* Inherit all RECV options */
2897         econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2898 
2899         err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2900             KM_NOSLEEP);
2901         if (err != 0)
2902                 return (err);
2903 
2904         econnp->conn_zoneid = lconnp->conn_zoneid;
2905         econnp->conn_allzones = lconnp->conn_allzones;
2906 
2907         /* This is odd. Pick a flowlabel for each connection instead? */
2908         econnp->conn_flowinfo = lconnp->conn_flowinfo;
2909 
2910         econnp->conn_default_ttl = lconnp->conn_default_ttl;
2911 
2912         /*
2913          * TSOL: tsol_input_proc() needs the eager's cred before the
2914          * eager is accepted
2915          */
2916         ASSERT(lconnp->conn_cred != NULL);
2917         econnp->conn_cred = credp = lconnp->conn_cred;
2918         crhold(credp);
2919         econnp->conn_cpid = lconnp->conn_cpid;
2920         econnp->conn_open_time = ddi_get_lbolt64();
2921 
2922         /*
2923          * Cache things in the ixa without any refhold.
2924          * Listener might not have set up ixa_cred
2925          */
2926         ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2927         econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2928         econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2929         if (is_system_labeled())
2930                 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2931 
2932         /*
2933          * If the caller has the process-wide flag set, then default to MAC
2934          * exempt mode.  This allows read-down to unlabeled hosts.
2935          */
2936         if (getpflags(NET_MAC_AWARE, credp) != 0)
2937                 econnp->conn_mac_mode = CONN_MAC_AWARE;
2938 
2939         econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2940 
2941         /*
2942          * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2943          * via soaccept()->soinheritoptions() which essentially applies
2944          * all the listener options to the new connection. The options that we
2945          * need to take care of are:
2946          * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2947          * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2948          * SO_SNDBUF, SO_RCVBUF.
2949          *
2950          * SO_RCVBUF:   conn_rcvbuf is set.
2951          * SO_SNDBUF:   conn_sndbuf is set.
2952          */
2953 
2954         /* Could we define a struct and use a struct copy for this? */
2955         econnp->conn_sndbuf = lconnp->conn_sndbuf;
2956         econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2957         econnp->conn_sndlowat = lconnp->conn_sndlowat;
2958         econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2959         econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2960         econnp->conn_oobinline = lconnp->conn_oobinline;
2961         econnp->conn_debug = lconnp->conn_debug;
2962         econnp->conn_keepalive = lconnp->conn_keepalive;
2963         econnp->conn_linger = lconnp->conn_linger;
2964         econnp->conn_lingertime = lconnp->conn_lingertime;
2965 
2966         /* Set the IP options */
2967         econnp->conn_broadcast = lconnp->conn_broadcast;
2968         econnp->conn_useloopback = lconnp->conn_useloopback;
2969         econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2970         return (0);
2971 }