1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 /* Copyright (c) 1990 Mentat Inc. */
  26 
  27 #include <sys/types.h>
  28 #include <sys/stream.h>
  29 #include <sys/strsun.h>
  30 #define _SUN_TPI_VERSION 2
  31 #include <sys/tihdr.h>
  32 #include <sys/xti_inet.h>
  33 #include <sys/ucred.h>
  34 #include <sys/zone.h>
  35 #include <sys/ddi.h>
  36 #include <sys/sunddi.h>
  37 #include <sys/cmn_err.h>
  38 #include <sys/debug.h>
  39 #include <sys/atomic.h>
  40 #include <sys/policy.h>
  41 
  42 #include <sys/systm.h>
  43 #include <sys/param.h>
  44 #include <sys/kmem.h>
  45 #include <sys/sdt.h>
  46 #include <sys/socket.h>
  47 #include <sys/ethernet.h>
  48 #include <sys/mac.h>
  49 #include <net/if.h>
  50 #include <net/if_types.h>
  51 #include <net/if_arp.h>
  52 #include <net/route.h>
  53 #include <sys/sockio.h>
  54 #include <netinet/in.h>
  55 #include <net/if_dl.h>
  56 
  57 #include <inet/common.h>
  58 #include <inet/mi.h>
  59 #include <inet/mib2.h>
  60 #include <inet/nd.h>
  61 #include <inet/arp.h>
  62 #include <inet/snmpcom.h>
  63 #include <inet/kstatcom.h>
  64 
  65 #include <netinet/igmp_var.h>
  66 #include <netinet/ip6.h>
  67 #include <netinet/icmp6.h>
  68 #include <netinet/sctp.h>
  69 
  70 #include <inet/ip.h>
  71 #include <inet/ip_impl.h>
  72 #include <inet/ip6.h>
  73 #include <inet/ip6_asp.h>
  74 #include <inet/tcp.h>
  75 #include <inet/ip_multi.h>
  76 #include <inet/ip_if.h>
  77 #include <inet/ip_ire.h>
  78 #include <inet/ip_ftable.h>
  79 #include <inet/ip_rts.h>
  80 #include <inet/optcom.h>
  81 #include <inet/ip_ndp.h>
  82 #include <inet/ip_listutils.h>
  83 #include <netinet/igmp.h>
  84 #include <netinet/ip_mroute.h>
  85 #include <netinet/udp.h>
  86 #include <inet/ipp_common.h>
  87 
  88 #include <net/pfkeyv2.h>
  89 #include <inet/sadb.h>
  90 #include <inet/ipsec_impl.h>
  91 #include <inet/ipdrop.h>
  92 #include <inet/ip_netinfo.h>
  93 
  94 #include <inet/ipclassifier.h>
  95 #include <inet/sctp_ip.h>
  96 #include <inet/sctp/sctp_impl.h>
  97 #include <inet/udp_impl.h>
  98 #include <sys/sunddi.h>
  99 
 100 #include <sys/tsol/label.h>
 101 #include <sys/tsol/tnet.h>
 102 
 103 /*
 104  * Return how much size is needed for the different ancillary data items
 105  */
 106 uint_t
 107 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
 108     ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
 109 {
 110         uint_t          ancil_size;
 111         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
 112 
 113         /*
 114          * If IP_RECVDSTADDR is set we include the destination IP
 115          * address as an option. With IP_RECVOPTS we include all
 116          * the IP options.
 117          */
 118         ancil_size = 0;
 119         if (recv_ancillary.crb_recvdstaddr &&
 120             (ira->ira_flags & IRAF_IS_IPV4)) {
 121                 ancil_size += sizeof (struct T_opthdr) +
 122                     sizeof (struct in_addr);
 123                 IP_STAT(ipst, conn_in_recvdstaddr);
 124         }
 125 
 126         /*
 127          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 128          * are different
 129          */
 130         if (recv_ancillary.crb_ip_recvpktinfo &&
 131             connp->conn_family == AF_INET) {
 132                 ancil_size += sizeof (struct T_opthdr) +
 133                     sizeof (struct in_pktinfo);
 134                 IP_STAT(ipst, conn_in_recvpktinfo);
 135         }
 136 
 137         if ((recv_ancillary.crb_recvopts) &&
 138             (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 139                 ancil_size += sizeof (struct T_opthdr) +
 140                     ipp->ipp_ipv4_options_len;
 141                 IP_STAT(ipst, conn_in_recvopts);
 142         }
 143 
 144         if (recv_ancillary.crb_recvslla) {
 145                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 146                 ill_t *ill;
 147 
 148                 /* Make sure ira_l2src is setup if not already */
 149                 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
 150                         ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
 151                             ipst);
 152                         if (ill != NULL) {
 153                                 ip_setl2src(mp, ira, ill);
 154                                 ill_refrele(ill);
 155                         }
 156                 }
 157                 ancil_size += sizeof (struct T_opthdr) +
 158                     sizeof (struct sockaddr_dl);
 159                 IP_STAT(ipst, conn_in_recvslla);
 160         }
 161 
 162         if (recv_ancillary.crb_recvif) {
 163                 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
 164                 IP_STAT(ipst, conn_in_recvif);
 165         }
 166 
 167         /*
 168          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 169          * are different
 170          */
 171         if (recv_ancillary.crb_ip_recvpktinfo &&
 172             connp->conn_family == AF_INET6) {
 173                 ancil_size += sizeof (struct T_opthdr) +
 174                     sizeof (struct in6_pktinfo);
 175                 IP_STAT(ipst, conn_in_recvpktinfo);
 176         }
 177 
 178         if (recv_ancillary.crb_ipv6_recvhoplimit) {
 179                 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 180                 IP_STAT(ipst, conn_in_recvhoplimit);
 181         }
 182 
 183         if (recv_ancillary.crb_ipv6_recvtclass) {
 184                 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
 185                 IP_STAT(ipst, conn_in_recvtclass);
 186         }
 187 
 188         if (recv_ancillary.crb_ipv6_recvhopopts &&
 189             (ipp->ipp_fields & IPPF_HOPOPTS)) {
 190                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 191                 IP_STAT(ipst, conn_in_recvhopopts);
 192         }
 193         /*
 194          * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 195          * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 196          * options that appear before a routing header.
 197          * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
 198          */
 199         if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 200                 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 201                     (recv_ancillary.crb_ipv6_recvdstopts &&
 202                     recv_ancillary.crb_ipv6_recvrthdr)) {
 203                         ancil_size += sizeof (struct T_opthdr) +
 204                             ipp->ipp_rthdrdstoptslen;
 205                         IP_STAT(ipst, conn_in_recvrthdrdstopts);
 206                 }
 207         }
 208         if ((recv_ancillary.crb_ipv6_recvrthdr) &&
 209             (ipp->ipp_fields & IPPF_RTHDR)) {
 210                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 211                 IP_STAT(ipst, conn_in_recvrthdr);
 212         }
 213         if ((recv_ancillary.crb_ipv6_recvdstopts ||
 214             recv_ancillary.crb_old_ipv6_recvdstopts) &&
 215             (ipp->ipp_fields & IPPF_DSTOPTS)) {
 216                 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 217                 IP_STAT(ipst, conn_in_recvdstopts);
 218         }
 219         if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 220                 ancil_size += sizeof (struct T_opthdr) +
 221                     ucredminsize(ira->ira_cred);
 222                 IP_STAT(ipst, conn_in_recvucred);
 223         }
 224 
 225         /*
 226          * If SO_TIMESTAMP is set allocate the appropriate sized
 227          * buffer. Since gethrestime() expects a pointer aligned
 228          * argument, we allocate space necessary for extra
 229          * alignment (even though it might not be used).
 230          */
 231         if (recv_ancillary.crb_timestamp) {
 232                 ancil_size += sizeof (struct T_opthdr) +
 233                     sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 234                 IP_STAT(ipst, conn_in_timestamp);
 235         }
 236 
 237         /*
 238          * If IP_RECVTTL is set allocate the appropriate sized buffer
 239          */
 240         if (recv_ancillary.crb_recvttl &&
 241             (ira->ira_flags & IRAF_IS_IPV4)) {
 242                 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
 243                 IP_STAT(ipst, conn_in_recvttl);
 244         }
 245 
 246         return (ancil_size);
 247 }
 248 
 249 /*
 250  * Lay down the ancillary data items at "ancil_buf".
 251  * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
 252  * large buffer - ancil_size.
 253  */
 254 void
 255 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
 256     ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
 257 {
 258         /*
 259          * Copy in destination address before options to avoid
 260          * any padding issues.
 261          */
 262         if (recv_ancillary.crb_recvdstaddr &&
 263             (ira->ira_flags & IRAF_IS_IPV4)) {
 264                 struct T_opthdr *toh;
 265                 ipaddr_t *dstptr;
 266 
 267                 toh = (struct T_opthdr *)ancil_buf;
 268                 toh->level = IPPROTO_IP;
 269                 toh->name = IP_RECVDSTADDR;
 270                 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
 271                 toh->status = 0;
 272                 ancil_buf += sizeof (struct T_opthdr);
 273                 dstptr = (ipaddr_t *)ancil_buf;
 274                 *dstptr = ipp->ipp_addr_v4;
 275                 ancil_buf += sizeof (ipaddr_t);
 276                 ancil_size -= toh->len;
 277         }
 278 
 279         /*
 280          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 281          * are different
 282          */
 283         if (recv_ancillary.crb_ip_recvpktinfo &&
 284             connp->conn_family == AF_INET) {
 285                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 286                 struct T_opthdr *toh;
 287                 struct in_pktinfo *pktinfop;
 288                 ill_t *ill;
 289                 ipif_t *ipif;
 290 
 291                 toh = (struct T_opthdr *)ancil_buf;
 292                 toh->level = IPPROTO_IP;
 293                 toh->name = IP_PKTINFO;
 294                 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
 295                 toh->status = 0;
 296                 ancil_buf += sizeof (struct T_opthdr);
 297                 pktinfop = (struct in_pktinfo *)ancil_buf;
 298 
 299                 pktinfop->ipi_ifindex = ira->ira_ruifindex;
 300                 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
 301 
 302                 /* Find a good address to report */
 303                 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
 304                 if (ill != NULL) {
 305                         ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
 306                         if (ipif != NULL) {
 307                                 pktinfop->ipi_spec_dst.s_addr =
 308                                     ipif->ipif_lcl_addr;
 309                                 ipif_refrele(ipif);
 310                         }
 311                         ill_refrele(ill);
 312                 }
 313                 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
 314                 ancil_buf += sizeof (struct in_pktinfo);
 315                 ancil_size -= toh->len;
 316         }
 317 
 318         if ((recv_ancillary.crb_recvopts) &&
 319             (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
 320                 struct T_opthdr *toh;
 321 
 322                 toh = (struct T_opthdr *)ancil_buf;
 323                 toh->level = IPPROTO_IP;
 324                 toh->name = IP_RECVOPTS;
 325                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
 326                 toh->status = 0;
 327                 ancil_buf += sizeof (struct T_opthdr);
 328                 bcopy(ipp->ipp_ipv4_options, ancil_buf,
 329                     ipp->ipp_ipv4_options_len);
 330                 ancil_buf += ipp->ipp_ipv4_options_len;
 331                 ancil_size -= toh->len;
 332         }
 333 
 334         if (recv_ancillary.crb_recvslla) {
 335                 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
 336                 struct T_opthdr *toh;
 337                 struct sockaddr_dl *dstptr;
 338                 ill_t *ill;
 339                 int alen = 0;
 340 
 341                 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
 342                 if (ill != NULL)
 343                         alen = ill->ill_phys_addr_length;
 344 
 345                 /*
 346                  * For loopback multicast and broadcast the packet arrives
 347                  * with ira_ruifdex being the physical interface, but
 348                  * ira_l2src is all zero since ip_postfrag_loopback doesn't
 349                  * know our l2src. We don't report the address in that case.
 350                  */
 351                 if (ira->ira_flags & IRAF_LOOPBACK)
 352                         alen = 0;
 353 
 354                 toh = (struct T_opthdr *)ancil_buf;
 355                 toh->level = IPPROTO_IP;
 356                 toh->name = IP_RECVSLLA;
 357                 toh->len = sizeof (struct T_opthdr) +
 358                     sizeof (struct sockaddr_dl);
 359                 toh->status = 0;
 360                 ancil_buf += sizeof (struct T_opthdr);
 361                 dstptr = (struct sockaddr_dl *)ancil_buf;
 362                 dstptr->sdl_family = AF_LINK;
 363                 dstptr->sdl_index = ira->ira_ruifindex;
 364                 if (ill != NULL)
 365                         dstptr->sdl_type = ill->ill_type;
 366                 else
 367                         dstptr->sdl_type = 0;
 368                 dstptr->sdl_nlen = 0;
 369                 dstptr->sdl_alen = alen;
 370                 dstptr->sdl_slen = 0;
 371                 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
 372                 ancil_buf += sizeof (struct sockaddr_dl);
 373                 ancil_size -= toh->len;
 374                 if (ill != NULL)
 375                         ill_refrele(ill);
 376         }
 377 
 378         if (recv_ancillary.crb_recvif) {
 379                 struct T_opthdr *toh;
 380                 uint_t          *dstptr;
 381 
 382                 toh = (struct T_opthdr *)ancil_buf;
 383                 toh->level = IPPROTO_IP;
 384                 toh->name = IP_RECVIF;
 385                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 386                 toh->status = 0;
 387                 ancil_buf += sizeof (struct T_opthdr);
 388                 dstptr = (uint_t *)ancil_buf;
 389                 *dstptr = ira->ira_ruifindex;
 390                 ancil_buf += sizeof (uint_t);
 391                 ancil_size -= toh->len;
 392         }
 393 
 394         /*
 395          * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
 396          * are different
 397          */
 398         if (recv_ancillary.crb_ip_recvpktinfo &&
 399             connp->conn_family == AF_INET6) {
 400                 struct T_opthdr *toh;
 401                 struct in6_pktinfo *pkti;
 402 
 403                 toh = (struct T_opthdr *)ancil_buf;
 404                 toh->level = IPPROTO_IPV6;
 405                 toh->name = IPV6_PKTINFO;
 406                 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
 407                 toh->status = 0;
 408                 ancil_buf += sizeof (struct T_opthdr);
 409                 pkti = (struct in6_pktinfo *)ancil_buf;
 410                 if (ira->ira_flags & IRAF_IS_IPV4) {
 411                         IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
 412                             &pkti->ipi6_addr);
 413                 } else {
 414                         pkti->ipi6_addr = ipp->ipp_addr;
 415                 }
 416                 pkti->ipi6_ifindex = ira->ira_ruifindex;
 417 
 418                 ancil_buf += sizeof (*pkti);
 419                 ancil_size -= toh->len;
 420         }
 421         if (recv_ancillary.crb_ipv6_recvhoplimit) {
 422                 struct T_opthdr *toh;
 423 
 424                 toh = (struct T_opthdr *)ancil_buf;
 425                 toh->level = IPPROTO_IPV6;
 426                 toh->name = IPV6_HOPLIMIT;
 427                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 428                 toh->status = 0;
 429                 ancil_buf += sizeof (struct T_opthdr);
 430                 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
 431                 ancil_buf += sizeof (uint_t);
 432                 ancil_size -= toh->len;
 433         }
 434         if (recv_ancillary.crb_ipv6_recvtclass) {
 435                 struct T_opthdr *toh;
 436 
 437                 toh = (struct T_opthdr *)ancil_buf;
 438                 toh->level = IPPROTO_IPV6;
 439                 toh->name = IPV6_TCLASS;
 440                 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
 441                 toh->status = 0;
 442                 ancil_buf += sizeof (struct T_opthdr);
 443 
 444                 if (ira->ira_flags & IRAF_IS_IPV4)
 445                         *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
 446                 else
 447                         *(uint_t *)ancil_buf = ipp->ipp_tclass;
 448                 ancil_buf += sizeof (uint_t);
 449                 ancil_size -= toh->len;
 450         }
 451         if (recv_ancillary.crb_ipv6_recvhopopts &&
 452             (ipp->ipp_fields & IPPF_HOPOPTS)) {
 453                 struct T_opthdr *toh;
 454 
 455                 toh = (struct T_opthdr *)ancil_buf;
 456                 toh->level = IPPROTO_IPV6;
 457                 toh->name = IPV6_HOPOPTS;
 458                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
 459                 toh->status = 0;
 460                 ancil_buf += sizeof (struct T_opthdr);
 461                 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
 462                 ancil_buf += ipp->ipp_hopoptslen;
 463                 ancil_size -= toh->len;
 464         }
 465         /*
 466          * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
 467          * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
 468          * options that appear before a routing header.
 469          * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
 470          */
 471         if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
 472                 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
 473                     (recv_ancillary.crb_ipv6_recvdstopts &&
 474                     recv_ancillary.crb_ipv6_recvrthdr)) {
 475                         struct T_opthdr *toh;
 476 
 477                         toh = (struct T_opthdr *)ancil_buf;
 478                         toh->level = IPPROTO_IPV6;
 479                         toh->name = IPV6_DSTOPTS;
 480                         toh->len = sizeof (struct T_opthdr) +
 481                             ipp->ipp_rthdrdstoptslen;
 482                         toh->status = 0;
 483                         ancil_buf += sizeof (struct T_opthdr);
 484                         bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
 485                             ipp->ipp_rthdrdstoptslen);
 486                         ancil_buf += ipp->ipp_rthdrdstoptslen;
 487                         ancil_size -= toh->len;
 488                 }
 489         }
 490         if (recv_ancillary.crb_ipv6_recvrthdr &&
 491             (ipp->ipp_fields & IPPF_RTHDR)) {
 492                 struct T_opthdr *toh;
 493 
 494                 toh = (struct T_opthdr *)ancil_buf;
 495                 toh->level = IPPROTO_IPV6;
 496                 toh->name = IPV6_RTHDR;
 497                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
 498                 toh->status = 0;
 499                 ancil_buf += sizeof (struct T_opthdr);
 500                 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
 501                 ancil_buf += ipp->ipp_rthdrlen;
 502                 ancil_size -= toh->len;
 503         }
 504         if ((recv_ancillary.crb_ipv6_recvdstopts ||
 505             recv_ancillary.crb_old_ipv6_recvdstopts) &&
 506             (ipp->ipp_fields & IPPF_DSTOPTS)) {
 507                 struct T_opthdr *toh;
 508 
 509                 toh = (struct T_opthdr *)ancil_buf;
 510                 toh->level = IPPROTO_IPV6;
 511                 toh->name = IPV6_DSTOPTS;
 512                 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
 513                 toh->status = 0;
 514                 ancil_buf += sizeof (struct T_opthdr);
 515                 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
 516                 ancil_buf += ipp->ipp_dstoptslen;
 517                 ancil_size -= toh->len;
 518         }
 519 
 520         if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
 521                 struct T_opthdr *toh;
 522                 cred_t          *rcr = connp->conn_cred;
 523 
 524                 toh = (struct T_opthdr *)ancil_buf;
 525                 toh->level = SOL_SOCKET;
 526                 toh->name = SCM_UCRED;
 527                 toh->len = sizeof (struct T_opthdr) +
 528                     ucredminsize(ira->ira_cred);
 529                 toh->status = 0;
 530                 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
 531                 ancil_buf += toh->len;
 532                 ancil_size -= toh->len;
 533         }
 534         if (recv_ancillary.crb_timestamp) {
 535                 struct  T_opthdr *toh;
 536 
 537                 toh = (struct T_opthdr *)ancil_buf;
 538                 toh->level = SOL_SOCKET;
 539                 toh->name = SCM_TIMESTAMP;
 540                 toh->len = sizeof (struct T_opthdr) +
 541                     sizeof (timestruc_t) + _POINTER_ALIGNMENT;
 542                 toh->status = 0;
 543                 ancil_buf += sizeof (struct T_opthdr);
 544                 /* Align for gethrestime() */
 545                 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
 546                     sizeof (intptr_t));
 547                 gethrestime((timestruc_t *)ancil_buf);
 548                 ancil_buf = (uchar_t *)toh + toh->len;
 549                 ancil_size -= toh->len;
 550         }
 551 
 552         /*
 553          * CAUTION:
 554          * Due to aligment issues
 555          * Processing of IP_RECVTTL option
 556          * should always be the last. Adding
 557          * any option processing after this will
 558          * cause alignment panic.
 559          */
 560         if (recv_ancillary.crb_recvttl &&
 561             (ira->ira_flags & IRAF_IS_IPV4)) {
 562                 struct  T_opthdr *toh;
 563                 uint8_t *dstptr;
 564 
 565                 toh = (struct T_opthdr *)ancil_buf;
 566                 toh->level = IPPROTO_IP;
 567                 toh->name = IP_RECVTTL;
 568                 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
 569                 toh->status = 0;
 570                 ancil_buf += sizeof (struct T_opthdr);
 571                 dstptr = (uint8_t *)ancil_buf;
 572                 *dstptr = ipp->ipp_hoplimit;
 573                 ancil_buf += sizeof (uint8_t);
 574                 ancil_size -= toh->len;
 575         }
 576 
 577         /* Consumed all of allocated space */
 578         ASSERT(ancil_size == 0);
 579 
 580 }
 581 
 582 /*
 583  * This routine retrieves the current status of socket options.
 584  * It returns the size of the option retrieved, or -1.
 585  */
 586 int
 587 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
 588     uchar_t *ptr)
 589 {
 590         int             *i1 = (int *)ptr;
 591         conn_t          *connp = coa->coa_connp;
 592         ip_xmit_attr_t  *ixa = coa->coa_ixa;
 593         ip_pkt_t        *ipp = coa->coa_ipp;
 594         ip_stack_t      *ipst = ixa->ixa_ipst;
 595         uint_t          len;
 596 
 597         ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
 598 
 599         switch (level) {
 600         case SOL_SOCKET:
 601                 switch (name) {
 602                 case SO_DEBUG:
 603                         *i1 = connp->conn_debug ? SO_DEBUG : 0;
 604                         break;  /* goto sizeof (int) option return */
 605                 case SO_KEEPALIVE:
 606                         *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
 607                         break;
 608                 case SO_LINGER: {
 609                         struct linger *lgr = (struct linger *)ptr;
 610 
 611                         lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
 612                         lgr->l_linger = connp->conn_lingertime;
 613                         }
 614                         return (sizeof (struct linger));
 615 
 616                 case SO_OOBINLINE:
 617                         *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
 618                         break;
 619                 case SO_REUSEADDR:
 620                         *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
 621                         break;  /* goto sizeof (int) option return */
 622                 case SO_TYPE:
 623                         *i1 = connp->conn_so_type;
 624                         break;  /* goto sizeof (int) option return */
 625                 case SO_DONTROUTE:
 626                         *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
 627                             SO_DONTROUTE : 0;
 628                         break;  /* goto sizeof (int) option return */
 629                 case SO_USELOOPBACK:
 630                         *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
 631                         break;  /* goto sizeof (int) option return */
 632                 case SO_BROADCAST:
 633                         *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
 634                         break;  /* goto sizeof (int) option return */
 635 
 636                 case SO_SNDBUF:
 637                         *i1 = connp->conn_sndbuf;
 638                         break;  /* goto sizeof (int) option return */
 639                 case SO_RCVBUF:
 640                         *i1 = connp->conn_rcvbuf;
 641                         break;  /* goto sizeof (int) option return */
 642                 case SO_RCVTIMEO:
 643                 case SO_SNDTIMEO:
                        /*
                         * Pass these two options in order for third party
                         * protocol usage. Here just return directly.
                         */
 648                         *i1 = 0;
 649                         break;
 650                 case SO_DGRAM_ERRIND:
 651                         *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
 652                         break;  /* goto sizeof (int) option return */
 653                 case SO_RECVUCRED:
 654                         *i1 = connp->conn_recv_ancillary.crb_recvucred;
 655                         break;  /* goto sizeof (int) option return */
 656                 case SO_TIMESTAMP:
 657                         *i1 = connp->conn_recv_ancillary.crb_timestamp;
 658                         break;  /* goto sizeof (int) option return */
 659                 case SO_VRRP:
 660                         *i1 = connp->conn_isvrrp;
 661                         break;  /* goto sizeof (int) option return */
 662                 case SO_ANON_MLP:
 663                         *i1 = connp->conn_anon_mlp;
 664                         break;  /* goto sizeof (int) option return */
 665                 case SO_MAC_EXEMPT:
 666                         *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
 667                         break;  /* goto sizeof (int) option return */
 668                 case SO_MAC_IMPLICIT:
 669                         *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
 670                         break;  /* goto sizeof (int) option return */
 671                 case SO_ALLZONES:
 672                         *i1 = connp->conn_allzones;
 673                         break;  /* goto sizeof (int) option return */
 674                 case SO_EXCLBIND:
 675                         *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
 676                         break;
 677                 case SO_PROTOTYPE:
 678                         *i1 = connp->conn_proto;
 679                         break;
 680 
 681                 case SO_DOMAIN:
 682                         *i1 = connp->conn_family;
 683                         break;
 684                 default:
 685                         return (-1);
 686                 }
 687                 break;
 688         case IPPROTO_IP:
 689                 if (connp->conn_family != AF_INET)
 690                         return (-1);
 691                 switch (name) {
 692                 case IP_OPTIONS:
 693                 case T_IP_OPTIONS:
 694                         if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
 695                                 return (0);
 696 
 697                         len = ipp->ipp_ipv4_options_len;
 698                         if (len > 0) {
 699                                 bcopy(ipp->ipp_ipv4_options, ptr, len);
 700                         }
 701                         return (len);
 702 
 703                 case IP_PKTINFO: {
 704                         /*
 705                          * This also handles IP_RECVPKTINFO.
 706                          * IP_PKTINFO and IP_RECVPKTINFO have same value.
 707                          * Differentiation is based on the size of the
 708                          * argument passed in.
 709                          */
 710                         struct in_pktinfo *pktinfo;
 711 
 712 #ifdef notdef
 713                         /* optcom doesn't provide a length with "get" */
 714                         if (inlen == sizeof (int)) {
 715                                 /* This is IP_RECVPKTINFO option. */
 716                                 *i1 = connp->conn_recv_ancillary.
 717                                     crb_ip_recvpktinfo;
 718                                 return (sizeof (int));
 719                         }
 720 #endif
 721                         /* XXX assumes that caller has room for max size! */
 722 
 723                         pktinfo = (struct in_pktinfo *)ptr;
 724                         pktinfo->ipi_ifindex = ixa->ixa_ifindex;
 725                         if (ipp->ipp_fields & IPPF_ADDR)
 726                                 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
 727                         else
 728                                 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
 729                         return (sizeof (struct in_pktinfo));
 730                 }
 731                 case IP_DONTFRAG:
 732                         *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 733                         return (sizeof (int));
 734                 case IP_TOS:
 735                 case T_IP_TOS:
 736                         *i1 = (int)ipp->ipp_type_of_service;
 737                         break;  /* goto sizeof (int) option return */
 738                 case IP_TTL:
 739                         *i1 = (int)ipp->ipp_unicast_hops;
 740                         break;  /* goto sizeof (int) option return */
 741                 case IP_DHCPINIT_IF:
 742                         return (-1);
 743                 case IP_NEXTHOP:
 744                         if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
 745                                 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
 746                                 return (sizeof (ipaddr_t));
 747                         } else {
 748                                 return (0);
 749                         }
 750 
 751                 case IP_MULTICAST_IF:
 752                         /* 0 address if not set */
 753                         *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
 754                         return (sizeof (ipaddr_t));
 755                 case IP_MULTICAST_TTL:
 756                         *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
 757                         return (sizeof (uchar_t));
 758                 case IP_MULTICAST_LOOP:
 759                         *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 760                         return (sizeof (uint8_t));
 761                 case IP_RECVOPTS:
 762                         *i1 = connp->conn_recv_ancillary.crb_recvopts;
 763                         break;  /* goto sizeof (int) option return */
 764                 case IP_RECVDSTADDR:
 765                         *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
 766                         break;  /* goto sizeof (int) option return */
 767                 case IP_RECVIF:
 768                         *i1 = connp->conn_recv_ancillary.crb_recvif;
 769                         break;  /* goto sizeof (int) option return */
 770                 case IP_RECVSLLA:
 771                         *i1 = connp->conn_recv_ancillary.crb_recvslla;
 772                         break;  /* goto sizeof (int) option return */
 773                 case IP_RECVTTL:
 774                         *i1 = connp->conn_recv_ancillary.crb_recvttl;
 775                         break;  /* goto sizeof (int) option return */
 776                 case IP_ADD_MEMBERSHIP:
 777                 case IP_DROP_MEMBERSHIP:
 778                 case MCAST_JOIN_GROUP:
 779                 case MCAST_LEAVE_GROUP:
 780                 case IP_BLOCK_SOURCE:
 781                 case IP_UNBLOCK_SOURCE:
 782                 case IP_ADD_SOURCE_MEMBERSHIP:
 783                 case IP_DROP_SOURCE_MEMBERSHIP:
 784                 case MCAST_BLOCK_SOURCE:
 785                 case MCAST_UNBLOCK_SOURCE:
 786                 case MCAST_JOIN_SOURCE_GROUP:
 787                 case MCAST_LEAVE_SOURCE_GROUP:
 788                 case MRT_INIT:
 789                 case MRT_DONE:
 790                 case MRT_ADD_VIF:
 791                 case MRT_DEL_VIF:
 792                 case MRT_ADD_MFC:
 793                 case MRT_DEL_MFC:
 794                         /* cannot "get" the value for these */
 795                         return (-1);
 796                 case MRT_VERSION:
 797                 case MRT_ASSERT:
 798                         (void) ip_mrouter_get(name, connp, ptr);
 799                         return (sizeof (int));
 800                 case IP_SEC_OPT:
 801                         return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 802                             IPSEC_AF_V4));
 803                 case IP_BOUND_IF:
 804                         /* Zero if not set */
 805                         *i1 = connp->conn_bound_if;
 806                         break;  /* goto sizeof (int) option return */
 807                 case IP_UNSPEC_SRC:
 808                         *i1 = connp->conn_unspec_src;
 809                         break;  /* goto sizeof (int) option return */
 810                 case IP_BROADCAST_TTL:
 811                         if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
 812                                 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
 813                         else
 814                                 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
 815                         return (sizeof (uchar_t));
 816                 default:
 817                         return (-1);
 818                 }
 819                 break;
 820         case IPPROTO_IPV6:
 821                 if (connp->conn_family != AF_INET6)
 822                         return (-1);
 823                 switch (name) {
 824                 case IPV6_UNICAST_HOPS:
 825                         *i1 = (int)ipp->ipp_unicast_hops;
 826                         break;  /* goto sizeof (int) option return */
 827                 case IPV6_MULTICAST_IF:
 828                         /* 0 index if not set */
 829                         *i1 = ixa->ixa_multicast_ifindex;
 830                         break;  /* goto sizeof (int) option return */
 831                 case IPV6_MULTICAST_HOPS:
 832                         *i1 = ixa->ixa_multicast_ttl;
 833                         break;  /* goto sizeof (int) option return */
 834                 case IPV6_MULTICAST_LOOP:
 835                         *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
 836                         break;  /* goto sizeof (int) option return */
 837                 case IPV6_JOIN_GROUP:
 838                 case IPV6_LEAVE_GROUP:
 839                 case MCAST_JOIN_GROUP:
 840                 case MCAST_LEAVE_GROUP:
 841                 case MCAST_BLOCK_SOURCE:
 842                 case MCAST_UNBLOCK_SOURCE:
 843                 case MCAST_JOIN_SOURCE_GROUP:
 844                 case MCAST_LEAVE_SOURCE_GROUP:
 845                         /* cannot "get" the value for these */
 846                         return (-1);
 847                 case IPV6_BOUND_IF:
 848                         /* Zero if not set */
 849                         *i1 = connp->conn_bound_if;
 850                         break;  /* goto sizeof (int) option return */
 851                 case IPV6_UNSPEC_SRC:
 852                         *i1 = connp->conn_unspec_src;
 853                         break;  /* goto sizeof (int) option return */
 854                 case IPV6_RECVPKTINFO:
 855                         *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
 856                         break;  /* goto sizeof (int) option return */
 857                 case IPV6_RECVTCLASS:
 858                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
 859                         break;  /* goto sizeof (int) option return */
 860                 case IPV6_RECVPATHMTU:
 861                         *i1 = connp->conn_ipv6_recvpathmtu;
 862                         break;  /* goto sizeof (int) option return */
 863                 case IPV6_RECVHOPLIMIT:
 864                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
 865                         break;  /* goto sizeof (int) option return */
 866                 case IPV6_RECVHOPOPTS:
 867                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
 868                         break;  /* goto sizeof (int) option return */
 869                 case IPV6_RECVDSTOPTS:
 870                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
 871                         break;  /* goto sizeof (int) option return */
 872                 case _OLD_IPV6_RECVDSTOPTS:
 873                         *i1 =
 874                             connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
 875                         break;  /* goto sizeof (int) option return */
 876                 case IPV6_RECVRTHDRDSTOPTS:
 877                         *i1 = connp->conn_recv_ancillary.
 878                             crb_ipv6_recvrthdrdstopts;
 879                         break;  /* goto sizeof (int) option return */
 880                 case IPV6_RECVRTHDR:
 881                         *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
 882                         break;  /* goto sizeof (int) option return */
 883                 case IPV6_PKTINFO: {
 884                         /* XXX assumes that caller has room for max size! */
 885                         struct in6_pktinfo *pkti;
 886 
 887                         pkti = (struct in6_pktinfo *)ptr;
 888                         pkti->ipi6_ifindex = ixa->ixa_ifindex;
 889                         if (ipp->ipp_fields & IPPF_ADDR)
 890                                 pkti->ipi6_addr = ipp->ipp_addr;
 891                         else
 892                                 pkti->ipi6_addr = ipv6_all_zeros;
 893                         return (sizeof (struct in6_pktinfo));
 894                 }
 895                 case IPV6_TCLASS:
 896                         *i1 = ipp->ipp_tclass;
 897                         break;  /* goto sizeof (int) option return */
 898                 case IPV6_NEXTHOP: {
 899                         sin6_t *sin6 = (sin6_t *)ptr;
 900 
 901                         if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
 902                                 return (0);
 903 
 904                         *sin6 = sin6_null;
 905                         sin6->sin6_family = AF_INET6;
 906                         sin6->sin6_addr = ixa->ixa_nexthop_v6;
 907 
 908                         return (sizeof (sin6_t));
 909                 }
 910                 case IPV6_HOPOPTS:
 911                         if (!(ipp->ipp_fields & IPPF_HOPOPTS))
 912                                 return (0);
 913                         bcopy(ipp->ipp_hopopts, ptr,
 914                             ipp->ipp_hopoptslen);
 915                         return (ipp->ipp_hopoptslen);
 916                 case IPV6_RTHDRDSTOPTS:
 917                         if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
 918                                 return (0);
 919                         bcopy(ipp->ipp_rthdrdstopts, ptr,
 920                             ipp->ipp_rthdrdstoptslen);
 921                         return (ipp->ipp_rthdrdstoptslen);
 922                 case IPV6_RTHDR:
 923                         if (!(ipp->ipp_fields & IPPF_RTHDR))
 924                                 return (0);
 925                         bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
 926                         return (ipp->ipp_rthdrlen);
 927                 case IPV6_DSTOPTS:
 928                         if (!(ipp->ipp_fields & IPPF_DSTOPTS))
 929                                 return (0);
 930                         bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
 931                         return (ipp->ipp_dstoptslen);
 932                 case IPV6_PATHMTU:
 933                         return (ip_fill_mtuinfo(connp, ixa,
 934                             (struct ip6_mtuinfo *)ptr));
 935                 case IPV6_SEC_OPT:
 936                         return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
 937                             IPSEC_AF_V6));
 938                 case IPV6_SRC_PREFERENCES:
 939                         return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
 940                 case IPV6_DONTFRAG:
 941                         *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
 942                         return (sizeof (int));
 943                 case IPV6_USE_MIN_MTU:
 944                         if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
 945                                 *i1 = ixa->ixa_use_min_mtu;
 946                         else
 947                                 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
 948                         break;
 949                 case IPV6_V6ONLY:
 950                         *i1 = connp->conn_ipv6_v6only;
 951                         return (sizeof (int));
 952                 default:
 953                         return (-1);
 954                 }
 955                 break;
 956         case IPPROTO_UDP:
 957                 switch (name) {
 958                 case UDP_ANONPRIVBIND:
 959                         *i1 = connp->conn_anon_priv_bind;
 960                         break;
 961                 case UDP_EXCLBIND:
 962                         *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
 963                         break;
 964                 default:
 965                         return (-1);
 966                 }
 967                 break;
 968         case IPPROTO_TCP:
 969                 switch (name) {
 970                 case TCP_RECVDSTADDR:
 971                         *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
 972                         break;
 973                 case TCP_ANONPRIVBIND:
 974                         *i1 = connp->conn_anon_priv_bind;
 975                         break;
 976                 case TCP_EXCLBIND:
 977                         *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
 978                         break;
 979                 default:
 980                         return (-1);
 981                 }
 982                 break;
 983         default:
 984                 return (-1);
 985         }
 986         return (sizeof (int));
 987 }
 988 
/*
 * Forward declarations of the per-level option setters that conn_opt_set()
 * dispatches to. Each one takes the same arguments as conn_opt_set() except
 * for the option level, which is implied by the handler itself.
 */
static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
    uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
 999 
1000 /*
1001  * This routine sets the most common socket options including some
1002  * that are transport/ULP specific.
1003  * It returns errno or zero.
1004  *
1005  * For fixed length options, there is no sanity check
1006  * of passed in length is done. It is assumed *_optcom_req()
1007  * routines do the right thing.
1008  */
1009 int
1010 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1011     uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1012 {
1013         ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1014 
1015         /* We have different functions for different levels */
1016         switch (level) {
1017         case SOL_SOCKET:
1018                 return (conn_opt_set_socket(coa, name, inlen, invalp,
1019                     checkonly, cr));
1020         case IPPROTO_IP:
1021                 return (conn_opt_set_ip(coa, name, inlen, invalp,
1022                     checkonly, cr));
1023         case IPPROTO_IPV6:
1024                 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1025                     checkonly, cr));
1026         case IPPROTO_UDP:
1027                 return (conn_opt_set_udp(coa, name, inlen, invalp,
1028                     checkonly, cr));
1029         case IPPROTO_TCP:
1030                 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1031                     checkonly, cr));
1032         default:
1033                 return (0);
1034         }
1035 }
1036 
1037 /*
1038  * Handle SOL_SOCKET
1039  * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1040  * it implement their own checks and setting of conn_proto.
1041  */
1042 /* ARGSUSED1 */
1043 static int
1044 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1045     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1046 {
1047         conn_t          *connp = coa->coa_connp;
1048         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1049         int             *i1 = (int *)invalp;
1050         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1051 
1052         switch (name) {
1053         case SO_ALLZONES:
1054                 if (IPCL_IS_BOUND(connp))
1055                         return (EINVAL);
1056                 break;
1057         case SO_VRRP:
1058                 if (secpolicy_ip_config(cr, checkonly) != 0)
1059                         return (EACCES);
1060                 break;
1061         case SO_MAC_EXEMPT:
1062                 if (secpolicy_net_mac_aware(cr) != 0)
1063                         return (EACCES);
1064                 if (IPCL_IS_BOUND(connp))
1065                         return (EINVAL);
1066                 break;
1067         case SO_MAC_IMPLICIT:
1068                 if (secpolicy_net_mac_implicit(cr) != 0)
1069                         return (EACCES);
1070                 break;
1071         }
1072         if (checkonly)
1073                 return (0);
1074 
1075         mutex_enter(&connp->conn_lock);
1076         /* Here we set the actual option value */
1077         switch (name) {
1078         case SO_DEBUG:
1079                 connp->conn_debug = onoff;
1080                 break;
1081         case SO_KEEPALIVE:
1082                 connp->conn_keepalive = onoff;
1083                 break;
1084         case SO_LINGER: {
1085                 struct linger *lgr = (struct linger *)invalp;
1086 
1087                 if (lgr->l_onoff) {
1088                         connp->conn_linger = 1;
1089                         connp->conn_lingertime = lgr->l_linger;
1090                 } else {
1091                         connp->conn_linger = 0;
1092                         connp->conn_lingertime = 0;
1093                 }
1094                 break;
1095         }
1096         case SO_OOBINLINE:
1097                 connp->conn_oobinline = onoff;
1098                 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1099                 break;
1100         case SO_REUSEADDR:
1101                 connp->conn_reuseaddr = onoff;
1102                 break;
1103         case SO_DONTROUTE:
1104                 if (onoff)
1105                         ixa->ixa_flags |= IXAF_DONTROUTE;
1106                 else
1107                         ixa->ixa_flags &= ~IXAF_DONTROUTE;
1108                 coa->coa_changed |= COA_ROUTE_CHANGED;
1109                 break;
1110         case SO_USELOOPBACK:
1111                 connp->conn_useloopback = onoff;
1112                 break;
1113         case SO_BROADCAST:
1114                 connp->conn_broadcast = onoff;
1115                 break;
1116         case SO_SNDBUF:
1117                 /* ULP has range checked the value */
1118                 connp->conn_sndbuf = *i1;
1119                 coa->coa_changed |= COA_SNDBUF_CHANGED;
1120                 break;
1121         case SO_RCVBUF:
1122                 /* ULP has range checked the value */
1123                 connp->conn_rcvbuf = *i1;
1124                 coa->coa_changed |= COA_RCVBUF_CHANGED;
1125                 break;
1126         case SO_RCVTIMEO:
1127         case SO_SNDTIMEO:
1128                 /*
1129                  * Pass these two options in order for third part
1130                  * protocol usage.
1131                  */
1132                 break;
1133         case SO_DGRAM_ERRIND:
1134                 connp->conn_dgram_errind = onoff;
1135                 break;
1136         case SO_RECVUCRED:
1137                 connp->conn_recv_ancillary.crb_recvucred = onoff;
1138                 break;
1139         case SO_ALLZONES:
1140                 connp->conn_allzones = onoff;
1141                 coa->coa_changed |= COA_ROUTE_CHANGED;
1142                 if (onoff)
1143                         ixa->ixa_zoneid = ALL_ZONES;
1144                 else
1145                         ixa->ixa_zoneid = connp->conn_zoneid;
1146                 break;
1147         case SO_TIMESTAMP:
1148                 connp->conn_recv_ancillary.crb_timestamp = onoff;
1149                 break;
1150         case SO_VRRP:
1151                 connp->conn_isvrrp = onoff;
1152                 break;
1153         case SO_ANON_MLP:
1154                 connp->conn_anon_mlp = onoff;
1155                 break;
1156         case SO_MAC_EXEMPT:
1157                 connp->conn_mac_mode = onoff ?
1158                     CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1159                 break;
1160         case SO_MAC_IMPLICIT:
1161                 connp->conn_mac_mode = onoff ?
1162                     CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1163                 break;
1164         case SO_EXCLBIND:
1165                 connp->conn_exclbind = onoff;
1166                 break;
1167         }
1168         mutex_exit(&connp->conn_lock);
1169         return (0);
1170 }
1171 
1172 /* Handle IPPROTO_IP */
1173 static int
1174 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1175     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1176 {
1177         conn_t          *connp = coa->coa_connp;
1178         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1179         ip_pkt_t        *ipp = coa->coa_ipp;
1180         int             *i1 = (int *)invalp;
1181         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1182         ipaddr_t        addr = (ipaddr_t)*i1;
1183         uint_t          ifindex;
1184         zoneid_t        zoneid = IPCL_ZONEID(connp);
1185         ipif_t          *ipif;
1186         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1187         int             error;
1188 
1189         if (connp->conn_family != AF_INET)
1190                 return (EINVAL);
1191 
1192         ifindex = UINT_MAX;
1193         switch (name) {
1194         case IP_TTL:
1195                 /* Don't allow zero */
1196                 if (*i1 < 1 || *i1 > 255)
1197                         return (EINVAL);
1198                 break;
1199         case IP_MULTICAST_IF:
1200                 if (addr == INADDR_ANY) {
1201                         /* Clear */
1202                         ifindex = 0;
1203                         break;
1204                 }
1205                 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1206                 if (ipif == NULL)
1207                         return (EHOSTUNREACH);
1208                 /* not supported by the virtual network iface */
1209                 if (IS_VNI(ipif->ipif_ill)) {
1210                         ipif_refrele(ipif);
1211                         return (EINVAL);
1212                 }
1213                 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1214                 ipif_refrele(ipif);
1215                 break;
1216         case IP_NEXTHOP: {
1217                 ire_t   *ire;
1218 
1219                 if (addr == INADDR_ANY) {
1220                         /* Clear */
1221                         break;
1222                 }
1223                 /* Verify that the next-hop is on-link */
1224                 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1225                     NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1226                 if (ire == NULL)
1227                         return (EHOSTUNREACH);
1228                 ire_refrele(ire);
1229                 break;
1230         }
1231         case IP_OPTIONS:
1232         case T_IP_OPTIONS: {
1233                 uint_t newlen;
1234 
1235                 if (ipp->ipp_fields & IPPF_LABEL_V4)
1236                         newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1237                 else
1238                         newlen = inlen;
1239                 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1240                         return (EINVAL);
1241                 }
1242                 break;
1243         }
1244         case IP_PKTINFO: {
1245                 struct in_pktinfo *pktinfo;
1246 
1247                 /* Two different valid lengths */
1248                 if (inlen != sizeof (int) &&
1249                     inlen != sizeof (struct in_pktinfo))
1250                         return (EINVAL);
1251                 if (inlen == sizeof (int))
1252                         break;
1253 
1254                 pktinfo = (struct in_pktinfo *)invalp;
1255                 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1256                         switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1257                             zoneid, ipst, B_FALSE)) {
1258                         case IPVL_UNICAST_UP:
1259                         case IPVL_UNICAST_DOWN:
1260                                 break;
1261                         default:
1262                                 return (EADDRNOTAVAIL);
1263                         }
1264                 }
1265                 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1266                     B_FALSE, ipst))
1267                         return (ENXIO);
1268                 break;
1269         }
1270         case IP_BOUND_IF:
1271                 ifindex = *(uint_t *)i1;
1272 
1273                 /* Just check it is ok. */
1274                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1275                         return (ENXIO);
1276                 break;
1277         }
1278         if (checkonly)
1279                 return (0);
1280 
1281         /* Here we set the actual option value */
1282         /*
1283          * conn_lock protects the bitfields, and is used to
1284          * set the fields atomically. Not needed for ixa settings since
1285          * the caller has an exclusive copy of the ixa.
1286          * We can not hold conn_lock across the multicast options though.
1287          */
1288         switch (name) {
1289         case IP_OPTIONS:
1290         case T_IP_OPTIONS:
1291                 /* Save options for use by IP. */
1292                 mutex_enter(&connp->conn_lock);
1293                 error = optcom_pkt_set(invalp, inlen,
1294                     (uchar_t **)&ipp->ipp_ipv4_options,
1295                     &ipp->ipp_ipv4_options_len);
1296                 if (error != 0) {
1297                         mutex_exit(&connp->conn_lock);
1298                         return (error);
1299                 }
1300                 if (ipp->ipp_ipv4_options_len == 0) {
1301                         ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1302                 } else {
1303                         ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1304                 }
1305                 mutex_exit(&connp->conn_lock);
1306                 coa->coa_changed |= COA_HEADER_CHANGED;
1307                 coa->coa_changed |= COA_WROFF_CHANGED;
1308                 break;
1309 
1310         case IP_TTL:
1311                 mutex_enter(&connp->conn_lock);
1312                 ipp->ipp_unicast_hops = *i1;
1313                 mutex_exit(&connp->conn_lock);
1314                 coa->coa_changed |= COA_HEADER_CHANGED;
1315                 break;
1316         case IP_TOS:
1317         case T_IP_TOS:
1318                 mutex_enter(&connp->conn_lock);
1319                 if (*i1 == -1) {
1320                         ipp->ipp_type_of_service = 0;
1321                 } else {
1322                         ipp->ipp_type_of_service = *i1;
1323                 }
1324                 mutex_exit(&connp->conn_lock);
1325                 coa->coa_changed |= COA_HEADER_CHANGED;
1326                 break;
1327         case IP_MULTICAST_IF:
1328                 ixa->ixa_multicast_ifindex = ifindex;
1329                 ixa->ixa_multicast_ifaddr = addr;
1330                 coa->coa_changed |= COA_ROUTE_CHANGED;
1331                 break;
1332         case IP_MULTICAST_TTL:
1333                 ixa->ixa_multicast_ttl = *invalp;
1334                 /* Handled automatically by ip_output */
1335                 break;
1336         case IP_MULTICAST_LOOP:
1337                 if (*invalp != 0)
1338                         ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1339                 else
1340                         ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1341                 /* Handled automatically by ip_output */
1342                 break;
1343         case IP_RECVOPTS:
1344                 mutex_enter(&connp->conn_lock);
1345                 connp->conn_recv_ancillary.crb_recvopts = onoff;
1346                 mutex_exit(&connp->conn_lock);
1347                 break;
1348         case IP_RECVDSTADDR:
1349                 mutex_enter(&connp->conn_lock);
1350                 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1351                 mutex_exit(&connp->conn_lock);
1352                 break;
1353         case IP_RECVIF:
1354                 mutex_enter(&connp->conn_lock);
1355                 connp->conn_recv_ancillary.crb_recvif = onoff;
1356                 mutex_exit(&connp->conn_lock);
1357                 break;
1358         case IP_RECVSLLA:
1359                 mutex_enter(&connp->conn_lock);
1360                 connp->conn_recv_ancillary.crb_recvslla = onoff;
1361                 mutex_exit(&connp->conn_lock);
1362                 break;
1363         case IP_RECVTTL:
1364                 mutex_enter(&connp->conn_lock);
1365                 connp->conn_recv_ancillary.crb_recvttl = onoff;
1366                 mutex_exit(&connp->conn_lock);
1367                 break;
1368         case IP_PKTINFO: {
1369                 /*
1370                  * This also handles IP_RECVPKTINFO.
1371                  * IP_PKTINFO and IP_RECVPKTINFO have same value.
1372                  * Differentiation is based on the size of the
1373                  * argument passed in.
1374                  */
1375                 struct in_pktinfo *pktinfo;
1376 
1377                 if (inlen == sizeof (int)) {
1378                         /* This is IP_RECVPKTINFO option. */
1379                         mutex_enter(&connp->conn_lock);
1380                         connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1381                             onoff;
1382                         mutex_exit(&connp->conn_lock);
1383                         break;
1384                 }
1385 
1386                 /* This is IP_PKTINFO option. */
1387                 mutex_enter(&connp->conn_lock);
1388                 pktinfo = (struct in_pktinfo *)invalp;
1389                 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1390                         ipp->ipp_fields |= IPPF_ADDR;
1391                         IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1392                             &ipp->ipp_addr);
1393                 } else {
1394                         ipp->ipp_fields &= ~IPPF_ADDR;
1395                         ipp->ipp_addr = ipv6_all_zeros;
1396                 }
1397                 mutex_exit(&connp->conn_lock);
1398                 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1399                 coa->coa_changed |= COA_ROUTE_CHANGED;
1400                 coa->coa_changed |= COA_HEADER_CHANGED;
1401                 break;
1402         }
1403         case IP_DONTFRAG:
1404                 if (onoff) {
1405                         ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1406                         ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1407                 } else {
1408                         ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1409                         ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1410                 }
1411                 /* Need to redo ip_attr_connect */
1412                 coa->coa_changed |= COA_ROUTE_CHANGED;
1413                 break;
1414         case IP_ADD_MEMBERSHIP:
1415         case IP_DROP_MEMBERSHIP:
1416         case MCAST_JOIN_GROUP:
1417         case MCAST_LEAVE_GROUP:
1418                 return (ip_opt_set_multicast_group(connp, name,
1419                     invalp, B_FALSE, checkonly));
1420 
1421         case IP_BLOCK_SOURCE:
1422         case IP_UNBLOCK_SOURCE:
1423         case IP_ADD_SOURCE_MEMBERSHIP:
1424         case IP_DROP_SOURCE_MEMBERSHIP:
1425         case MCAST_BLOCK_SOURCE:
1426         case MCAST_UNBLOCK_SOURCE:
1427         case MCAST_JOIN_SOURCE_GROUP:
1428         case MCAST_LEAVE_SOURCE_GROUP:
1429                 return (ip_opt_set_multicast_sources(connp, name,
1430                     invalp, B_FALSE, checkonly));
1431 
1432         case IP_SEC_OPT:
1433                 mutex_enter(&connp->conn_lock);
1434                 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1435                 mutex_exit(&connp->conn_lock);
1436                 if (error != 0) {
1437                         return (error);
1438                 }
1439                 /* This is an IPsec policy change - redo ip_attr_connect */
1440                 coa->coa_changed |= COA_ROUTE_CHANGED;
1441                 break;
1442         case IP_NEXTHOP:
1443                 ixa->ixa_nexthop_v4 = addr;
1444                 if (addr != INADDR_ANY)
1445                         ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1446                 else
1447                         ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1448                 coa->coa_changed |= COA_ROUTE_CHANGED;
1449                 break;
1450 
1451         case IP_BOUND_IF:
1452                 ixa->ixa_ifindex = ifindex;          /* Send */
1453                 mutex_enter(&connp->conn_lock);
1454                 connp->conn_incoming_ifindex = ifindex;      /* Receive */
1455                 connp->conn_bound_if = ifindex;              /* getsockopt */
1456                 mutex_exit(&connp->conn_lock);
1457                 coa->coa_changed |= COA_ROUTE_CHANGED;
1458                 break;
1459         case IP_UNSPEC_SRC:
1460                 mutex_enter(&connp->conn_lock);
1461                 connp->conn_unspec_src = onoff;
1462                 if (onoff)
1463                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1464                 else
1465                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1466 
1467                 mutex_exit(&connp->conn_lock);
1468                 break;
1469         case IP_BROADCAST_TTL:
1470                 ixa->ixa_broadcast_ttl = *invalp;
1471                 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1472                 /* Handled automatically by ip_output */
1473                 break;
1474         case MRT_INIT:
1475         case MRT_DONE:
1476         case MRT_ADD_VIF:
1477         case MRT_DEL_VIF:
1478         case MRT_ADD_MFC:
1479         case MRT_DEL_MFC:
1480         case MRT_ASSERT:
1481                 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1482                         return (error);
1483                 }
1484                 error = ip_mrouter_set((int)name, connp, checkonly,
1485                     (uchar_t *)invalp, inlen);
1486                 if (error) {
1487                         return (error);
1488                 }
1489                 return (0);
1490 
1491         }
1492         return (0);
1493 }
1494 
1495 /* Handle IPPROTO_IPV6 */
1496 static int
1497 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1498     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1499 {
1500         conn_t          *connp = coa->coa_connp;
1501         ip_xmit_attr_t  *ixa = coa->coa_ixa;
1502         ip_pkt_t        *ipp = coa->coa_ipp;
1503         int             *i1 = (int *)invalp;
1504         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
1505         uint_t          ifindex;
1506         zoneid_t        zoneid = IPCL_ZONEID(connp);
1507         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1508         int             error;
1509 
1510         if (connp->conn_family != AF_INET6)
1511                 return (EINVAL);
1512 
1513         ifindex = UINT_MAX;
1514         switch (name) {
1515         case IPV6_MULTICAST_IF:
1516                 /*
1517                  * The only possible error is EINVAL.
1518                  * We call this option on both V4 and V6
1519                  * If both fail, then this call returns
1520                  * EINVAL. If at least one of them succeeds we
1521                  * return success.
1522                  */
1523                 ifindex = *(uint_t *)i1;
1524 
1525                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1526                     !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1527                         return (EINVAL);
1528                 break;
1529         case IPV6_UNICAST_HOPS:
1530                 /* Don't allow zero. -1 means to use default */
1531                 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1532                         return (EINVAL);
1533                 break;
1534         case IPV6_MULTICAST_HOPS:
1535                 /* -1 means use default */
1536                 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1537                         return (EINVAL);
1538                 break;
1539         case IPV6_MULTICAST_LOOP:
1540                 if (*i1 != 0 && *i1 != 1)
1541                         return (EINVAL);
1542                 break;
1543         case IPV6_BOUND_IF:
1544                 ifindex = *(uint_t *)i1;
1545 
1546                 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1547                         return (ENXIO);
1548                 break;
1549         case IPV6_PKTINFO: {
1550                 struct in6_pktinfo *pkti;
1551                 boolean_t isv6;
1552 
1553                 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1554                         return (EINVAL);
1555                 if (inlen == 0)
1556                         break;  /* Clear values below */
1557 
1558                 /*
1559                  * Verify the source address and ifindex. Privileged users
1560                  * can use any source address.
1561                  */
1562                 pkti = (struct in6_pktinfo *)invalp;
1563 
1564                 /*
1565                  * For link-local addresses we use the ipi6_ifindex when
1566                  * we verify the local address.
1567                  * If net_rawaccess then any source address can be used.
1568                  */
1569                 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1570                     secpolicy_net_rawaccess(cr) != 0) {
1571                         uint_t scopeid = 0;
1572                         in6_addr_t *v6src = &pkti->ipi6_addr;
1573                         ipaddr_t v4src;
1574                         ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1575 
1576                         if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1577                                 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1578                                 if (v4src != INADDR_ANY) {
1579                                         laddr_type = ip_laddr_verify_v4(v4src,
1580                                             zoneid, ipst, B_FALSE);
1581                                 }
1582                         } else {
1583                                 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1584                                         scopeid = pkti->ipi6_ifindex;
1585 
1586                                 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1587                                     ipst, B_FALSE, scopeid);
1588                         }
1589                         switch (laddr_type) {
1590                         case IPVL_UNICAST_UP:
1591                         case IPVL_UNICAST_DOWN:
1592                                 break;
1593                         default:
1594                                 return (EADDRNOTAVAIL);
1595                         }
1596                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1597                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1598                         /* Allow any source */
1599                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1600                 }
1601                 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1602                 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1603                     ipst))
1604                         return (ENXIO);
1605                 break;
1606         }
1607         case IPV6_HOPLIMIT:
1608                 /* It is only allowed as ancilary data */
1609                 if (!coa->coa_ancillary)
1610                         return (EINVAL);
1611 
1612                 if (inlen != 0 && inlen != sizeof (int))
1613                         return (EINVAL);
1614                 if (inlen == sizeof (int)) {
1615                         if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1616                                 return (EINVAL);
1617                 }
1618                 break;
1619         case IPV6_TCLASS:
1620                 if (inlen != 0 && inlen != sizeof (int))
1621                         return (EINVAL);
1622                 if (inlen == sizeof (int)) {
1623                         if (*i1 > 255 || *i1 < -1)
1624                                 return (EINVAL);
1625                 }
1626                 break;
1627         case IPV6_NEXTHOP:
1628                 if (inlen != 0 && inlen != sizeof (sin6_t))
1629                         return (EINVAL);
1630                 if (inlen == sizeof (sin6_t)) {
1631                         sin6_t *sin6 = (sin6_t *)invalp;
1632                         ire_t   *ire;
1633 
1634                         if (sin6->sin6_family != AF_INET6)
1635                                 return (EAFNOSUPPORT);
1636                         if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1637                                 return (EADDRNOTAVAIL);
1638 
1639                         /* Verify that the next-hop is on-link */
1640                         ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1641                             0, 0, IRE_ONLINK, NULL, zoneid,
1642                             NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1643                         if (ire == NULL)
1644                                 return (EHOSTUNREACH);
1645                         ire_refrele(ire);
1646                         break;
1647                 }
1648                 break;
1649         case IPV6_RTHDR:
1650         case IPV6_DSTOPTS:
1651         case IPV6_RTHDRDSTOPTS:
1652         case IPV6_HOPOPTS: {
1653                 /* All have the length field in the same place */
1654                 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1655                 /*
1656                  * Sanity checks - minimum size, size a multiple of
1657                  * eight bytes, and matching size passed in.
1658                  */
1659                 if (inlen != 0 &&
1660                     inlen != (8 * (hopts->ip6h_len + 1)))
1661                         return (EINVAL);
1662                 break;
1663         }
1664         case IPV6_PATHMTU:
1665                 /* Can't be set */
1666                 return (EINVAL);
1667 
1668         case IPV6_USE_MIN_MTU:
1669                 if (inlen != sizeof (int))
1670                         return (EINVAL);
1671                 if (*i1 < -1 || *i1 > 1)
1672                         return (EINVAL);
1673                 break;
1674         case IPV6_SRC_PREFERENCES:
1675                 if (inlen != sizeof (uint32_t))
1676                         return (EINVAL);
1677                 break;
1678         case IPV6_V6ONLY:
1679                 if (*i1 < 0 || *i1 > 1) {
1680                         return (EINVAL);
1681                 }
1682                 break;
1683         }
1684         if (checkonly)
1685                 return (0);
1686 
1687         /* Here we set the actual option value */
1688         /*
1689          * conn_lock protects the bitfields, and is used to
1690          * set the fields atomically. Not needed for ixa settings since
1691          * the caller has an exclusive copy of the ixa.
1692          * We can not hold conn_lock across the multicast options though.
1693          */
1694         ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1695         switch (name) {
1696         case IPV6_MULTICAST_IF:
1697                 ixa->ixa_multicast_ifindex = ifindex;
1698                 /* Need to redo ip_attr_connect */
1699                 coa->coa_changed |= COA_ROUTE_CHANGED;
1700                 break;
1701         case IPV6_UNICAST_HOPS:
1702                 /* -1 means use default */
1703                 mutex_enter(&connp->conn_lock);
1704                 if (*i1 == -1) {
1705                         ipp->ipp_unicast_hops = connp->conn_default_ttl;
1706                 } else {
1707                         ipp->ipp_unicast_hops = (uint8_t)*i1;
1708                 }
1709                 mutex_exit(&connp->conn_lock);
1710                 coa->coa_changed |= COA_HEADER_CHANGED;
1711                 break;
1712         case IPV6_MULTICAST_HOPS:
1713                 /* -1 means use default */
1714                 if (*i1 == -1) {
1715                         ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1716                 } else {
1717                         ixa->ixa_multicast_ttl = (uint8_t)*i1;
1718                 }
1719                 /* Handled automatically by ip_output */
1720                 break;
1721         case IPV6_MULTICAST_LOOP:
1722                 if (*i1 != 0)
1723                         ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1724                 else
1725                         ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1726                 /* Handled automatically by ip_output */
1727                 break;
1728         case IPV6_JOIN_GROUP:
1729         case IPV6_LEAVE_GROUP:
1730         case MCAST_JOIN_GROUP:
1731         case MCAST_LEAVE_GROUP:
1732                 return (ip_opt_set_multicast_group(connp, name,
1733                     invalp, B_TRUE, checkonly));
1734 
1735         case MCAST_BLOCK_SOURCE:
1736         case MCAST_UNBLOCK_SOURCE:
1737         case MCAST_JOIN_SOURCE_GROUP:
1738         case MCAST_LEAVE_SOURCE_GROUP:
1739                 return (ip_opt_set_multicast_sources(connp, name,
1740                     invalp, B_TRUE, checkonly));
1741 
1742         case IPV6_BOUND_IF:
1743                 ixa->ixa_ifindex = ifindex;          /* Send */
1744                 mutex_enter(&connp->conn_lock);
1745                 connp->conn_incoming_ifindex = ifindex;      /* Receive */
1746                 connp->conn_bound_if = ifindex;              /* getsockopt */
1747                 mutex_exit(&connp->conn_lock);
1748                 coa->coa_changed |= COA_ROUTE_CHANGED;
1749                 break;
1750         case IPV6_UNSPEC_SRC:
1751                 mutex_enter(&connp->conn_lock);
1752                 connp->conn_unspec_src = onoff;
1753                 if (onoff)
1754                         ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1755                 else
1756                         ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1757                 mutex_exit(&connp->conn_lock);
1758                 break;
1759         case IPV6_RECVPKTINFO:
1760                 mutex_enter(&connp->conn_lock);
1761                 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1762                 mutex_exit(&connp->conn_lock);
1763                 break;
1764         case IPV6_RECVTCLASS:
1765                 mutex_enter(&connp->conn_lock);
1766                 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1767                 mutex_exit(&connp->conn_lock);
1768                 break;
1769         case IPV6_RECVPATHMTU:
1770                 mutex_enter(&connp->conn_lock);
1771                 connp->conn_ipv6_recvpathmtu = onoff;
1772                 mutex_exit(&connp->conn_lock);
1773                 break;
1774         case IPV6_RECVHOPLIMIT:
1775                 mutex_enter(&connp->conn_lock);
1776                 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1777                     onoff;
1778                 mutex_exit(&connp->conn_lock);
1779                 break;
1780         case IPV6_RECVHOPOPTS:
1781                 mutex_enter(&connp->conn_lock);
1782                 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1783                 mutex_exit(&connp->conn_lock);
1784                 break;
1785         case IPV6_RECVDSTOPTS:
1786                 mutex_enter(&connp->conn_lock);
1787                 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1788                 mutex_exit(&connp->conn_lock);
1789                 break;
1790         case _OLD_IPV6_RECVDSTOPTS:
1791                 mutex_enter(&connp->conn_lock);
1792                 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1793                     onoff;
1794                 mutex_exit(&connp->conn_lock);
1795                 break;
1796         case IPV6_RECVRTHDRDSTOPTS:
1797                 mutex_enter(&connp->conn_lock);
1798                 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1799                     onoff;
1800                 mutex_exit(&connp->conn_lock);
1801                 break;
1802         case IPV6_RECVRTHDR:
1803                 mutex_enter(&connp->conn_lock);
1804                 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1805                 mutex_exit(&connp->conn_lock);
1806                 break;
1807         case IPV6_PKTINFO:
1808                 mutex_enter(&connp->conn_lock);
1809                 if (inlen == 0) {
1810                         ipp->ipp_fields &= ~IPPF_ADDR;
1811                         ipp->ipp_addr = ipv6_all_zeros;
1812                         ixa->ixa_ifindex = 0;
1813                 } else {
1814                         struct in6_pktinfo *pkti;
1815 
1816                         pkti = (struct in6_pktinfo *)invalp;
1817                         ipp->ipp_addr = pkti->ipi6_addr;
1818                         if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1819                                 ipp->ipp_fields |= IPPF_ADDR;
1820                         else
1821                                 ipp->ipp_fields &= ~IPPF_ADDR;
1822                         ixa->ixa_ifindex = pkti->ipi6_ifindex;
1823                 }
1824                 mutex_exit(&connp->conn_lock);
1825                 /* Source and ifindex might have changed */
1826                 coa->coa_changed |= COA_HEADER_CHANGED;
1827                 coa->coa_changed |= COA_ROUTE_CHANGED;
1828                 break;
1829         case IPV6_HOPLIMIT:
1830                 mutex_enter(&connp->conn_lock);
1831                 if (inlen == 0 || *i1 == -1) {
1832                         /* Revert to default */
1833                         ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1834                         ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1835                 } else {
1836                         ipp->ipp_hoplimit = *i1;
1837                         ipp->ipp_fields |= IPPF_HOPLIMIT;
1838                         /* Ensure that it sticks for multicast packets */
1839                         ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1840                 }
1841                 mutex_exit(&connp->conn_lock);
1842                 coa->coa_changed |= COA_HEADER_CHANGED;
1843                 break;
1844         case IPV6_TCLASS:
1845                 /*
1846                  * IPV6_TCLASS accepts -1 as use kernel default
1847                  * and [0, 255] as the actualy traffic class.
1848                  */
1849                 mutex_enter(&connp->conn_lock);
1850                 if (inlen == 0 || *i1 == -1) {
1851                         ipp->ipp_tclass = 0;
1852                         ipp->ipp_fields &= ~IPPF_TCLASS;
1853                 } else {
1854                         ipp->ipp_tclass = *i1;
1855                         ipp->ipp_fields |= IPPF_TCLASS;
1856                 }
1857                 mutex_exit(&connp->conn_lock);
1858                 coa->coa_changed |= COA_HEADER_CHANGED;
1859                 break;
1860         case IPV6_NEXTHOP:
1861                 if (inlen == 0) {
1862                         ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1863                 } else {
1864                         sin6_t *sin6 = (sin6_t *)invalp;
1865 
1866                         ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1867                         if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1868                                 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1869                         else
1870                                 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1871                 }
1872                 coa->coa_changed |= COA_ROUTE_CHANGED;
1873                 break;
1874         case IPV6_HOPOPTS:
1875                 mutex_enter(&connp->conn_lock);
1876                 error = optcom_pkt_set(invalp, inlen,
1877                     (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1878                 if (error != 0) {
1879                         mutex_exit(&connp->conn_lock);
1880                         return (error);
1881                 }
1882                 if (ipp->ipp_hopoptslen == 0) {
1883                         ipp->ipp_fields &= ~IPPF_HOPOPTS;
1884                 } else {
1885                         ipp->ipp_fields |= IPPF_HOPOPTS;
1886                 }
1887                 mutex_exit(&connp->conn_lock);
1888                 coa->coa_changed |= COA_HEADER_CHANGED;
1889                 coa->coa_changed |= COA_WROFF_CHANGED;
1890                 break;
1891         case IPV6_RTHDRDSTOPTS:
1892                 mutex_enter(&connp->conn_lock);
1893                 error = optcom_pkt_set(invalp, inlen,
1894                     (uchar_t **)&ipp->ipp_rthdrdstopts,
1895                     &ipp->ipp_rthdrdstoptslen);
1896                 if (error != 0) {
1897                         mutex_exit(&connp->conn_lock);
1898                         return (error);
1899                 }
1900                 if (ipp->ipp_rthdrdstoptslen == 0) {
1901                         ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1902                 } else {
1903                         ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1904                 }
1905                 mutex_exit(&connp->conn_lock);
1906                 coa->coa_changed |= COA_HEADER_CHANGED;
1907                 coa->coa_changed |= COA_WROFF_CHANGED;
1908                 break;
1909         case IPV6_DSTOPTS:
1910                 mutex_enter(&connp->conn_lock);
1911                 error = optcom_pkt_set(invalp, inlen,
1912                     (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1913                 if (error != 0) {
1914                         mutex_exit(&connp->conn_lock);
1915                         return (error);
1916                 }
1917                 if (ipp->ipp_dstoptslen == 0) {
1918                         ipp->ipp_fields &= ~IPPF_DSTOPTS;
1919                 } else {
1920                         ipp->ipp_fields |= IPPF_DSTOPTS;
1921                 }
1922                 mutex_exit(&connp->conn_lock);
1923                 coa->coa_changed |= COA_HEADER_CHANGED;
1924                 coa->coa_changed |= COA_WROFF_CHANGED;
1925                 break;
1926         case IPV6_RTHDR:
1927                 mutex_enter(&connp->conn_lock);
1928                 error = optcom_pkt_set(invalp, inlen,
1929                     (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1930                 if (error != 0) {
1931                         mutex_exit(&connp->conn_lock);
1932                         return (error);
1933                 }
1934                 if (ipp->ipp_rthdrlen == 0) {
1935                         ipp->ipp_fields &= ~IPPF_RTHDR;
1936                 } else {
1937                         ipp->ipp_fields |= IPPF_RTHDR;
1938                 }
1939                 mutex_exit(&connp->conn_lock);
1940                 coa->coa_changed |= COA_HEADER_CHANGED;
1941                 coa->coa_changed |= COA_WROFF_CHANGED;
1942                 break;
1943 
1944         case IPV6_DONTFRAG:
1945                 if (onoff) {
1946                         ixa->ixa_flags |= IXAF_DONTFRAG;
1947                         ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1948                 } else {
1949                         ixa->ixa_flags &= ~IXAF_DONTFRAG;
1950                         ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1951                 }
1952                 /* Need to redo ip_attr_connect */
1953                 coa->coa_changed |= COA_ROUTE_CHANGED;
1954                 break;
1955 
1956         case IPV6_USE_MIN_MTU:
1957                 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1958                 ixa->ixa_use_min_mtu = *i1;
1959                 /* Need to redo ip_attr_connect */
1960                 coa->coa_changed |= COA_ROUTE_CHANGED;
1961                 break;
1962 
1963         case IPV6_SEC_OPT:
1964                 mutex_enter(&connp->conn_lock);
1965                 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1966                 mutex_exit(&connp->conn_lock);
1967                 if (error != 0) {
1968                         return (error);
1969                 }
1970                 /* This is an IPsec policy change - redo ip_attr_connect */
1971                 coa->coa_changed |= COA_ROUTE_CHANGED;
1972                 break;
1973         case IPV6_SRC_PREFERENCES:
1974                 /*
1975                  * This socket option only affects connected
1976                  * sockets that haven't already bound to a specific
1977                  * IPv6 address.  In other words, sockets that
1978                  * don't call bind() with an address other than the
1979                  * unspecified address and that call connect().
1980                  * ip_set_destination_v6() passes these preferences
1981                  * to the ipif_select_source_v6() function.
1982                  */
1983                 mutex_enter(&connp->conn_lock);
1984                 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1985                 mutex_exit(&connp->conn_lock);
1986                 if (error != 0) {
1987                         return (error);
1988                 }
1989                 break;
1990         case IPV6_V6ONLY:
1991                 mutex_enter(&connp->conn_lock);
1992                 connp->conn_ipv6_v6only = onoff;
1993                 mutex_exit(&connp->conn_lock);
1994                 break;
1995         }
1996         return (0);
1997 }
1998 
1999 /* Handle IPPROTO_UDP */
2000 /* ARGSUSED1 */
2001 static int
2002 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2003     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2004 {
2005         conn_t          *connp = coa->coa_connp;
2006         int             *i1 = (int *)invalp;
2007         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2008         int             error;
2009 
2010         switch (name) {
2011         case UDP_ANONPRIVBIND:
2012                 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2013                         return (error);
2014                 }
2015                 break;
2016         }
2017         if (checkonly)
2018                 return (0);
2019 
2020         /* Here we set the actual option value */
2021         mutex_enter(&connp->conn_lock);
2022         switch (name) {
2023         case UDP_ANONPRIVBIND:
2024                 connp->conn_anon_priv_bind = onoff;
2025                 break;
2026         case UDP_EXCLBIND:
2027                 connp->conn_exclbind = onoff;
2028                 break;
2029         }
2030         mutex_exit(&connp->conn_lock);
2031         return (0);
2032 }
2033 
2034 /* Handle IPPROTO_TCP */
2035 /* ARGSUSED1 */
2036 static int
2037 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2038     uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2039 {
2040         conn_t          *connp = coa->coa_connp;
2041         int             *i1 = (int *)invalp;
2042         boolean_t       onoff = (*i1 == 0) ? 0 : 1;
2043         int             error;
2044 
2045         switch (name) {
2046         case TCP_ANONPRIVBIND:
2047                 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2048                         return (error);
2049                 }
2050                 break;
2051         }
2052         if (checkonly)
2053                 return (0);
2054 
2055         /* Here we set the actual option value */
2056         mutex_enter(&connp->conn_lock);
2057         switch (name) {
2058         case TCP_ANONPRIVBIND:
2059                 connp->conn_anon_priv_bind = onoff;
2060                 break;
2061         case TCP_EXCLBIND:
2062                 connp->conn_exclbind = onoff;
2063                 break;
2064         case TCP_RECVDSTADDR:
2065                 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2066                 break;
2067         }
2068         mutex_exit(&connp->conn_lock);
2069         return (0);
2070 }
2071 
2072 int
2073 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2074 {
2075         sin_t           *sin;
2076         sin6_t          *sin6;
2077 
2078         if (connp->conn_family == AF_INET) {
2079                 if (*salenp < sizeof (sin_t))
2080                         return (EINVAL);
2081 
2082                 *salenp = sizeof (sin_t);
2083                 /* Fill zeroes and then initialize non-zero fields */
2084                 sin = (sin_t *)sa;
2085                 *sin = sin_null;
2086                 sin->sin_family = AF_INET;
2087                 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2088                     !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2089                         sin->sin_addr.s_addr = connp->conn_saddr_v4;
2090                 } else {
2091                         /*
2092                          * INADDR_ANY
2093                          * conn_saddr is not set, we might be bound to
2094                          * broadcast/multicast. Use conn_bound_addr as
2095                          * local address instead (that could
2096                          * also still be INADDR_ANY)
2097                          */
2098                         sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2099                 }
2100                 sin->sin_port = connp->conn_lport;
2101         } else {
2102                 if (*salenp < sizeof (sin6_t))
2103                         return (EINVAL);
2104 
2105                 *salenp = sizeof (sin6_t);
2106                 /* Fill zeroes and then initialize non-zero fields */
2107                 sin6 = (sin6_t *)sa;
2108                 *sin6 = sin6_null;
2109                 sin6->sin6_family = AF_INET6;
2110                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2111                         sin6->sin6_addr = connp->conn_saddr_v6;
2112                 } else {
2113                         /*
2114                          * conn_saddr is not set, we might be bound to
2115                          * broadcast/multicast. Use conn_bound_addr as
2116                          * local address instead (which could
2117                          * also still be unspecified)
2118                          */
2119                         sin6->sin6_addr = connp->conn_bound_addr_v6;
2120                 }
2121                 sin6->sin6_port = connp->conn_lport;
2122                 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2123                     (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2124                         sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2125         }
2126         return (0);
2127 }
2128 
2129 int
2130 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2131 {
2132         struct sockaddr_in      *sin;
2133         struct sockaddr_in6     *sin6;
2134 
2135         if (connp->conn_family == AF_INET) {
2136                 if (*salenp < sizeof (sin_t))
2137                         return (EINVAL);
2138 
2139                 *salenp = sizeof (sin_t);
2140                 /* initialize */
2141                 sin = (sin_t *)sa;
2142                 *sin = sin_null;
2143                 sin->sin_family = AF_INET;
2144                 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2145                 sin->sin_port = connp->conn_fport;
2146         } else {
2147                 if (*salenp < sizeof (sin6_t))
2148                         return (EINVAL);
2149 
2150                 *salenp = sizeof (sin6_t);
2151                 /* initialize */
2152                 sin6 = (sin6_t *)sa;
2153                 *sin6 = sin6_null;
2154                 sin6->sin6_family = AF_INET6;
2155                 sin6->sin6_addr = connp->conn_faddr_v6;
2156                 sin6->sin6_port =  connp->conn_fport;
2157                 sin6->sin6_flowinfo = connp->conn_flowinfo;
2158                 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2159                     (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2160                         sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2161         }
2162         return (0);
2163 }
2164 
/*
 * Forward declarations of the file-local checksum helpers. Both are defined
 * further down in this file and are called by the header-building routines
 * that follow.
 */
static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2167 
/*
 * Allocate and fill in conn_ht_iphc based on the current information
 * in the conn.
 * Normally used when we bind() and connect().
 * Returns failure if can't allocate memory, or if there is a problem
 * with a routing header/option.
 *
 * We allocate space for the transport header (ulp_hdr_length + extra) and
 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
 * The extra is there for transports that want some spare room for future
 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
 * excludes the extra part.
 *
 * We massage a routing option/header and store the checksum difference
 * in conn_sum.
 *
 * Returns 0 on success, EHOSTUNREACH if the IPv4 headers would exceed
 * IP_MAX_HDR_LENGTH, ENOMEM if the template could not be allocated, and
 * EADDRNOTAVAIL if, after massaging an IPv6 routing header, the
 * destination in the template is a v4-mapped address. Note that
 * ixa_ip_hdr_length has already been updated when ENOMEM is returned, and
 * that the template has already been rewritten when EADDRNOTAVAIL is
 * returned.
 *
 * Caller must hold conn_lock.
 * Caller needs to update conn_wroff if desired.
 */
int
conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
    const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
{
        ip_xmit_attr_t  *ixa = connp->conn_ixa;
        ip_pkt_t        *ipp = &connp->conn_xmit_ipp;
        uint_t          ip_hdr_length;
        uchar_t         *hdrs;
        uint_t          hdrs_len;

        ASSERT(MUTEX_HELD(&connp->conn_lock));

        if (ixa->ixa_flags & IXAF_IS_IPV4) {
                ip_hdr_length = ip_total_hdrs_len_v4(ipp);
                /* In case of TX label and IP options it can be too much */
                if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
                        /* Preserves existing TX errno for this */
                        return (EHOSTUNREACH);
                }
        } else {
                ip_hdr_length = ip_total_hdrs_len_v6(ipp);
        }
        ixa->ixa_ip_hdr_length = ip_hdr_length;
        /* Total allocation: IP headers + transport header + spare room */
        hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
        ASSERT(hdrs_len != 0);

        /* Reuse the existing buffer when the required size is unchanged */
        if (hdrs_len != connp->conn_ht_iphc_allocated) {
                /* Allocate new before we free any old */
                hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
                if (hdrs == NULL)
                        return (ENOMEM);

                if (connp->conn_ht_iphc != NULL) {
                        kmem_free(connp->conn_ht_iphc,
                            connp->conn_ht_iphc_allocated);
                }
                connp->conn_ht_iphc = hdrs;
                connp->conn_ht_iphc_allocated = hdrs_len;
        } else {
                hdrs = connp->conn_ht_iphc;
        }
        /* From here on hdrs_len is the template length without the extra */
        hdrs_len -= extra;
        connp->conn_ht_iphc_len = hdrs_len;

        /* The transport header starts right after the IP headers */
        connp->conn_ht_ulp = hdrs + ip_hdr_length;
        connp->conn_ht_ulp_len = ulp_hdr_length;

        if (ixa->ixa_flags & IXAF_IS_IPV4) {
                ipha_t  *ipha = (ipha_t *)hdrs;

                IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
                IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
                ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
                /* Template length covers the headers only, no payload */
                ipha->ipha_length = htons(hdrs_len);
                if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
                        ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
                else
                        ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;

                if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
                        connp->conn_sum = cksum_massage_options_v4(ipha,
                            connp->conn_netstack);
                } else {
                        connp->conn_sum = 0;
                }
        } else {
                ip6_t   *ip6h = (ip6_t *)hdrs;

                ip6h->ip6_src = *v6src;
                ip6h->ip6_dst = *v6dst;
                ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
                    flowinfo);
                /* ip6_plen excludes the fixed IPv6 header itself */
                ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);

                if (ipp->ipp_fields & IPPF_RTHDR) {
                        connp->conn_sum = cksum_massage_options_v6(ip6h,
                            ip_hdr_length, connp->conn_netstack);

                        /*
                         * Verify that the first hop isn't a mapped address.
                         * Routers along the path need to do this verification
                         * for subsequent hops.
                         */
                        if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
                                return (EADDRNOTAVAIL);

                } else {
                        connp->conn_sum = 0;
                }
        }
        return (0);
}
2278 
/*
 * Prepend a header template to data_mp based on the ip_pkt_t
 * and the passed in source, destination and protocol.
 *
 * Returns the mblk that starts with the headers on success. On failure
 * NULL is returned, *errorp is set and the message is freed: ENOMEM if
 * a header mblk could not be allocated (data_mp is freed), EADDRNOTAVAIL
 * if, after massaging an IPv6 routing header, the destination is a
 * v4-mapped address (the headers and data_mp are freed). *errorp is not
 * written on success.
 *
 * We allocate space for the transport header (ulp_hdr_length) and
 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
 *
 * We massage a routing option/header and return the checksum difference
 * in *sump. This is in host byte order.
 *
 * Caller needs to update conn_wroff if desired.
 */
mblk_t *
conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
    const in6_addr_t *v6src, const in6_addr_t *v6dst,
    uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
    uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
{
        uint_t          ip_hdr_length;
        uchar_t         *hdrs;
        uint_t          hdrs_len;
        mblk_t          *mp;

        if (ixa->ixa_flags & IXAF_IS_IPV4) {
                ip_hdr_length = ip_total_hdrs_len_v4(ipp);
                ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
        } else {
                ip_hdr_length = ip_total_hdrs_len_v6(ipp);
        }
        hdrs_len = ip_hdr_length + ulp_hdr_length;
        ASSERT(hdrs_len != 0);

        ixa->ixa_ip_hdr_length = ip_hdr_length;

        /*
         * Can we prepend to data_mp? That requires enough room in front of
         * b_rptr and a data block that is not shared (db_ref == 1).
         */
        if (data_mp != NULL &&
            data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
            data_mp->b_datap->db_ref == 1) {
                hdrs = data_mp->b_rptr - hdrs_len;
                data_mp->b_rptr = hdrs;
                mp = data_mp;
        } else {
                mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
                if (mp == NULL) {
                        freemsg(data_mp);
                        *errorp = ENOMEM;
                        return (NULL);
                }
                /*
                 * Place the headers at the very end of the new buffer so
                 * that all the slack is left in front of them.
                 */
                mp->b_wptr = mp->b_datap->db_lim;
                hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
                mp->b_cont = data_mp;
        }

        /*
         * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
         * if PKTINFO (aka IPPF_ADDR) was set.
         */
        if (ixa->ixa_flags & IXAF_IS_IPV4) {
                ipha_t *ipha = (ipha_t *)hdrs;

                ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
                IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
                IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
                ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
                /* Total length covers the headers plus the payload */
                ipha->ipha_length = htons(hdrs_len + data_length);
                if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
                        ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
                else
                        ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;

                if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
                        *sump = cksum_massage_options_v4(ipha,
                            ixa->ixa_ipst->ips_netstack);
                } else {
                        *sump = 0;
                }
        } else {
                ip6_t *ip6h = (ip6_t *)hdrs;

                ip6h->ip6_src = *v6src;
                ip6h->ip6_dst = *v6dst;
                ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
                /* ip6_plen excludes the fixed IPv6 header itself */
                ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);

                if (ipp->ipp_fields & IPPF_RTHDR) {
                        *sump = cksum_massage_options_v6(ip6h,
                            ip_hdr_length, ixa->ixa_ipst->ips_netstack);

                        /*
                         * Verify that the first hop isn't a mapped address.
                         * Routers along the path need to do this verification
                         * for subsequent hops.
                         */
                        if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
                                *errorp = EADDRNOTAVAIL;
                                /* mp is data_mp itself or is chained to it */
                                freemsg(mp);
                                return (NULL);
                        }
                } else {
                        *sump = 0;
                }
        }
        return (mp);
}
2384 
2385 /*
2386  * Massage a source route if any putting the first hop
2387  * in ipha_dst. Compute a starting value for the checksum which
2388  * takes into account that the original ipha_dst should be
2389  * included in the checksum but that IP will include the
2390  * first hop from the source route in the tcp checksum.
2391  */
2392 static uint32_t
2393 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2394 {
2395         in_addr_t       dst;
2396         uint32_t        cksum;
2397 
2398         /* Get last hop then diff against first hop */
2399         cksum = ip_massage_options(ipha, ns);
2400         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2401         dst = ipha->ipha_dst;
2402         cksum -= ((dst >> 16) + (dst & 0xffff));
2403         if ((int)cksum < 0)
2404                 cksum--;
2405         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2406         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2407         ASSERT(cksum < 0x10000);
2408         return (ntohs(cksum));
2409 }
2410 
2411 static uint32_t
2412 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2413 {
2414         uint8_t         *end;
2415         ip6_rthdr_t     *rth;
2416         uint32_t        cksum;
2417 
2418         end = (uint8_t *)ip6h + ip_hdr_len;
2419         rth = ip_find_rthdr_v6(ip6h, end);
2420         if (rth == NULL)
2421                 return (0);
2422 
2423         cksum = ip_massage_options_v6(ip6h, rth, ns);
2424         cksum = (cksum & 0xFFFF) + (cksum >> 16);
2425         ASSERT(cksum < 0x10000);
2426         return (ntohs(cksum));
2427 }
2428 
/*
 * ULPs that change the destination address need to call this for each
 * change to discard any state about a previous destination that might
 * have been multicast or multirt.
 *
 * Only ixa_flags is touched: the flags listed below are cleared and all
 * other flags are left as they were.
 */
void
ip_attr_newdst(ip_xmit_attr_t *ixa)
{
        ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
            IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
            IXAF_NO_LOOP_ZONEID_SET);
}
2441 
2442 /*
2443  * Determine the nexthop which will be used.
2444  * Normally this is just the destination, but if a IPv4 source route, or
2445  * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2446  * there.
2447  */
2448 void
2449 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2450     const in6_addr_t *dst, in6_addr_t *nexthop)
2451 {
2452         if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2453                 *nexthop = *dst;
2454                 return;
2455         }
2456         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2457                 ipaddr_t v4dst;
2458                 ipaddr_t v4nexthop;
2459 
2460                 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2461                 v4nexthop = ip_pkt_source_route_v4(ipp);
2462                 if (v4nexthop == INADDR_ANY)
2463                         v4nexthop = v4dst;
2464 
2465                 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2466         } else {
2467                 const in6_addr_t *v6nexthop;
2468 
2469                 v6nexthop = ip_pkt_source_route_v6(ipp);
2470                 if (v6nexthop == NULL)
2471                         v6nexthop = dst;
2472 
2473                 *nexthop = *v6nexthop;
2474         }
2475 }
2476 
2477 /*
2478  * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2479  * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2480  * case (connected latching is done in conn_connect).
2481  * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2482  * set, but doesn't otherwise use the conn_t.
2483  *
2484  * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2485  * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2486  *
2487  * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2488  * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2489  *
2490  * Updates laddrp and uinfo if they are non-NULL.
2491  *
2492  * TSOL notes: The callers if ip_attr_connect must check if the destination
2493  * is different than before and in that case redo conn_update_label.
2494  * The callers of conn_connect do not need that since conn_connect
2495  * performs the conn_update_label.
2496  */
2497 int
2498 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2499     const in6_addr_t *v6src, const in6_addr_t *v6dst,
2500     const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2501     iulp_t *uinfo, uint32_t flags)
2502 {
2503         in6_addr_t              laddr = *v6src;
2504         int                     error;
2505 
2506         ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2507 
2508         if (connp->conn_zone_is_global)
2509                 flags |= IPDF_ZONE_IS_GLOBAL;
2510         else
2511                 flags &= ~IPDF_ZONE_IS_GLOBAL;
2512 
2513         /*
2514          * Lookup the route to determine a source address and the uinfo.
2515          * If the ULP has a source route option then the caller will
2516          * have set v6nexthop to be the first hop.
2517          */
2518         if (ixa->ixa_flags & IXAF_IS_IPV4) {
2519                 ipaddr_t v4dst;
2520                 ipaddr_t v4src, v4nexthop;
2521 
2522                 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2523                 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2524                 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2525 
2526                 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2527                         flags &= ~IPDF_SELECT_SRC;
2528                 else
2529                         flags |= IPDF_SELECT_SRC;
2530 
2531                 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2532                     uinfo, flags, connp->conn_mac_mode);
2533                 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2534         } else {
2535                 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2536                         flags &= ~IPDF_SELECT_SRC;
2537                 else
2538                         flags |= IPDF_SELECT_SRC;
2539 
2540                 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2541                     uinfo, flags, connp->conn_mac_mode);
2542         }
2543         /* Pass out some address even if we hit a RTF_REJECT etc */
2544         if (laddrp != NULL)
2545                 *laddrp = laddr;
2546 
2547         if (error != 0)
2548                 return (error);
2549 
2550         if (flags & IPDF_IPSEC) {
2551                 /*
2552                  * Set any IPsec policy in ixa. Routine also looks at ULP
2553                  * ports.
2554                  */
2555                 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2556         }
2557         return (0);
2558 }
2559 
/*
 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
 * Assumes that conn_faddr and conn_fport are already set. As such it is not
 * usable for SCTP, since SCTP has multiple faddrs.
 *
 * Caller must hold conn_lock to provide atomic consistency between the
 * conn_t's addresses and the ixa.
 * NOTE: this function drops and reacquires conn_lock since it can't be
 * held across ip_attr_connect/ip_set_destination. conn_lock is held again
 * on every return path, including the error returns.
 *
 * Returns 0 on success. Otherwise returns the error from ip_attr_connect,
 * conn_update_label or ipsec_conn_cache_policy, or ECONNREFUSED on a
 * labeled system when conn_mlp_type is not mlptSingle. conn_saddr_v6 may
 * have been updated even when an error is returned.
 *
 * The caller needs to handle inserting in the receive-side fanout when
 * appropriate after conn_connect returns.
 */
int
conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
{
        ip_xmit_attr_t  *ixa = connp->conn_ixa;
        in6_addr_t      nexthop;
        in6_addr_t      saddr, faddr;
        in_port_t       fport;
        int             error;

        ASSERT(MUTEX_HELD(&connp->conn_lock));

        if (connp->conn_ipversion == IPV4_VERSION)
                ixa->ixa_flags |= IXAF_IS_IPV4;
        else
                ixa->ixa_flags &= ~IXAF_IS_IPV4;

        /* We do IPsec latching below - hence no caching in ip_attr_connect */
        flags &= ~IPDF_IPSEC;

        /* In case we had previously done an ip_attr_connect */
        ip_attr_newdst(ixa);

        /*
         * Determine the nexthop and copy the addresses before dropping
         * conn_lock.
         */
        ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
            &connp->conn_faddr_v6, &nexthop);
        saddr = connp->conn_saddr_v6;
        faddr = connp->conn_faddr_v6;
        fport = connp->conn_fport;

        /*
         * saddr is passed both as the source and as the place to return the
         * local address, so it holds the resulting address afterwards.
         */
        mutex_exit(&connp->conn_lock);
        error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
            &saddr, uinfo, flags | IPDF_VERIFY_DST);
        mutex_enter(&connp->conn_lock);

        /* Could have changed even if an error */
        connp->conn_saddr_v6 = saddr;
        if (error != 0)
                return (error);

        /*
         * Check whether Trusted Solaris policy allows communication with this
         * host, and pretend that the destination is unreachable if not.
         * Compute any needed label and place it in ipp_label_v4/v6.
         *
         * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
         * the packet.
         *
         * TSOL Note: Any concurrent threads would pick a different ixa
         * (and ipp if they are to change the ipp)  so we
         * don't have to worry about concurrent threads.
         */
        if (is_system_labeled()) {
                if (connp->conn_mlp_type != mlptSingle)
                        return (ECONNREFUSED);

                /*
                 * conn_update_label will set ipp_label* which will later
                 * be used by conn_build_hdr_template.
                 */
                error = conn_update_label(connp, ixa,
                    &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
                if (error != 0)
                        return (error);
        }

        /*
         * Ensure that we match on the selected local address.
         * This overrides conn_laddr in the case we had earlier bound to a
         * multicast or broadcast address.
         */
        connp->conn_laddr_v6 = connp->conn_saddr_v6;

        /*
         * Allow setting new policies.
         * The addresses/ports are already set, thus the IPsec policy calls
         * can handle their passed-in conn's.
         */
        connp->conn_policy_cached = B_FALSE;

        /*
         * Cache IPsec policy in this conn.  If we have per-socket policy,
         * we'll cache that.  If we don't, we'll inherit global policy.
         *
         * This is done before the caller inserts in the receive-side fanout.
         * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
         * for connections where we don't have a policy. This is to prevent
         * global policy lookups in the inbound path.
         *
         * If we insert before we set conn_policy_cached,
         * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
         * because global policy could be non-empty. We normally call
         * ipsec_check_policy() for conn_policy_cached connections only if
         * conn_in_enforce_policy is set. But in this case,
         * conn_policy_cached can get set anytime since we made the
         * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
         * called, which will make the above assumption false.  Thus, we
         * need to insert after we set conn_policy_cached.
         */
        error = ipsec_conn_cache_policy(connp,
            connp->conn_ipversion == IPV4_VERSION);
        if (error != 0)
                return (error);

        /*
         * We defer to do LSO check until here since now we have better idea
         * whether IPsec is present. If the underlying ill is LSO capable,
         * copy its capability in so the ULP can decide whether to enable LSO
         * on this connection. So far, only TCP/IPv4 is implemented, so won't
         * claim LSO for IPv6.
         *
         * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
         * the receiver can not handle it. Also not to enable LSO for MULTIRT.
         */
        ixa->ixa_flags &= ~IXAF_LSO_CAPAB;

        ASSERT(ixa->ixa_ire != NULL);
        if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
            !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
            !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
            !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
            (ixa->ixa_nce != NULL) &&
            ((ixa->ixa_flags & IXAF_IS_IPV4) ?
            ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
            ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
                ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
                ixa->ixa_flags |= IXAF_LSO_CAPAB;
        }

        /*
         * Check whether ZEROCOPY capability is usable for this connection.
         * The same IPsec, local/loopback and MULTIRT exclusions as for LSO
         * above are applied.
         */
        ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;

        if ((flags & IPDF_ZCOPY) &&
            !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
            !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
            !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
            (ixa->ixa_nce != NULL) &&
            ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
                ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
        }
        return (0);
}
2717 
2718 /*
2719  * Predicates to check if the addresses match conn_last*
2720  */
2721 
2722 /*
2723  * Compare the conn against an address.
2724  * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2725  */
2726 boolean_t
2727 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2728 {
2729         ASSERT(connp->conn_family == AF_INET);
2730         return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2731             sin->sin_port == connp->conn_lastdstport);
2732 }
2733 
2734 /*
2735  * Compare, including for mapped addresses
2736  */
2737 boolean_t
2738 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2739 {
2740         return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2741             sin6->sin6_port == connp->conn_lastdstport &&
2742             sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2743             sin6->sin6_scope_id == connp->conn_lastscopeid);
2744 }
2745 
/*
 * Compute a label and place it in the ip_packet_t.
 * Handles IPv4 and IPv6; the IPv4 path is taken when v6dst is a v4-mapped
 * address.
 * The caller should have a correct ixa_tsl and ixa_zoneid and have
 * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
 * has been called.
 *
 * Returns 0 on success, otherwise the error from tsol_compute_label_v4/v6
 * or optcom_pkt_set. IPPF_LABEL_V4/IPPF_LABEL_V6 in ipp_fields is set or
 * cleared to match the resulting label length whether or not an error
 * occurred. connp is only passed to the DTrace probe fired on failure.
 */
int
conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
    const in6_addr_t *v6dst, ip_pkt_t *ipp)
{
        int             err;
        ipaddr_t        v4dst;

        if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
                uchar_t         opt_storage[IP_MAX_OPT_LENGTH];

                IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);

                err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
                    v4dst, opt_storage, ixa->ixa_ipst);
                if (err == 0) {
                        /* Length contained in opt_storage[IPOPT_OLEN] */
                        err = optcom_pkt_set(opt_storage,
                            opt_storage[IPOPT_OLEN],
                            (uchar_t **)&ipp->ipp_label_v4,
                            &ipp->ipp_label_len_v4);
                }
                if (err != 0) {
                        DTRACE_PROBE4(tx__ip__log__info__updatelabel,
                            char *, "conn(1) failed to update options(2) "
                            "on ixa(3)",
                            conn_t *, connp, char *, opt_storage,
                            ip_xmit_attr_t *, ixa);
                }
                /* Keep the field flag in step with the stored label length */
                if (ipp->ipp_label_len_v4 != 0)
                        ipp->ipp_fields |= IPPF_LABEL_V4;
                else
                        ipp->ipp_fields &= ~IPPF_LABEL_V4;
        } else {
                uchar_t         opt_storage[TSOL_MAX_IPV6_OPTION];
                uint_t          optlen;

                err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
                    v6dst, opt_storage, ixa->ixa_ipst);
                if (err == 0) {
                        /*
                         * Note that ipp_label_v6 is just the option - not
                         * the hopopts extension header.
                         *
                         * Length contained in opt_storage[IPOPT_OLEN], but
                         * that doesn't include the two byte options header.
                         */
                        optlen = opt_storage[IPOPT_OLEN];
                        /* A zero length is passed on as zero, without +2 */
                        if (optlen != 0)
                                optlen += 2;

                        err = optcom_pkt_set(opt_storage, optlen,
                            (uchar_t **)&ipp->ipp_label_v6,
                            &ipp->ipp_label_len_v6);
                }
                if (err != 0) {
                        DTRACE_PROBE4(tx__ip__log__info__updatelabel,
                            char *, "conn(1) failed to update options(2) "
                            "on ixa(3)",
                            conn_t *, connp, char *, opt_storage,
                            ip_xmit_attr_t *, ixa);
                }
                /* Keep the field flag in step with the stored label length */
                if (ipp->ipp_label_len_v6 != 0)
                        ipp->ipp_fields |= IPPF_LABEL_V6;
                else
                        ipp->ipp_fields &= ~IPPF_LABEL_V6;
        }
        return (err);
}
2821 
2822 /*
2823  * Inherit all options settings from the parent/listener to the eager.
2824  * Returns zero on success; ENOMEM if memory allocation failed.
2825  *
2826  * We assume that the eager has not had any work done i.e., the conn_ixa
2827  * and conn_xmit_ipp are all zero.
2828  * Furthermore we assume that no other thread can access the eager (because
2829  * it isn't inserted in any fanout list).
2830  */
2831 int
2832 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2833 {
2834         cred_t  *credp;
2835         int     err;
2836         void    *notify_cookie;
2837         uint32_t xmit_hint;
2838 
2839         econnp->conn_family = lconnp->conn_family;
2840         econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2841         econnp->conn_wq = lconnp->conn_wq;
2842         econnp->conn_rq = lconnp->conn_rq;
2843 
2844         /*
2845          * Make a safe copy of the transmit attributes.
2846          * conn_connect will later be used by the caller to setup the ire etc.
2847          */
2848         ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2849         ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2850         ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2851         ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2852 
2853         /* Preserve ixa_notify_cookie and xmit_hint */
2854         notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2855         xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2856         ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2857         econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2858         econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2859 
2860         econnp->conn_bound_if = lconnp->conn_bound_if;
2861         econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2862 
2863         /* Inherit all RECV options */
2864         econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2865 
2866         err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2867             KM_NOSLEEP);
2868         if (err != 0)
2869                 return (err);
2870 
2871         econnp->conn_zoneid = lconnp->conn_zoneid;
2872         econnp->conn_allzones = lconnp->conn_allzones;
2873 
2874         /* This is odd. Pick a flowlabel for each connection instead? */
2875         econnp->conn_flowinfo = lconnp->conn_flowinfo;
2876 
2877         econnp->conn_default_ttl = lconnp->conn_default_ttl;
2878 
2879         /*
2880          * TSOL: tsol_input_proc() needs the eager's cred before the
2881          * eager is accepted
2882          */
2883         ASSERT(lconnp->conn_cred != NULL);
2884         econnp->conn_cred = credp = lconnp->conn_cred;
2885         crhold(credp);
2886         econnp->conn_cpid = lconnp->conn_cpid;
2887         econnp->conn_open_time = ddi_get_lbolt64();
2888 
2889         /*
2890          * Cache things in the ixa without any refhold.
2891          * Listener might not have set up ixa_cred
2892          */
2893         ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2894         econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2895         econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2896         if (is_system_labeled())
2897                 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2898 
2899         /*
2900          * If the caller has the process-wide flag set, then default to MAC
2901          * exempt mode.  This allows read-down to unlabeled hosts.
2902          */
2903         if (getpflags(NET_MAC_AWARE, credp) != 0)
2904                 econnp->conn_mac_mode = CONN_MAC_AWARE;
2905 
2906         econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2907 
2908         /*
2909          * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2910          * via soaccept()->soinheritoptions() which essentially applies
2911          * all the listener options to the new connection. The options that we
2912          * need to take care of are:
2913          * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2914          * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2915          * SO_SNDBUF, SO_RCVBUF.
2916          *
2917          * SO_RCVBUF:   conn_rcvbuf is set.
2918          * SO_SNDBUF:   conn_sndbuf is set.
2919          */
2920 
2921         /* Could we define a struct and use a struct copy for this? */
2922         econnp->conn_sndbuf = lconnp->conn_sndbuf;
2923         econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2924         econnp->conn_sndlowat = lconnp->conn_sndlowat;
2925         econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2926         econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2927         econnp->conn_oobinline = lconnp->conn_oobinline;
2928         econnp->conn_debug = lconnp->conn_debug;
2929         econnp->conn_keepalive = lconnp->conn_keepalive;
2930         econnp->conn_linger = lconnp->conn_linger;
2931         econnp->conn_lingertime = lconnp->conn_lingertime;
2932 
2933         /* Set the IP options */
2934         econnp->conn_broadcast = lconnp->conn_broadcast;
2935         econnp->conn_useloopback = lconnp->conn_useloopback;
2936         econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2937         return (0);
2938 }