1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  24  * Copyright 2016 Joyent, Inc.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/stream.h>
  30 #define _SUN_TPI_VERSION 2
  31 #include <sys/tihdr.h>
  32 #include <sys/socket.h>
  33 #include <sys/xti_xtiopt.h>
  34 #include <sys/xti_inet.h>
  35 #include <sys/policy.h>
  36 
  37 #include <inet/common.h>
  38 #include <netinet/ip6.h>
  39 #include <inet/ip.h>
  40 
  41 #include <netinet/in.h>
  42 #include <netinet/tcp.h>
  43 #include <inet/optcom.h>
  44 #include <inet/proto_set.h>
  45 #include <inet/tcp_impl.h>
  46 
  47 static int      tcp_opt_default(queue_t *, int, int, uchar_t *);
  48 
/*
 * Table of all known options handled on a TCP protocol stack.
 *
 * Note: This table contains options processed by both TCP and IP levels
 *       and is the superset of options that can be performed on a TCP over IP
 *       stack.
 *
 * Each entry is a positional opdes_t initializer.  Reading the values below,
 * the columns appear to be: option name, option level, two access masks
 * (OA_R / OA_RW), a privilege code (OP_NP, OP_CONFIG, OP_PRIVPORT, OP_RAW),
 * property flags (0, OP_DEF_FN, OP_VARLEN, OP_NODEFAULT), the option size
 * (sizeof the value type, or what looks like a maximum for OP_VARLEN
 * entries), and a default value.
 * TODO: confirm the exact field names and meanings against the opdes_t
 * definition, which is not part of this file.
 *
 * The entries flagged OP_DEF_FN are exactly the options for which
 * tcp_opt_default() supplies a value; they carry a -1 placeholder here.
 */
opdes_t tcp_opt_arr[] = {

/* SOL_SOCKET level options */
{ SO_LINGER,    SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
        sizeof (struct linger), 0 },

{ SO_DEBUG,     SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
        },
{ SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_TYPE,      SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
{ SO_SNDBUF,    SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_RCVBUF,    SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_SNDTIMEO,  SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
        sizeof (struct timeval), 0 },
{ SO_RCVTIMEO,  SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
        sizeof (struct timeval), 0 },
{ SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
        },
{ SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
        0 },
{ SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
        0 },
{ SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
        0 },
{ SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
        0 },
{ SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ SO_DOMAIN,    SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },

{ SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },

/* IPPROTO_TCP level options */
{ TCP_NODELAY,  IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
        },
{ TCP_MAXSEG,   IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
        536 },

{ TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
        OP_DEF_FN, sizeof (int), -1 /* not initialized */ },

{ TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
        OP_DEF_FN, sizeof (int), -1 /* not initialized */ },

{ TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
        OP_DEF_FN, sizeof (int), -1 /* not initialized */ },

{ TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
        OP_DEF_FN, sizeof (int), -1 /* not initialized */ },

{ TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
        0 },

{ TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
        sizeof (int), 0 },

{ TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
        },

{ TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
        sizeof (int), 0 },

{ TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },

{ TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },

{ TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },

{ TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },

{ TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },

{ TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

/*
 * IPPROTO_IP and IPPROTO_IPV6 level options.  Entries for the two levels
 * are interleaved from here to the end of the table.
 */
{ IP_OPTIONS,   IPPROTO_IP, OA_RW, OA_RW, OP_NP,
        (OP_VARLEN|OP_NODEFAULT),
        IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
{ T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
        (OP_VARLEN|OP_NODEFAULT),
        IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },

{ IP_TOS,       IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ T_IP_TOS,     IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ IP_TTL,       IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
        sizeof (int), -1 /* not initialized */ },

{ IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
        sizeof (ipsec_req_t), -1 /* not initialized */ },

{ IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int),   0 /* no ifindex */ },

{ IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
        sizeof (int), 0 },

{ IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
        sizeof (int), -1 /* not initialized */ },

{ IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int),   0 /* no ifindex */ },

{ IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },

{ IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
        sizeof (in_addr_t),     -1 /* not initialized */ },

{ IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
        sizeof (int), 0 },

{ IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
        (OP_NODEFAULT|OP_VARLEN),
        sizeof (struct in6_pktinfo), -1 /* not initialized */ },
{ IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
        OP_NODEFAULT,
        sizeof (sin6_t), -1 /* not initialized */ },
{ IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
        (OP_VARLEN|OP_NODEFAULT), 255*8,
        -1 /* not initialized */ },
{ IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
        (OP_VARLEN|OP_NODEFAULT), 255*8,
        -1 /* not initialized */ },
{ IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
        (OP_VARLEN|OP_NODEFAULT), 255*8,
        -1 /* not initialized */ },
{ IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
        (OP_VARLEN|OP_NODEFAULT), 255*8,
        -1 /* not initialized */ },
{ IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
        OP_NODEFAULT,
        sizeof (int), -1 /* not initialized */ },
{ IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
        OP_NODEFAULT,
        sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
{ IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },
{ IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },
{ IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },

/* Enable receipt of ancillary data */
{ IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },
{ IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },
{ IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },
{ _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },
{ IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },
{ IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },
{ IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },
{ IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (int), 0 },

{ IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
        sizeof (ipsec_req_t), -1 /* not initialized */ },
{ IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
        sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
};
 233 
/*
 * Table of all supported option levels.
 *
 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 * any supported options, so this information has to be kept separately
 * from tcp_opt_arr.
 *
 * This is needed only for topmost TPI providers and is used only by
 * the XTI interfaces.
 */
optlevel_t      tcp_valid_levels_arr[] = {
        XTI_GENERIC,    /* valid level with no entries in tcp_opt_arr */
        SOL_SOCKET,
        IPPROTO_TCP,
        IPPROTO_IP,
        IPPROTO_IPV6
};
 249 
 250 
/*
 * Entry counts of the two tables above; both are stored in tcp_opt_obj
 * alongside the tables themselves.
 */
#define TCP_OPT_ARR_CNT         A_CNT(tcp_opt_arr)
#define TCP_VALID_LEVELS_CNT    A_CNT(tcp_valid_levels_arr)

/* Not assigned in this file; per the original note it is set at load time. */
uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
 255 
/*
 * Initialize the option database object for TCP.
 *
 * This object bundles the TCP option table, the table of valid levels and
 * the default/get/set handlers.  It is the option database that is passed
 * to the {sock,tpi}optcom_req() interface routines, which take care of
 * option management and call the associated methods.
 */

optdb_obj_t tcp_opt_obj = {
        tcp_opt_default,        /* TCP default value function pointer */
        tcp_tpi_opt_get,        /* TCP get function pointer */
        tcp_tpi_opt_set,        /* TCP set function pointer */
        TCP_OPT_ARR_CNT,        /* TCP option database count of entries */
        tcp_opt_arr,            /* TCP option database */
        TCP_VALID_LEVELS_CNT,   /* TCP valid level count of entries */
        tcp_valid_levels_arr    /* TCP valid level array */
};
 273 
/*
 * Upper bound on the initial congestion window that tcp_opt_set() accepts
 * for TCP_INIT_CWND, even from a caller that passes secpolicy_ip_config().
 * The value is a segment count: it is multiplied by tcp_mss when applied
 * to tcp_cwnd.
 */
static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
 275 
 276 /*
 277  * Some TCP options can be "set" by requesting them in the option
 278  * buffer. This is needed for XTI feature test though we do not
 279  * allow it in general. We interpret that this mechanism is more
 280  * applicable to OSI protocols and need not be allowed in general.
 281  * This routine filters out options for which it is not allowed (most)
 282  * and lets through those (few) for which it is. [ The XTI interface
 283  * test suite specifics will imply that any XTI_GENERIC level XTI_* if
 284  * ever implemented will have to be allowed here ].
 285  */
 286 static boolean_t
 287 tcp_allow_connopt_set(int level, int name)
 288 {
 289 
 290         switch (level) {
 291         case IPPROTO_TCP:
 292                 switch (name) {
 293                 case TCP_NODELAY:
 294                         return (B_TRUE);
 295                 default:
 296                         return (B_FALSE);
 297                 }
 298                 /*NOTREACHED*/
 299         default:
 300                 return (B_FALSE);
 301         }
 302         /*NOTREACHED*/
 303 }
 304 
 305 /*
 306  * This routine gets default values of certain options whose default
 307  * values are maintained by protocol specific code
 308  */
 309 /* ARGSUSED */
 310 static int
 311 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
 312 {
 313         int32_t *i1 = (int32_t *)ptr;
 314         tcp_stack_t     *tcps = Q_TO_TCP(q)->tcp_tcps;
 315 
 316         switch (level) {
 317         case IPPROTO_TCP:
 318                 switch (name) {
 319                 case TCP_NOTIFY_THRESHOLD:
 320                         *i1 = tcps->tcps_ip_notify_interval;
 321                         break;
 322                 case TCP_ABORT_THRESHOLD:
 323                         *i1 = tcps->tcps_ip_abort_interval;
 324                         break;
 325                 case TCP_CONN_NOTIFY_THRESHOLD:
 326                         *i1 = tcps->tcps_ip_notify_cinterval;
 327                         break;
 328                 case TCP_CONN_ABORT_THRESHOLD:
 329                         *i1 = tcps->tcps_ip_abort_cinterval;
 330                         break;
 331                 default:
 332                         return (-1);
 333                 }
 334                 break;
 335         case IPPROTO_IP:
 336                 switch (name) {
 337                 case IP_TTL:
 338                         *i1 = tcps->tcps_ipv4_ttl;
 339                         break;
 340                 default:
 341                         return (-1);
 342                 }
 343                 break;
 344         case IPPROTO_IPV6:
 345                 switch (name) {
 346                 case IPV6_UNICAST_HOPS:
 347                         *i1 = tcps->tcps_ipv6_hoplimit;
 348                         break;
 349                 default:
 350                         return (-1);
 351                 }
 352                 break;
 353         default:
 354                 return (-1);
 355         }
 356         return (sizeof (int));
 357 }
 358 
 359 /*
 360  * TCP routine to get the values of options.
 361  */
 362 int
 363 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
 364 {
 365         int             *i1 = (int *)ptr;
 366         tcp_t           *tcp = connp->conn_tcp;
 367         conn_opt_arg_t  coas;
 368         int             retval;
 369 
 370         coas.coa_connp = connp;
 371         coas.coa_ixa = connp->conn_ixa;
 372         coas.coa_ipp = &connp->conn_xmit_ipp;
 373         coas.coa_ancillary = B_FALSE;
 374         coas.coa_changed = 0;
 375 
 376         switch (level) {
 377         case SOL_SOCKET:
 378                 switch (name) {
 379                 case SO_SND_COPYAVOID:
 380                         *i1 = tcp->tcp_snd_zcopy_on ?
 381                             SO_SND_COPYAVOID : 0;
 382                         return (sizeof (int));
 383                 case SO_ACCEPTCONN:
 384                         *i1 = (tcp->tcp_state == TCPS_LISTEN);
 385                         return (sizeof (int));
 386                 }
 387                 break;
 388         case IPPROTO_TCP:
 389                 switch (name) {
 390                 case TCP_NODELAY:
 391                         *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
 392                         return (sizeof (int));
 393                 case TCP_MAXSEG:
 394                         *i1 = tcp->tcp_mss;
 395                         return (sizeof (int));
 396                 case TCP_NOTIFY_THRESHOLD:
 397                         *i1 = (int)tcp->tcp_first_timer_threshold;
 398                         return (sizeof (int));
 399                 case TCP_ABORT_THRESHOLD:
 400                         *i1 = tcp->tcp_second_timer_threshold;
 401                         return (sizeof (int));
 402                 case TCP_CONN_NOTIFY_THRESHOLD:
 403                         *i1 = tcp->tcp_first_ctimer_threshold;
 404                         return (sizeof (int));
 405                 case TCP_CONN_ABORT_THRESHOLD:
 406                         *i1 = tcp->tcp_second_ctimer_threshold;
 407                         return (sizeof (int));
 408                 case TCP_INIT_CWND:
 409                         *i1 = tcp->tcp_init_cwnd;
 410                         return (sizeof (int));
 411                 case TCP_KEEPALIVE_THRESHOLD:
 412                         *i1 = tcp->tcp_ka_interval;
 413                         return (sizeof (int));
 414 
 415                 /*
 416                  * TCP_KEEPIDLE expects value in seconds, but
 417                  * tcp_ka_interval is in milliseconds.
 418                  */
 419                 case TCP_KEEPIDLE:
 420                         *i1 = tcp->tcp_ka_interval / 1000;
 421                         return (sizeof (int));
 422                 case TCP_KEEPCNT:
 423                         *i1 = tcp->tcp_ka_cnt;
 424                         return (sizeof (int));
 425 
 426                 /*
 427                  * TCP_KEEPINTVL expects value in seconds, but
 428                  * tcp_ka_rinterval is in milliseconds.
 429                  */
 430                 case TCP_KEEPINTVL:
 431                         *i1 = tcp->tcp_ka_rinterval / 1000;
 432                         return (sizeof (int));
 433                 case TCP_KEEPALIVE_ABORT_THRESHOLD:
 434                         *i1 = tcp->tcp_ka_abort_thres;
 435                         return (sizeof (int));
 436                 case TCP_CORK:
 437                         *i1 = tcp->tcp_cork;
 438                         return (sizeof (int));
 439                 case TCP_RTO_INITIAL:
 440                         *i1 = tcp->tcp_rto_initial;
 441                         return (sizeof (uint32_t));
 442                 case TCP_RTO_MIN:
 443                         *i1 = tcp->tcp_rto_min;
 444                         return (sizeof (uint32_t));
 445                 case TCP_RTO_MAX:
 446                         *i1 = tcp->tcp_rto_max;
 447                         return (sizeof (uint32_t));
 448                 case TCP_LINGER2:
 449                         *i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
 450                         return (sizeof (int));
 451                 }
 452                 break;
 453         case IPPROTO_IP:
 454                 if (connp->conn_family != AF_INET)
 455                         return (-1);
 456                 switch (name) {
 457                 case IP_OPTIONS:
 458                 case T_IP_OPTIONS:
 459                         /* Caller ensures enough space */
 460                         return (ip_opt_get_user(connp, ptr));
 461                 default:
 462                         break;
 463                 }
 464                 break;
 465 
 466         case IPPROTO_IPV6:
 467                 /*
 468                  * IPPROTO_IPV6 options are only supported for sockets
 469                  * that are using IPv6 on the wire.
 470                  */
 471                 if (connp->conn_ipversion != IPV6_VERSION) {
 472                         return (-1);
 473                 }
 474                 switch (name) {
 475                 case IPV6_PATHMTU:
 476                         if (tcp->tcp_state < TCPS_ESTABLISHED)
 477                                 return (-1);
 478                         break;
 479                 }
 480                 break;
 481         }
 482         mutex_enter(&connp->conn_lock);
 483         retval = conn_opt_get(&coas, level, name, ptr);
 484         mutex_exit(&connp->conn_lock);
 485         return (retval);
 486 }
 487 
 488 /*
 489  * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
 490  * Parameters are assumed to be verified by the caller.
 491  */
 492 /* ARGSUSED */
 493 int
 494 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
 495     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
 496     void *thisdg_attrs, cred_t *cr)
 497 {
 498         tcp_t   *tcp = connp->conn_tcp;
 499         int     *i1 = (int *)invalp;
 500         boolean_t onoff = (*i1 == 0) ? 0 : 1;
 501         boolean_t checkonly;
 502         int     reterr;
 503         tcp_stack_t     *tcps = tcp->tcp_tcps;
 504         conn_opt_arg_t  coas;
 505         uint32_t        val = *((uint32_t *)invalp);
 506 
 507         coas.coa_connp = connp;
 508         coas.coa_ixa = connp->conn_ixa;
 509         coas.coa_ipp = &connp->conn_xmit_ipp;
 510         coas.coa_ancillary = B_FALSE;
 511         coas.coa_changed = 0;
 512 
 513         switch (optset_context) {
 514         case SETFN_OPTCOM_CHECKONLY:
 515                 checkonly = B_TRUE;
 516                 /*
 517                  * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
 518                  * inlen != 0 implies value supplied and
 519                  *      we have to "pretend" to set it.
 520                  * inlen == 0 implies that there is no
 521                  *      value part in T_CHECK request and just validation
 522                  * done elsewhere should be enough, we just return here.
 523                  */
 524                 if (inlen == 0) {
 525                         *outlenp = 0;
 526                         return (0);
 527                 }
 528                 break;
 529         case SETFN_OPTCOM_NEGOTIATE:
 530                 checkonly = B_FALSE;
 531                 break;
 532         case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
 533         case SETFN_CONN_NEGOTIATE:
 534                 checkonly = B_FALSE;
 535                 /*
 536                  * Negotiating local and "association-related" options
 537                  * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
 538                  * primitives is allowed by XTI, but we choose
 539                  * to not implement this style negotiation for Internet
 540                  * protocols (We interpret it is a must for OSI world but
 541                  * optional for Internet protocols) for all options.
 542                  * [ Will do only for the few options that enable test
 543                  * suites that our XTI implementation of this feature
 544                  * works for transports that do allow it ]
 545                  */
 546                 if (!tcp_allow_connopt_set(level, name)) {
 547                         *outlenp = 0;
 548                         return (EINVAL);
 549                 }
 550                 break;
 551         default:
 552                 /*
 553                  * We should never get here
 554                  */
 555                 *outlenp = 0;
 556                 return (EINVAL);
 557         }
 558 
 559         ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
 560             (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
 561 
 562         /*
 563          * For TCP, we should have no ancillary data sent down
 564          * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
 565          * has to be zero.
 566          */
 567         ASSERT(thisdg_attrs == NULL);
 568 
 569         /*
 570          * For fixed length options, no sanity check
 571          * of passed in length is done. It is assumed *_optcom_req()
 572          * routines do the right thing.
 573          */
 574         switch (level) {
 575         case SOL_SOCKET:
 576                 switch (name) {
 577                 case SO_KEEPALIVE:
 578                         if (checkonly) {
 579                                 /* check only case */
 580                                 break;
 581                         }
 582 
 583                         if (!onoff) {
 584                                 if (connp->conn_keepalive) {
 585                                         if (tcp->tcp_ka_tid != 0) {
 586                                                 (void) TCP_TIMER_CANCEL(tcp,
 587                                                     tcp->tcp_ka_tid);
 588                                                 tcp->tcp_ka_tid = 0;
 589                                         }
 590                                         connp->conn_keepalive = 0;
 591                                 }
 592                                 break;
 593                         }
 594                         if (!connp->conn_keepalive) {
 595                                 /* Crank up the keepalive timer */
 596                                 tcp->tcp_ka_last_intrvl = 0;
 597                                 tcp->tcp_ka_tid = TCP_TIMER(tcp,
 598                                     tcp_keepalive_timer, tcp->tcp_ka_interval);
 599                                 connp->conn_keepalive = 1;
 600                         }
 601                         break;
 602                 case SO_SNDBUF: {
 603                         if (*i1 > tcps->tcps_max_buf) {
 604                                 *outlenp = 0;
 605                                 return (ENOBUFS);
 606                         }
 607                         if (checkonly)
 608                                 break;
 609 
 610                         connp->conn_sndbuf = *i1;
 611                         if (tcps->tcps_snd_lowat_fraction != 0) {
 612                                 connp->conn_sndlowat = connp->conn_sndbuf /
 613                                     tcps->tcps_snd_lowat_fraction;
 614                         }
 615                         (void) tcp_maxpsz_set(tcp, B_TRUE);
 616                         /*
 617                          * If we are flow-controlled, recheck the condition.
 618                          * There are apps that increase SO_SNDBUF size when
 619                          * flow-controlled (EWOULDBLOCK), and expect the flow
 620                          * control condition to be lifted right away.
 621                          */
 622                         mutex_enter(&tcp->tcp_non_sq_lock);
 623                         if (tcp->tcp_flow_stopped &&
 624                             TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
 625                                 tcp_clrqfull(tcp);
 626                         }
 627                         mutex_exit(&tcp->tcp_non_sq_lock);
 628                         *outlenp = inlen;
 629                         return (0);
 630                 }
 631                 case SO_RCVBUF:
 632                         if (*i1 > tcps->tcps_max_buf) {
 633                                 *outlenp = 0;
 634                                 return (ENOBUFS);
 635                         }
 636                         /* Silently ignore zero */
 637                         if (!checkonly && *i1 != 0) {
 638                                 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
 639                                 (void) tcp_rwnd_set(tcp, *i1);
 640                         }
 641                         /*
 642                          * XXX should we return the rwnd here
 643                          * and tcp_opt_get ?
 644                          */
 645                         *outlenp = inlen;
 646                         return (0);
 647                 case SO_SND_COPYAVOID:
 648                         if (!checkonly) {
 649                                 if (tcp->tcp_loopback ||
 650                                     (onoff != 1) || !tcp_zcopy_check(tcp)) {
 651                                         *outlenp = 0;
 652                                         return (EOPNOTSUPP);
 653                                 }
 654                                 tcp->tcp_snd_zcopy_aware = 1;
 655                         }
 656                         *outlenp = inlen;
 657                         return (0);
 658                 }
 659                 break;
 660         case IPPROTO_TCP:
 661                 switch (name) {
 662                 case TCP_NODELAY:
 663                         if (!checkonly)
 664                                 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
 665                         break;
 666                 case TCP_NOTIFY_THRESHOLD:
 667                         if (!checkonly)
 668                                 tcp->tcp_first_timer_threshold = *i1;
 669                         break;
 670                 case TCP_ABORT_THRESHOLD:
 671                         if (!checkonly)
 672                                 tcp->tcp_second_timer_threshold = *i1;
 673                         break;
 674                 case TCP_CONN_NOTIFY_THRESHOLD:
 675                         if (!checkonly)
 676                                 tcp->tcp_first_ctimer_threshold = *i1;
 677                         break;
 678                 case TCP_CONN_ABORT_THRESHOLD:
 679                         if (!checkonly)
 680                                 tcp->tcp_second_ctimer_threshold = *i1;
 681                         break;
 682                 case TCP_RECVDSTADDR:
 683                         if (tcp->tcp_state > TCPS_LISTEN) {
 684                                 *outlenp = 0;
 685                                 return (EOPNOTSUPP);
 686                         }
 687                         /* Setting done in conn_opt_set */
 688                         break;
 689                 case TCP_INIT_CWND:
 690                         if (checkonly)
 691                                 break;
 692 
 693                         /*
 694                          * Only allow socket with network configuration
 695                          * privilege to set the initial cwnd to be larger
 696                          * than allowed by RFC 3390.
 697                          */
 698                         if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
 699                                 if ((reterr = secpolicy_ip_config(cr, B_TRUE))
 700                                     != 0) {
 701                                         *outlenp = 0;
 702                                         return (reterr);
 703                                 }
 704                                 if (val > tcp_max_init_cwnd) {
 705                                         *outlenp = 0;
 706                                         return (EINVAL);
 707                                 }
 708                         }
 709 
 710                         tcp->tcp_init_cwnd = val;
 711 
 712                         /*
 713                          * If the socket is connected, AND no outbound data
 714                          * has been sent, reset the actual cwnd values.
 715                          */
 716                         if (tcp->tcp_state == TCPS_ESTABLISHED &&
 717                             tcp->tcp_iss == tcp->tcp_snxt - 1) {
 718                                 tcp->tcp_cwnd =
 719                                     MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
 720                         }
 721                         break;
 722 
 723                 /*
 724                  * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
 725                  * is in milliseconds. TCP_KEEPIDLE is introduced for
 726                  * compatibility with other Unix flavors.
 727                  * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
 728                  * converting the input to milliseconds.
 729                  */
 730                 case TCP_KEEPIDLE:
 731                         *i1 *= 1000;
 732                         /* FALLTHRU */
 733 
 734                 case TCP_KEEPALIVE_THRESHOLD:
 735                         if (checkonly)
 736                                 break;
 737 
 738                         if (*i1 < tcps->tcps_keepalive_interval_low ||
 739                             *i1 > tcps->tcps_keepalive_interval_high) {
 740                                 *outlenp = 0;
 741                                 return (EINVAL);
 742                         }
 743                         if (*i1 != tcp->tcp_ka_interval) {
 744                                 tcp->tcp_ka_interval = *i1;
 745                                 /*
 746                                  * Check if we need to restart the
 747                                  * keepalive timer.
 748                                  */
 749                                 if (tcp->tcp_ka_tid != 0) {
 750                                         ASSERT(connp->conn_keepalive);
 751                                         (void) TCP_TIMER_CANCEL(tcp,
 752                                             tcp->tcp_ka_tid);
 753                                         tcp->tcp_ka_last_intrvl = 0;
 754                                         tcp->tcp_ka_tid = TCP_TIMER(tcp,
 755                                             tcp_keepalive_timer,
 756                                             tcp->tcp_ka_interval);
 757                                 }
 758                         }
 759                         break;
 760 
 761                 /*
 762                  * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
 763                  * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
 764                  * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
 765                  * tcp_ka_cnt.
 766                  */
 767                 case TCP_KEEPCNT:
 768                         if (checkonly)
 769                                 break;
 770 
 771                         if (*i1 == 0) {
 772                                 return (EINVAL);
 773                         } else if (tcp->tcp_ka_rinterval == 0) {
 774                                 /*
 775                                  * When TCP_KEEPCNT is specified without first
 776                                  * specifying a TCP_KEEPINTVL, we infer an
 777                                  * interval based on a tunable specific to our
 778                                  * stack: the tcp_keepalive_abort_interval.
 779                                  * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
 780                                  * the unlikely event that that has been set.)
 781                                  * Given the abort interval's default value of
 782                                  * 480 seconds, low TCP_KEEPCNT values can
 783                                  * result in intervals that exceed the default
 784                                  * maximum RTO of 60 seconds.  Rather than
 785                                  * fail in these cases, we (implicitly) clamp
 786                                  * the interval at the maximum RTO; if the
 787                                  * TCP_KEEPCNT is shortly followed by a
 788                                  * TCP_KEEPINTVL (as we expect), the abort
 789                                  * threshold will be recalculated correctly --
 790                                  * and if a TCP_KEEPINTVL is not forthcoming,
 791                                  * keep-alive will at least operate reasonably
 792                                  * given the underconfigured state.
 793                                  */
 794                                 uint32_t interval;
 795 
 796                                 interval = tcp->tcp_ka_abort_thres / *i1;
 797 
 798                                 if (interval < tcp->tcp_rto_min)
 799                                         interval = tcp->tcp_rto_min;
 800 
 801                                 if (interval > tcp->tcp_rto_max)
 802                                         interval = tcp->tcp_rto_max;
 803 
 804                                 tcp->tcp_ka_rinterval = interval;
 805                         } else {
 806                                 if ((*i1 * tcp->tcp_ka_rinterval) <
 807                                     tcps->tcps_keepalive_abort_interval_low ||
 808                                     (*i1 * tcp->tcp_ka_rinterval) >
 809                                     tcps->tcps_keepalive_abort_interval_high)
 810                                         return (EINVAL);
 811                                 tcp->tcp_ka_abort_thres =
 812                                     (*i1 * tcp->tcp_ka_rinterval);
 813                         }
 814                         tcp->tcp_ka_cnt = *i1;
 815                         break;
 816                 case TCP_KEEPINTVL:
 817                         /*
 818                          * TCP_KEEPINTVL is specified in seconds, but
 819                          * tcp_ka_rinterval is in milliseconds.
 820                          */
 821 
 822                         if (checkonly)
 823                                 break;
 824 
 825                         if ((*i1 * 1000) < tcp->tcp_rto_min ||
 826                             (*i1 * 1000) > tcp->tcp_rto_max)
 827                                 return (EINVAL);
 828 
 829                         if (tcp->tcp_ka_cnt == 0) {
 830                                 tcp->tcp_ka_cnt =
 831                                     tcp->tcp_ka_abort_thres / (*i1 * 1000);
 832                         } else {
 833                                 if ((*i1 * tcp->tcp_ka_cnt * 1000) <
 834                                     tcps->tcps_keepalive_abort_interval_low ||
 835                                     (*i1 * tcp->tcp_ka_cnt * 1000) >
 836                                     tcps->tcps_keepalive_abort_interval_high)
 837                                         return (EINVAL);
 838                                 tcp->tcp_ka_abort_thres =
 839                                     (*i1 * tcp->tcp_ka_cnt * 1000);
 840                         }
 841                         tcp->tcp_ka_rinterval = *i1 * 1000;
 842                         break;
 843                 case TCP_KEEPALIVE_ABORT_THRESHOLD:
 844                         if (!checkonly) {
 845                                 if (*i1 <
 846                                     tcps->tcps_keepalive_abort_interval_low ||
 847                                     *i1 >
 848                                     tcps->tcps_keepalive_abort_interval_high) {
 849                                         *outlenp = 0;
 850                                         return (EINVAL);
 851                                 }
 852                                 tcp->tcp_ka_abort_thres = *i1;
 853                                 tcp->tcp_ka_cnt = 0;
 854                                 tcp->tcp_ka_rinterval = 0;
 855                         }
 856                         break;
 857                 case TCP_CORK:
 858                         if (!checkonly) {
 859                                 /*
 860                                  * If tcp->tcp_cork was set and is now being
 861                                  * unset, we have to make sure that the
 862                                  * remaining data gets sent out.  Clear
 863                                  * tcp->tcp_cork first so that tcp_wput_data()
 864                                  * can send data even if it is less than an MSS.
 865                                  */
 866                                 if (tcp->tcp_cork && onoff == 0 &&
 867                                     tcp->tcp_unsent > 0) {
 868                                         tcp->tcp_cork = B_FALSE;
 869                                         tcp_wput_data(tcp, NULL, B_FALSE);
 870                                 }
 871                                 tcp->tcp_cork = onoff;
 872                         }
 873                         break;
 874                 case TCP_RTO_INITIAL:
 875                         if (checkonly || val == 0)
 876                                 break;
 877 
 878                         /*
 879                          * Sanity checks
 880                          *
 881                          * The initial RTO should be bounded by the minimum
 882                          * and maximum RTO.  And it should also be smaller
 883                          * than the connect attempt abort timeout.  Otherwise,
 884                          * the connection won't be aborted in a period
 885                          * reasonably close to that timeout.
 886                          */
 887                         if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
 888                             val > tcp->tcp_second_ctimer_threshold ||
 889                             val < tcps->tcps_rexmit_interval_initial_low ||
 890                             val > tcps->tcps_rexmit_interval_initial_high) {
 891                                 *outlenp = 0;
 892                                 return (EINVAL);
 893                         }
 894                         tcp->tcp_rto_initial = val;
 895 
 896                         /*
 897                          * If TCP has not sent anything, need to re-calculate
 898                          * tcp_rto.  Otherwise, this option change does not
 899                          * really affect anything.
 900                          */
 901                         if (tcp->tcp_state >= TCPS_SYN_SENT)
 902                                 break;
 903 
 904                         tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
 905                         tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
 906                         tcp->tcp_rto = tcp_calculate_rto(tcp, tcps,
 907                             tcps->tcps_conn_grace_period);
 908                         break;
 909                 case TCP_RTO_MIN:
 910                         if (checkonly || val == 0)
 911                                 break;
 912 
 913                         if (val < tcps->tcps_rexmit_interval_min_low ||
 914                             val > tcps->tcps_rexmit_interval_min_high ||
 915                             val > tcp->tcp_rto_max) {
 916                                 *outlenp = 0;
 917                                 return (EINVAL);
 918                         }
 919                         tcp->tcp_rto_min = val;
 920                         if (tcp->tcp_rto < val)
 921                                 tcp->tcp_rto = val;
 922                         break;
 923                 case TCP_RTO_MAX:
 924                         if (checkonly || val == 0)
 925                                 break;
 926 
 927                         /*
 928                          * Sanity checks
 929                          *
 930                          * The maximum RTO should not be larger than the
 931                          * connection abort timeout.  Otherwise, the
 932                          * connection won't be aborted in a period reasonably
 933                          * close to that timeout.
 934                          */
 935                         if (val < tcps->tcps_rexmit_interval_max_low ||
 936                             val > tcps->tcps_rexmit_interval_max_high ||
 937                             val < tcp->tcp_rto_min ||
 938                             val > tcp->tcp_second_timer_threshold) {
 939                                 *outlenp = 0;
 940                                 return (EINVAL);
 941                         }
 942                         tcp->tcp_rto_max = val;
 943                         if (tcp->tcp_rto > val)
 944                                 tcp->tcp_rto = val;
 945                         break;
 946                 case TCP_LINGER2:
 947                         if (checkonly || *i1 == 0)
 948                                 break;
 949 
 950                         /*
 951                          * The option value is in seconds.  It must be no
 952                          * smaller than the lower bound of the private
 953                          * parameter tcp_fin_wait_2_flush_interval and no
 954                          * larger than the current value of that parameter.
 955                          * The upper limit keeps an app from setting
 956                          * TCP_LINGER2 to a big value, which would cause
 957                          * resources to be held for too long in the
 958                          * FIN-WAIT-2 state.
 959                          */
 960                         if (*i1 < 0 ||
 961                             tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
 962                             *i1 ||
 963                             tcps->tcps_fin_wait_2_flush_interval/SECONDS <
 964                             *i1) {
 965                                 *outlenp = 0;
 966                                 return (EINVAL);
 967                         }
 968                         tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
 969                         break;
 970                 default:
 971                         break;
 972                 }
 973                 break;
 974         case IPPROTO_IP:
 975                 if (connp->conn_family != AF_INET) {
 976                         *outlenp = 0;
 977                         return (EINVAL);
 978                 }
 979                 switch (name) {
 980                 case IP_SEC_OPT:
 981                         /*
 982                          * We should not allow policy setting after
 983                          * we start listening for connections.
 984                          */
 985                         if (tcp->tcp_state == TCPS_LISTEN) {
 986                                 return (EINVAL);
 987                         }
 988                         break;
 989                 }
 990                 break;
 991         case IPPROTO_IPV6:
 992                 /*
 993                  * IPPROTO_IPV6 options are only supported for sockets
 994                  * that are using IPv6 on the wire.
 995                  */
 996                 if (connp->conn_ipversion != IPV6_VERSION) {
 997                         *outlenp = 0;
 998                         return (EINVAL);
 999                 }
1000 
1001                 switch (name) {
1002                 case IPV6_RECVPKTINFO:
1003                         if (!checkonly) {
1004                                 /* Force it to be sent up with the next msg */
1005                                 tcp->tcp_recvifindex = 0;
1006                         }
1007                         break;
1008                 case IPV6_RECVTCLASS:
1009                         if (!checkonly) {
1010                                 /* Force it to be sent up with the next msg */
1011                                 tcp->tcp_recvtclass = 0xffffffffU;
1012                         }
1013                         break;
1014                 case IPV6_RECVHOPLIMIT:
1015                         if (!checkonly) {
1016                                 /* Force it to be sent up with the next msg */
1017                                 tcp->tcp_recvhops = 0xffffffffU;
1018                         }
1019                         break;
1020                 case IPV6_PKTINFO:
1021                         /* This is an extra check for TCP */
1022                         if (inlen == sizeof (struct in6_pktinfo)) {
1023                                 struct in6_pktinfo *pkti;
1024 
1025                                 pkti = (struct in6_pktinfo *)invalp;
1026                                 /*
1027                                  * RFC 3542 states that ipi6_addr must be
1028                                  * the unspecified address when setting the
1029                                  * IPV6_PKTINFO sticky socket option on a
1030                                  * TCP socket.
1031                                  */
1032                                 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1033                                         return (EINVAL);
1034                         }
1035                         break;
1036                 case IPV6_SEC_OPT:
1037                         /*
1038                          * We should not allow policy setting after
1039                          * we start listening for connections.
1040                          */
1041                         if (tcp->tcp_state == TCPS_LISTEN) {
1042                                 return (EINVAL);
1043                         }
1044                         break;
1045                 }
1046                 break;
1047         }
1048         reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1049             checkonly, cr);
1050         if (reterr != 0) {
1051                 *outlenp = 0;
1052                 return (reterr);
1053         }
1054 
1055         /*
1056          * Common case of OK return with outval same as inval
1057          */
1058         if (invalp != outvalp) {
1059                 /* don't trust bcopy for identical src/dst */
1060                 (void) bcopy(invalp, outvalp, inlen);
1061         }
1062         *outlenp = inlen;
1063 
1064         if (coas.coa_changed & COA_HEADER_CHANGED) {
1065                 /* If we are connected we rebuild the headers */
1066                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1067                     !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1068                         reterr = tcp_build_hdrs(tcp);
1069                         if (reterr != 0)
1070                                 return (reterr);
1071                 }
1072         }
1073         if (coas.coa_changed & COA_ROUTE_CHANGED) {
1074                 in6_addr_t nexthop;
1075 
1076                 /*
1077                  * If we are connected we re-cache the information.
1078                  * We ignore errors to preserve BSD behavior.
1079                  * Note that we don't redo IPsec policy lookup here
1080                  * since the final destination (or source) didn't change.
1081                  */
1082                 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1083                     &connp->conn_faddr_v6, &nexthop);
1084 
1085                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1086                     !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1087                         (void) ip_attr_connect(connp, connp->conn_ixa,
1088                             &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1089                             &nexthop, connp->conn_fport, NULL, NULL,
1090                             IPDF_VERIFY_DST);
1091                 }
1092         }
1093         if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1094                 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1095         }
1096         if (coas.coa_changed & COA_WROFF_CHANGED) {
1097                 connp->conn_wroff = connp->conn_ht_iphc_allocated +
1098                     tcps->tcps_wroff_xtra;
1099                 (void) proto_set_tx_wroff(connp->conn_rq, connp,
1100                     connp->conn_wroff);
1101         }
1102         if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1103                 if (IPCL_IS_NONSTR(connp))
1104                         proto_set_rx_oob_opt(connp, onoff);
1105         }
1106         return (0);
1107 }