1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/stream.h>
  28 #define _SUN_TPI_VERSION 2
  29 #include <sys/tihdr.h>
  30 #include <sys/socket.h>
  31 #include <sys/xti_xtiopt.h>
  32 #include <sys/xti_inet.h>
  33 #include <sys/policy.h>
  34 
  35 #include <inet/common.h>
  36 #include <netinet/ip6.h>
  37 #include <inet/ip.h>
  38 
  39 #include <netinet/in.h>
  40 #include <netinet/tcp.h>
  41 #include <inet/optcom.h>
  42 #include <inet/proto_set.h>
  43 #include <inet/tcp_impl.h>
  44 
  45 static int      tcp_opt_default(queue_t *, int, int, uchar_t *);
  46 
  47 /*
  48  * Table of all known options handled on a TCP protocol stack.
  49  *
  50  * Note: This table contains options processed by both TCP and IP levels
  51  *       and is the superset of options that can be performed on a TCP over IP
  52  *       stack.
  53  */
  54 opdes_t tcp_opt_arr[] = {
  55 
  56 { SO_LINGER,    SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
  57         sizeof (struct linger), 0 },
  58 
  59 { SO_DEBUG,     SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  60 { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  61 { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  62 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
  63         },
  64 { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  65 { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  66 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  67 { SO_TYPE,      SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
  68 { SO_SNDBUF,    SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  69 { SO_RCVBUF,    SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  70 { SO_SNDTIMEO,  SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
  71         sizeof (struct timeval), 0 },
  72 { SO_RCVTIMEO,  SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
  73         sizeof (struct timeval), 0 },
  74 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
  75         },
  76 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  77 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
  78         0 },
  79 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
  80         0 },
  81 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
  82         0 },
  83 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
  84         0 },
  85 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
  86 
  87 { SO_DOMAIN,    SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
  88 
  89 { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
  90 
  91 { TCP_NODELAY,  IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
  92         },
  93 { TCP_MAXSEG,   IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
  94         536 },
  95 
  96 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
  97         OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
  98 
  99 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
 100         OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
 101 
 102 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
 103         OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
 104 
 105 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
 106         OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
 107 
 108 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
 109         0 },
 110 
 111 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
 112         sizeof (int), 0 },
 113 
 114 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
 115         },
 116 
 117 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
 118         sizeof (int), 0 },
 119 
 120 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
 121         sizeof (int), 0 },
 122 
 123 { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 124 
 125 { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 126 
 127 { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 128 
 129 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
 130         sizeof (int), 0 },
 131 
 132 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 133 
 134 { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
 135 
 136 { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
 137 
 138 { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
 139 
 140 { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 141 
 142 { IP_OPTIONS,   IPPROTO_IP, OA_RW, OA_RW, OP_NP,
 143         (OP_VARLEN|OP_NODEFAULT),
 144         IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
 145 { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
 146         (OP_VARLEN|OP_NODEFAULT),
 147         IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
 148 
 149 { IP_TOS,       IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 150 { T_IP_TOS,     IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 151 { IP_TTL,       IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
 152         sizeof (int), -1 /* not initialized */ },
 153 
 154 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
 155         sizeof (ipsec_req_t), -1 /* not initialized */ },
 156 
 157 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
 158         sizeof (int),   0 /* no ifindex */ },
 159 
 160 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
 161         sizeof (int), 0 },
 162 
 163 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
 164         sizeof (int), -1 /* not initialized */ },
 165 
 166 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 167         sizeof (int),   0 /* no ifindex */ },
 168 
 169 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 170 
 171 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
 172         sizeof (in_addr_t),     -1 /* not initialized  */ },
 173 
 174 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
 175         sizeof (int), 0 },
 176 
 177 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 178         (OP_NODEFAULT|OP_VARLEN),
 179         sizeof (struct in6_pktinfo), -1 /* not initialized */ },
 180 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 181         OP_NODEFAULT,
 182         sizeof (sin6_t), -1 /* not initialized */ },
 183 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 184         (OP_VARLEN|OP_NODEFAULT), 255*8,
 185         -1 /* not initialized */ },
 186 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 187         (OP_VARLEN|OP_NODEFAULT), 255*8,
 188         -1 /* not initialized */ },
 189 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 190         (OP_VARLEN|OP_NODEFAULT), 255*8,
 191         -1 /* not initialized */ },
 192 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 193         (OP_VARLEN|OP_NODEFAULT), 255*8,
 194         -1 /* not initialized */ },
 195 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 196         OP_NODEFAULT,
 197         sizeof (int), -1 /* not initialized */ },
 198 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
 199         OP_NODEFAULT,
 200         sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
 201 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 202         sizeof (int), 0 },
 203 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 204         sizeof (int), 0 },
 205 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 206         sizeof (int), 0 },
 207 
 208 /* Enable receipt of ancillary data */
 209 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 210         sizeof (int), 0 },
 211 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 212         sizeof (int), 0 },
 213 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 214         sizeof (int), 0 },
 215 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 216         sizeof (int), 0 },
 217 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 218         sizeof (int), 0 },
 219 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 220         sizeof (int), 0 },
 221 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 222         sizeof (int), 0 },
 223 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 224         sizeof (int), 0 },
 225 
 226 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
 227         sizeof (ipsec_req_t), -1 /* not initialized */ },
 228 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
 229         sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
 230 };
 231 
 232 /*
 233  * Table of all supported levels
 234  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 235  * any supported options so we need this info separately.
 236  *
 237  * This is needed only for topmost tpi providers and is used only by
 238  * XTI interfaces.
 239  */
 240 optlevel_t      tcp_valid_levels_arr[] = {
 241         XTI_GENERIC,
 242         SOL_SOCKET,
 243         IPPROTO_TCP,
 244         IPPROTO_IP,
 245         IPPROTO_IPV6
 246 };
 247 
 248 
 249 #define TCP_OPT_ARR_CNT         A_CNT(tcp_opt_arr)
 250 #define TCP_VALID_LEVELS_CNT    A_CNT(tcp_valid_levels_arr)
 251 
 252 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
 253 
 254 /*
 255  * Initialize option database object for TCP
 256  *
 257  * This object represents database of options to search passed to
 258  * {sock,tpi}optcom_req() interface routine to take care of option
 259  * management and associated methods.
 260  */
 261 
 262 optdb_obj_t tcp_opt_obj = {
 263         tcp_opt_default,        /* TCP default value function pointer */
 264         tcp_tpi_opt_get,        /* TCP get function pointer */
 265         tcp_tpi_opt_set,        /* TCP set function pointer */
 266         TCP_OPT_ARR_CNT,        /* TCP option database count of entries */
 267         tcp_opt_arr,            /* TCP option database */
 268         TCP_VALID_LEVELS_CNT,   /* TCP valid level count of entries */
 269         tcp_valid_levels_arr    /* TCP valid level array */
 270 };
 271 
 272 static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
 273 
 274 /*
 275  * Some TCP options can be "set" by requesting them in the option
 276  * buffer. This is needed for XTI feature test though we do not
 277  * allow it in general. We interpret that this mechanism is more
 278  * applicable to OSI protocols and need not be allowed in general.
 279  * This routine filters out options for which it is not allowed (most)
 280  * and lets through those (few) for which it is. [ The XTI interface
 281  * test suite specifics will imply that any XTI_GENERIC level XTI_* if
 282  * ever implemented will have to be allowed here ].
 283  */
 284 static boolean_t
 285 tcp_allow_connopt_set(int level, int name)
 286 {
 287 
 288         switch (level) {
 289         case IPPROTO_TCP:
 290                 switch (name) {
 291                 case TCP_NODELAY:
 292                         return (B_TRUE);
 293                 default:
 294                         return (B_FALSE);
 295                 }
 296                 /*NOTREACHED*/
 297         default:
 298                 return (B_FALSE);
 299         }
 300         /*NOTREACHED*/
 301 }
 302 
 303 /*
 304  * This routine gets default values of certain options whose default
 305  * values are maintained by protocol specific code
 306  */
 307 /* ARGSUSED */
 308 static int
 309 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
 310 {
 311         int32_t *i1 = (int32_t *)ptr;
 312         tcp_stack_t     *tcps = Q_TO_TCP(q)->tcp_tcps;
 313 
 314         switch (level) {
 315         case IPPROTO_TCP:
 316                 switch (name) {
 317                 case TCP_NOTIFY_THRESHOLD:
 318                         *i1 = tcps->tcps_ip_notify_interval;
 319                         break;
 320                 case TCP_ABORT_THRESHOLD:
 321                         *i1 = tcps->tcps_ip_abort_interval;
 322                         break;
 323                 case TCP_CONN_NOTIFY_THRESHOLD:
 324                         *i1 = tcps->tcps_ip_notify_cinterval;
 325                         break;
 326                 case TCP_CONN_ABORT_THRESHOLD:
 327                         *i1 = tcps->tcps_ip_abort_cinterval;
 328                         break;
 329                 default:
 330                         return (-1);
 331                 }
 332                 break;
 333         case IPPROTO_IP:
 334                 switch (name) {
 335                 case IP_TTL:
 336                         *i1 = tcps->tcps_ipv4_ttl;
 337                         break;
 338                 default:
 339                         return (-1);
 340                 }
 341                 break;
 342         case IPPROTO_IPV6:
 343                 switch (name) {
 344                 case IPV6_UNICAST_HOPS:
 345                         *i1 = tcps->tcps_ipv6_hoplimit;
 346                         break;
 347                 default:
 348                         return (-1);
 349                 }
 350                 break;
 351         default:
 352                 return (-1);
 353         }
 354         return (sizeof (int));
 355 }
 356 
 357 /*
 358  * TCP routine to get the values of options.
 359  */
 360 int
 361 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
 362 {
 363         int             *i1 = (int *)ptr;
 364         tcp_t           *tcp = connp->conn_tcp;
 365         conn_opt_arg_t  coas;
 366         int             retval;
 367 
 368         coas.coa_connp = connp;
 369         coas.coa_ixa = connp->conn_ixa;
 370         coas.coa_ipp = &connp->conn_xmit_ipp;
 371         coas.coa_ancillary = B_FALSE;
 372         coas.coa_changed = 0;
 373 
 374         switch (level) {
 375         case SOL_SOCKET:
 376                 switch (name) {
 377                 case SO_SND_COPYAVOID:
 378                         *i1 = tcp->tcp_snd_zcopy_on ?
 379                             SO_SND_COPYAVOID : 0;
 380                         return (sizeof (int));
 381                 case SO_ACCEPTCONN:
 382                         *i1 = (tcp->tcp_state == TCPS_LISTEN);
 383                         return (sizeof (int));
 384                 }
 385                 break;
 386         case IPPROTO_TCP:
 387                 switch (name) {
 388                 case TCP_NODELAY:
 389                         *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
 390                         return (sizeof (int));
 391                 case TCP_MAXSEG:
 392                         *i1 = tcp->tcp_mss;
 393                         return (sizeof (int));
 394                 case TCP_NOTIFY_THRESHOLD:
 395                         *i1 = (int)tcp->tcp_first_timer_threshold;
 396                         return (sizeof (int));
 397                 case TCP_ABORT_THRESHOLD:
 398                         *i1 = tcp->tcp_second_timer_threshold;
 399                         return (sizeof (int));
 400                 case TCP_CONN_NOTIFY_THRESHOLD:
 401                         *i1 = tcp->tcp_first_ctimer_threshold;
 402                         return (sizeof (int));
 403                 case TCP_CONN_ABORT_THRESHOLD:
 404                         *i1 = tcp->tcp_second_ctimer_threshold;
 405                         return (sizeof (int));
 406                 case TCP_INIT_CWND:
 407                         *i1 = tcp->tcp_init_cwnd;
 408                         return (sizeof (int));
 409                 case TCP_KEEPALIVE_THRESHOLD:
 410                         *i1 = tcp->tcp_ka_interval;
 411                         return (sizeof (int));
 412 
 413                 /*
 414                  * TCP_KEEPIDLE expects value in seconds, but
 415                  * tcp_ka_interval is in milliseconds.
 416                  */
 417                 case TCP_KEEPIDLE:
 418                         *i1 = tcp->tcp_ka_interval / 1000;
 419                         return (sizeof (int));
 420                 case TCP_KEEPCNT:
 421                         *i1 = tcp->tcp_ka_cnt;
 422                         return (sizeof (int));
 423 
 424                 /*
 425                  * TCP_KEEPINTVL expects value in seconds, but
 426                  * tcp_ka_rinterval is in milliseconds.
 427                  */
 428                 case TCP_KEEPINTVL:
 429                         *i1 = tcp->tcp_ka_rinterval / 1000;
 430                         return (sizeof (int));
 431                 case TCP_KEEPALIVE_ABORT_THRESHOLD:
 432                         *i1 = tcp->tcp_ka_abort_thres;
 433                         return (sizeof (int));
 434                 case TCP_CORK:
 435                         *i1 = tcp->tcp_cork;
 436                         return (sizeof (int));
 437                 case TCP_RTO_INITIAL:
 438                         *i1 = tcp->tcp_rto_initial;
 439                         return (sizeof (uint32_t));
 440                 case TCP_RTO_MIN:
 441                         *i1 = tcp->tcp_rto_min;
 442                         return (sizeof (uint32_t));
 443                 case TCP_RTO_MAX:
 444                         *i1 = tcp->tcp_rto_max;
 445                         return (sizeof (uint32_t));
 446                 case TCP_LINGER2:
 447                         *i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
 448                         return (sizeof (int));
 449                 }
 450                 break;
 451         case IPPROTO_IP:
 452                 if (connp->conn_family != AF_INET)
 453                         return (-1);
 454                 switch (name) {
 455                 case IP_OPTIONS:
 456                 case T_IP_OPTIONS:
 457                         /* Caller ensures enough space */
 458                         return (ip_opt_get_user(connp, ptr));
 459                 default:
 460                         break;
 461                 }
 462                 break;
 463 
 464         case IPPROTO_IPV6:
 465                 /*
 466                  * IPPROTO_IPV6 options are only supported for sockets
 467                  * that are using IPv6 on the wire.
 468                  */
 469                 if (connp->conn_ipversion != IPV6_VERSION) {
 470                         return (-1);
 471                 }
 472                 switch (name) {
 473                 case IPV6_PATHMTU:
 474                         if (tcp->tcp_state < TCPS_ESTABLISHED)
 475                                 return (-1);
 476                         break;
 477                 }
 478                 break;
 479         }
 480         mutex_enter(&connp->conn_lock);
 481         retval = conn_opt_get(&coas, level, name, ptr);
 482         mutex_exit(&connp->conn_lock);
 483         return (retval);
 484 }
 485 
 486 /*
 487  * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
 488  * Parameters are assumed to be verified by the caller.
 489  */
 490 /* ARGSUSED */
 491 int
 492 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
 493     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
 494     void *thisdg_attrs, cred_t *cr)
 495 {
 496         tcp_t   *tcp = connp->conn_tcp;
 497         int     *i1 = (int *)invalp;
 498         boolean_t onoff = (*i1 == 0) ? 0 : 1;
 499         boolean_t checkonly;
 500         int     reterr;
 501         tcp_stack_t     *tcps = tcp->tcp_tcps;
 502         conn_opt_arg_t  coas;
 503         uint32_t        val = *((uint32_t *)invalp);
 504 
 505         coas.coa_connp = connp;
 506         coas.coa_ixa = connp->conn_ixa;
 507         coas.coa_ipp = &connp->conn_xmit_ipp;
 508         coas.coa_ancillary = B_FALSE;
 509         coas.coa_changed = 0;
 510 
 511         switch (optset_context) {
 512         case SETFN_OPTCOM_CHECKONLY:
 513                 checkonly = B_TRUE;
 514                 /*
 515                  * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
 516                  * inlen != 0 implies value supplied and
 517                  *      we have to "pretend" to set it.
 518                  * inlen == 0 implies that there is no
 519                  *      value part in T_CHECK request and just validation
 520                  * done elsewhere should be enough, we just return here.
 521                  */
 522                 if (inlen == 0) {
 523                         *outlenp = 0;
 524                         return (0);
 525                 }
 526                 break;
 527         case SETFN_OPTCOM_NEGOTIATE:
 528                 checkonly = B_FALSE;
 529                 break;
 530         case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
 531         case SETFN_CONN_NEGOTIATE:
 532                 checkonly = B_FALSE;
 533                 /*
 534                  * Negotiating local and "association-related" options
 535                  * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
 536                  * primitives is allowed by XTI, but we choose
 537                  * to not implement this style negotiation for Internet
 538                  * protocols (We interpret it is a must for OSI world but
 539                  * optional for Internet protocols) for all options.
 540                  * [ Will do only for the few options that enable test
 541                  * suites that our XTI implementation of this feature
 542                  * works for transports that do allow it ]
 543                  */
 544                 if (!tcp_allow_connopt_set(level, name)) {
 545                         *outlenp = 0;
 546                         return (EINVAL);
 547                 }
 548                 break;
 549         default:
 550                 /*
 551                  * We should never get here
 552                  */
 553                 *outlenp = 0;
 554                 return (EINVAL);
 555         }
 556 
 557         ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
 558             (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
 559 
 560         /*
 561          * For TCP, we should have no ancillary data sent down
 562          * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
 563          * has to be zero.
 564          */
 565         ASSERT(thisdg_attrs == NULL);
 566 
 567         /*
 568          * For fixed length options, no sanity check
 569          * of passed in length is done. It is assumed *_optcom_req()
 570          * routines do the right thing.
 571          */
 572         switch (level) {
 573         case SOL_SOCKET:
 574                 switch (name) {
 575                 case SO_KEEPALIVE:
 576                         if (checkonly) {
 577                                 /* check only case */
 578                                 break;
 579                         }
 580 
 581                         if (!onoff) {
 582                                 if (connp->conn_keepalive) {
 583                                         if (tcp->tcp_ka_tid != 0) {
 584                                                 (void) TCP_TIMER_CANCEL(tcp,
 585                                                     tcp->tcp_ka_tid);
 586                                                 tcp->tcp_ka_tid = 0;
 587                                         }
 588                                         connp->conn_keepalive = 0;
 589                                 }
 590                                 break;
 591                         }
 592                         if (!connp->conn_keepalive) {
 593                                 /* Crank up the keepalive timer */
 594                                 tcp->tcp_ka_last_intrvl = 0;
 595                                 tcp->tcp_ka_tid = TCP_TIMER(tcp,
 596                                     tcp_keepalive_timer, tcp->tcp_ka_interval);
 597                                 connp->conn_keepalive = 1;
 598                         }
 599                         break;
 600                 case SO_SNDBUF: {
 601                         if (*i1 > tcps->tcps_max_buf) {
 602                                 *outlenp = 0;
 603                                 return (ENOBUFS);
 604                         }
 605                         if (checkonly)
 606                                 break;
 607 
 608                         connp->conn_sndbuf = *i1;
 609                         if (tcps->tcps_snd_lowat_fraction != 0) {
 610                                 connp->conn_sndlowat = connp->conn_sndbuf /
 611                                     tcps->tcps_snd_lowat_fraction;
 612                         }
 613                         (void) tcp_maxpsz_set(tcp, B_TRUE);
 614                         /*
 615                          * If we are flow-controlled, recheck the condition.
 616                          * There are apps that increase SO_SNDBUF size when
 617                          * flow-controlled (EWOULDBLOCK), and expect the flow
 618                          * control condition to be lifted right away.
 619                          */
 620                         mutex_enter(&tcp->tcp_non_sq_lock);
 621                         if (tcp->tcp_flow_stopped &&
 622                             TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
 623                                 tcp_clrqfull(tcp);
 624                         }
 625                         mutex_exit(&tcp->tcp_non_sq_lock);
 626                         *outlenp = inlen;
 627                         return (0);
 628                 }
 629                 case SO_RCVBUF:
 630                         if (*i1 > tcps->tcps_max_buf) {
 631                                 *outlenp = 0;
 632                                 return (ENOBUFS);
 633                         }
 634                         /* Silently ignore zero */
 635                         if (!checkonly && *i1 != 0) {
 636                                 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
 637                                 (void) tcp_rwnd_set(tcp, *i1);
 638                         }
 639                         /*
 640                          * XXX should we return the rwnd here
 641                          * and tcp_opt_get ?
 642                          */
 643                         *outlenp = inlen;
 644                         return (0);
 645                 case SO_SND_COPYAVOID:
 646                         if (!checkonly) {
 647                                 if (tcp->tcp_loopback ||
 648                                     (onoff != 1) || !tcp_zcopy_check(tcp)) {
 649                                         *outlenp = 0;
 650                                         return (EOPNOTSUPP);
 651                                 }
 652                                 tcp->tcp_snd_zcopy_aware = 1;
 653                         }
 654                         *outlenp = inlen;
 655                         return (0);
 656                 }
 657                 break;
 658         case IPPROTO_TCP:
 659                 switch (name) {
 660                 case TCP_NODELAY:
 661                         if (!checkonly)
 662                                 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
 663                         break;
 664                 case TCP_NOTIFY_THRESHOLD:
 665                         if (!checkonly)
 666                                 tcp->tcp_first_timer_threshold = *i1;
 667                         break;
 668                 case TCP_ABORT_THRESHOLD:
 669                         if (!checkonly)
 670                                 tcp->tcp_second_timer_threshold = *i1;
 671                         break;
 672                 case TCP_CONN_NOTIFY_THRESHOLD:
 673                         if (!checkonly)
 674                                 tcp->tcp_first_ctimer_threshold = *i1;
 675                         break;
 676                 case TCP_CONN_ABORT_THRESHOLD:
 677                         if (!checkonly)
 678                                 tcp->tcp_second_ctimer_threshold = *i1;
 679                         break;
 680                 case TCP_RECVDSTADDR:
 681                         if (tcp->tcp_state > TCPS_LISTEN) {
 682                                 *outlenp = 0;
 683                                 return (EOPNOTSUPP);
 684                         }
 685                         /* Setting done in conn_opt_set */
 686                         break;
 687                 case TCP_INIT_CWND:
 688                         if (checkonly)
 689                                 break;
 690 
 691                         /*
 692                          * Only allow socket with network configuration
 693                          * privilege to set the initial cwnd to be larger
 694                          * than allowed by RFC 3390.
 695                          */
 696                         if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
 697                                 if ((reterr = secpolicy_ip_config(cr, B_TRUE))
 698                                     != 0) {
 699                                         *outlenp = 0;
 700                                         return (reterr);
 701                                 }
 702                                 if (val > tcp_max_init_cwnd) {
 703                                         *outlenp = 0;
 704                                         return (EINVAL);
 705                                 }
 706                         }
 707 
 708                         tcp->tcp_init_cwnd = val;
 709 
 710                         /*
 711                          * If the socket is connected, AND no outbound data
 712                          * has been sent, reset the actual cwnd values.
 713                          */
 714                         if (tcp->tcp_state == TCPS_ESTABLISHED &&
 715                             tcp->tcp_iss == tcp->tcp_snxt - 1) {
 716                                 tcp->tcp_cwnd =
 717                                     MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
 718                         }
 719                         break;
 720 
 721                 /*
 722                  * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
 723                  * is in milliseconds. TCP_KEEPIDLE is introduced for
 724                  * compatibility with other Unix flavors.
 725                  * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
 726                  * converting the input to milliseconds.
 727                  */
 728                 case TCP_KEEPIDLE:
 729                         *i1 *= 1000;
 730                         /* FALLTHRU */
 731 
 732                 case TCP_KEEPALIVE_THRESHOLD:
 733                         if (checkonly)
 734                                 break;
 735 
 736                         if (*i1 < tcps->tcps_keepalive_interval_low ||
 737                             *i1 > tcps->tcps_keepalive_interval_high) {
 738                                 *outlenp = 0;
 739                                 return (EINVAL);
 740                         }
 741                         if (*i1 != tcp->tcp_ka_interval) {
 742                                 tcp->tcp_ka_interval = *i1;
 743                                 /*
 744                                  * Check if we need to restart the
 745                                  * keepalive timer.
 746                                  */
 747                                 if (tcp->tcp_ka_tid != 0) {
 748                                         ASSERT(connp->conn_keepalive);
 749                                         (void) TCP_TIMER_CANCEL(tcp,
 750                                             tcp->tcp_ka_tid);
 751                                         tcp->tcp_ka_last_intrvl = 0;
 752                                         tcp->tcp_ka_tid = TCP_TIMER(tcp,
 753                                             tcp_keepalive_timer,
 754                                             tcp->tcp_ka_interval);
 755                                 }
 756                         }
 757                         break;
 758 
 759                 /*
 760                  * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
 761                  * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
 762                  * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
 763                  * tcp_ka_cnt.
 764                  */
 765                 case TCP_KEEPCNT:
 766                         if (checkonly)
 767                                 break;
 768 
 769                         if (*i1 == 0) {
 770                                 return (EINVAL);
 771                         } else if (tcp->tcp_ka_rinterval == 0) {
 772                                 /*
 773                                  * When TCP_KEEPCNT is specified without first
 774                                  * specifying a TCP_KEEPINTVL, we infer an
 775                                  * interval based on a tunable specific to our
 776                                  * stack: the tcp_keepalive_abort_interval.
 777                                  * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
 778                                  * the unlikely event that that has been set.)
 779                                  * Given the abort interval's default value of
 780                                  * 480 seconds, low TCP_KEEPCNT values can
 781                                  * result in intervals that exceed the default
 782                                  * maximum RTO of 60 seconds.  Rather than
 783                                  * fail in these cases, we (implicitly) clamp
 784                                  * the interval at the maximum RTO; if the
 785                                  * TCP_KEEPCNT is shortly followed by a
 786                                  * TCP_KEEPINTVL (as we expect), the abort
 787                                  * threshold will be recalculated correctly --
 788                                  * and if a TCP_KEEPINTVL is not forthcoming,
 789                                  * keep-alive will at least operate reasonably
 790                                  * given the underconfigured state.
 791                                  */
 792                                 uint32_t interval;
 793 
 794                                 interval = tcp->tcp_ka_abort_thres / *i1;
 795 
 796                                 if (interval < tcp->tcp_rto_min)
 797                                         interval = tcp->tcp_rto_min;
 798 
 799                                 if (interval > tcp->tcp_rto_max)
 800                                         interval = tcp->tcp_rto_max;
 801 
 802                                 tcp->tcp_ka_rinterval = interval;
 803                         } else {
 804                                 if ((*i1 * tcp->tcp_ka_rinterval) <
 805                                     tcps->tcps_keepalive_abort_interval_low ||
 806                                     (*i1 * tcp->tcp_ka_rinterval) >
 807                                     tcps->tcps_keepalive_abort_interval_high)
 808                                         return (EINVAL);
 809                                 tcp->tcp_ka_abort_thres =
 810                                     (*i1 * tcp->tcp_ka_rinterval);
 811                         }
 812                         tcp->tcp_ka_cnt = *i1;
 813                         break;
 814                 case TCP_KEEPINTVL:
 815                         /*
 816                          * TCP_KEEPINTVL is specified in seconds, but
 817                          * tcp_ka_rinterval is in milliseconds.
 818                          */
 819 
 820                         if (checkonly)
 821                                 break;
 822 
 823                         if ((*i1 * 1000) < tcp->tcp_rto_min ||
 824                             (*i1 * 1000) > tcp->tcp_rto_max)
 825                                 return (EINVAL);
 826 
 827                         if (tcp->tcp_ka_cnt == 0) {
 828                                 tcp->tcp_ka_cnt =
 829                                     tcp->tcp_ka_abort_thres / (*i1 * 1000);
 830                         } else {
 831                                 if ((*i1 * tcp->tcp_ka_cnt * 1000) <
 832                                     tcps->tcps_keepalive_abort_interval_low ||
 833                                     (*i1 * tcp->tcp_ka_cnt * 1000) >
 834                                     tcps->tcps_keepalive_abort_interval_high)
 835                                         return (EINVAL);
 836                                 tcp->tcp_ka_abort_thres =
 837                                     (*i1 * tcp->tcp_ka_cnt * 1000);
 838                         }
 839                         tcp->tcp_ka_rinterval = *i1 * 1000;
 840                         break;
 841                 case TCP_KEEPALIVE_ABORT_THRESHOLD:
 842                         if (!checkonly) {
 843                                 if (*i1 <
 844                                     tcps->tcps_keepalive_abort_interval_low ||
 845                                     *i1 >
 846                                     tcps->tcps_keepalive_abort_interval_high) {
 847                                         *outlenp = 0;
 848                                         return (EINVAL);
 849                                 }
 850                                 tcp->tcp_ka_abort_thres = *i1;
 851                                 tcp->tcp_ka_cnt = 0;
 852                                 tcp->tcp_ka_rinterval = 0;
 853                         }
 854                         break;
 855                 case TCP_CORK:
 856                         if (!checkonly) {
 857                                 /*
 858                                  * if tcp->tcp_cork was set and is now
 859                                  * being unset, we have to make sure that
 860                                  * the remaining data gets sent out. Also
 861                                  * unset tcp->tcp_cork so that tcp_wput_data()
 862                                  * can send data even if it is less than mss
 863                                  */
 864                                 if (tcp->tcp_cork && onoff == 0 &&
 865                                     tcp->tcp_unsent > 0) {
 866                                         tcp->tcp_cork = B_FALSE;
 867                                         tcp_wput_data(tcp, NULL, B_FALSE);
 868                                 }
 869                                 tcp->tcp_cork = onoff;
 870                         }
 871                         break;
 872                 case TCP_RTO_INITIAL: {
 873                         clock_t rto;
 874 
 875                         if (checkonly || val == 0)
 876                                 break;
 877 
 878                         /*
 879                          * Sanity checks
 880                          *
 881                          * The initial RTO should be bounded by the minimum
 882                          * and maximum RTO.  And it should also be smaller
 883                          * than the connect attempt abort timeout.  Otherwise,
 884                          * the connection won't be aborted in a period
 885                          * reasonably close to that timeout.
 886                          */
 887                         if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
 888                             val > tcp->tcp_second_ctimer_threshold ||
 889                             val < tcps->tcps_rexmit_interval_initial_low ||
 890                             val > tcps->tcps_rexmit_interval_initial_high) {
 891                                 *outlenp = 0;
 892                                 return (EINVAL);
 893                         }
 894                         tcp->tcp_rto_initial = val;
 895 
 896                         /*
 897                          * If TCP has not sent anything, need to re-calculate
 898                          * tcp_rto.  Otherwise, this option change does not
 899                          * really affect anything.
 900                          */
 901                         if (tcp->tcp_state >= TCPS_SYN_SENT)
 902                                 break;
 903 
 904                         tcp->tcp_rtt_sa = tcp->tcp_rto_initial << 2;
 905                         tcp->tcp_rtt_sd = tcp->tcp_rto_initial >> 1;
 906                         rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
 907                             tcps->tcps_rexmit_interval_extra +
 908                             (tcp->tcp_rtt_sa >> 5) +
 909                             tcps->tcps_conn_grace_period;
 910                         TCP_SET_RTO(tcp, rto);
 911                         break;
 912                 }
 913                 case TCP_RTO_MIN:
 914                         if (checkonly || val == 0)
 915                                 break;
 916 
 917                         if (val < tcps->tcps_rexmit_interval_min_low ||
 918                             val > tcps->tcps_rexmit_interval_min_high ||
 919                             val > tcp->tcp_rto_max) {
 920                                 *outlenp = 0;
 921                                 return (EINVAL);
 922                         }
 923                         tcp->tcp_rto_min = val;
 924                         if (tcp->tcp_rto < val)
 925                                 tcp->tcp_rto = val;
 926                         break;
 927                 case TCP_RTO_MAX:
 928                         if (checkonly || val == 0)
 929                                 break;
 930 
 931                         /*
 932                          * Sanity checks
 933                          *
 934                          * The maximum RTO should not be larger than the
 935                          * connection abort timeout.  Otherwise, the
 936                          * connection won't be aborted in a period reasonably
 937                          * close to that timeout.
 938                          */
 939                         if (val < tcps->tcps_rexmit_interval_max_low ||
 940                             val > tcps->tcps_rexmit_interval_max_high ||
 941                             val < tcp->tcp_rto_min ||
 942                             val > tcp->tcp_second_timer_threshold) {
 943                                 *outlenp = 0;
 944                                 return (EINVAL);
 945                         }
 946                         tcp->tcp_rto_max = val;
 947                         if (tcp->tcp_rto > val)
 948                                 tcp->tcp_rto = val;
 949                         break;
 950                 case TCP_LINGER2:
 951                         if (checkonly || *i1 == 0)
 952                                 break;
 953 
 954                         /*
 955                          * Note that the option value's unit is second.  And
 956                          * the value should be bigger than the private
 957                          * parameter tcp_fin_wait_2_flush_interval's lower
 958                          * bound and smaller than the current value of that
 959                          * parameter.  It should be smaller than the current
 960                          * value to avoid an app setting TCP_LINGER2 to a big
 961                          * value, causing resource to be held up too long in
 962                          * FIN-WAIT-2 state.
 963                          */
 964                         if (*i1 < 0 ||
 965                             tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
 966                             *i1 ||
 967                             tcps->tcps_fin_wait_2_flush_interval/SECONDS <
 968                             *i1) {
 969                                 *outlenp = 0;
 970                                 return (EINVAL);
 971                         }
 972                         tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
 973                         break;
 974                 default:
 975                         break;
 976                 }
 977                 break;
 978         case IPPROTO_IP:
 979                 if (connp->conn_family != AF_INET) {
 980                         *outlenp = 0;
 981                         return (EINVAL);
 982                 }
 983                 switch (name) {
 984                 case IP_SEC_OPT:
 985                         /*
 986                          * We should not allow policy setting after
 987                          * we start listening for connections.
 988                          */
 989                         if (tcp->tcp_state == TCPS_LISTEN) {
 990                                 return (EINVAL);
 991                         }
 992                         break;
 993                 }
 994                 break;
 995         case IPPROTO_IPV6:
 996                 /*
 997                  * IPPROTO_IPV6 options are only supported for sockets
 998                  * that are using IPv6 on the wire.
 999                  */
1000                 if (connp->conn_ipversion != IPV6_VERSION) {
1001                         *outlenp = 0;
1002                         return (EINVAL);
1003                 }
1004 
1005                 switch (name) {
1006                 case IPV6_RECVPKTINFO:
1007                         if (!checkonly) {
1008                                 /* Force it to be sent up with the next msg */
1009                                 tcp->tcp_recvifindex = 0;
1010                         }
1011                         break;
1012                 case IPV6_RECVTCLASS:
1013                         if (!checkonly) {
1014                                 /* Force it to be sent up with the next msg */
1015                                 tcp->tcp_recvtclass = 0xffffffffU;
1016                         }
1017                         break;
1018                 case IPV6_RECVHOPLIMIT:
1019                         if (!checkonly) {
1020                                 /* Force it to be sent up with the next msg */
1021                                 tcp->tcp_recvhops = 0xffffffffU;
1022                         }
1023                         break;
1024                 case IPV6_PKTINFO:
1025                         /* This is an extra check for TCP */
1026                         if (inlen == sizeof (struct in6_pktinfo)) {
1027                                 struct in6_pktinfo *pkti;
1028 
1029                                 pkti = (struct in6_pktinfo *)invalp;
1030                                 /*
1031                                  * RFC 3542 states that ipi6_addr must be
1032                                  * the unspecified address when setting the
1033                                  * IPV6_PKTINFO sticky socket option on a
1034                                  * TCP socket.
1035                                  */
1036                                 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1037                                         return (EINVAL);
1038                         }
1039                         break;
1040                 case IPV6_SEC_OPT:
1041                         /*
1042                          * We should not allow policy setting after
1043                          * we start listening for connections.
1044                          */
1045                         if (tcp->tcp_state == TCPS_LISTEN) {
1046                                 return (EINVAL);
1047                         }
1048                         break;
1049                 }
1050                 break;
1051         }
1052         reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1053             checkonly, cr);
1054         if (reterr != 0) {
1055                 *outlenp = 0;
1056                 return (reterr);
1057         }
1058 
1059         /*
1060          * Common case of OK return with outval same as inval
1061          */
1062         if (invalp != outvalp) {
1063                 /* don't trust bcopy for identical src/dst */
1064                 (void) bcopy(invalp, outvalp, inlen);
1065         }
1066         *outlenp = inlen;
1067 
1068         if (coas.coa_changed & COA_HEADER_CHANGED) {
1069                 /* If we are connected we rebuild the headers */
1070                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1071                     !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1072                         reterr = tcp_build_hdrs(tcp);
1073                         if (reterr != 0)
1074                                 return (reterr);
1075                 }
1076         }
1077         if (coas.coa_changed & COA_ROUTE_CHANGED) {
1078                 in6_addr_t nexthop;
1079 
1080                 /*
1081                  * If we are connected we re-cache the information.
1082                  * We ignore errors to preserve BSD behavior.
1083                  * Note that we don't redo IPsec policy lookup here
1084                  * since the final destination (or source) didn't change.
1085                  */
1086                 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1087                     &connp->conn_faddr_v6, &nexthop);
1088 
1089                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1090                     !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1091                         (void) ip_attr_connect(connp, connp->conn_ixa,
1092                             &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1093                             &nexthop, connp->conn_fport, NULL, NULL,
1094                             IPDF_VERIFY_DST);
1095                 }
1096         }
1097         if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1098                 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1099         }
1100         if (coas.coa_changed & COA_WROFF_CHANGED) {
1101                 connp->conn_wroff = connp->conn_ht_iphc_allocated +
1102                     tcps->tcps_wroff_xtra;
1103                 (void) proto_set_tx_wroff(connp->conn_rq, connp,
1104                     connp->conn_wroff);
1105         }
1106         if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1107                 if (IPCL_IS_NONSTR(connp))
1108                         proto_set_rx_oob_opt(connp, onoff);
1109         }
1110         return (0);
1111 }