1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2012 David Hoeppner.  All rights reserved.
  29  */
  30 
  31 /*
 * This file implements the Datagram Congestion Control Protocol (DCCP).
  33  */
  34 
  35 #include <sys/types.h>
  36 #include <sys/stream.h>
  37 #include <sys/stropts.h>
  38 #include <sys/strlog.h>
  39 #include <sys/strsun.h>
  40 #define _SUN_TPI_VERSION 2
  41 #include <sys/tihdr.h>
  42 #include <sys/socket.h>
  43 #include <sys/socketvar.h>
  44 #include <sys/sockio.h>
  45 #include <sys/priv.h>
  46 #include <sys/vtrace.h>
  47 #include <sys/sdt.h>
  48 #include <sys/debug.h>
  49 #include <sys/ddi.h>
  50 #include <sys/isa_defs.h>
  51 #include <sys/policy.h>
  52 #include <sys/tsol/label.h>
  53 #include <sys/tsol/tnet.h>
  54 #include <inet/dccp_impl.h>
  55 #include <inet/dccp_stack.h>
  56 #include <inet/kstatcom.h>
  57 #include <inet/snmpcom.h>
  58 
  59 #include <sys/cmn_err.h>
  60 
/*
 * Flag argument intended for SQUEUE_ENTER_ONE(); in the visible part of this
 * file it is only referenced by the disabled close hand-off in
 * dccp_close_common().
 */
int dccp_squeue_flag;

/*
 * Number of buckets in the bind fanout table; copied into every
 * dccp_stack_t by dccp_stack_init().  Settable in /etc/system.
 */
uint_t dccp_bind_fanout_size = DCCP_BIND_FANOUT_SIZE;
  65 
/* IP transmit attribute (ixa) notification callback */
static void     dccp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
                    ixa_notify_arg_t);

/* Per-netstack init/fini callbacks handed to netstack_register() */
static void     *dccp_stack_init(netstackid_t, netstack_t *);
static void     dccp_stack_fini(netstackid_t, void *);

/*
 * Stream device open functions: the v4/v6 entry points both forward to
 * dccp_open(), which takes the address family as its last argument.
 */
static int      dccp_openv4(queue_t *, dev_t *, int, int, cred_t *);
static int      dccp_openv6(queue_t *, dev_t *, int, int, cred_t *);
static int      dccp_open(queue_t *, dev_t *, int, int, cred_t *,
                    boolean_t);

/* Write service routine */
static void     dccp_wsrv(queue_t *);

/* Connection related functions (one per address family) */
static int      dccp_connect_ipv4(dccp_t *, ipaddr_t *, in_port_t, uint_t);
static int      dccp_connect_ipv6(dccp_t *, in6_addr_t *, in_port_t, uint32_t,
    uint_t, uint32_t);

/* Initialise ISS */
static void     dccp_iss_init(dccp_t *);
  89 
/*
 * Read-side module information, shared by the v4 and v6 read queue tables.
 * Initializers are positional; per module_info(9S) the order is module id,
 * module name, minimum packet size, maximum packet size, high-water mark
 * and low-water mark.
 */
struct module_info dccp_rinfo = {
        DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, DCCP_RECV_HIWATER,
        DCCP_RECV_LOWATER
};

/*
 * Write-side module information; same layout as above, with fixed
 * high-water (127) and low-water (16) marks.
 */
static struct module_info dccp_winfo = {
        DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, 127, 16
};
  98 
/*
 * Queue information structures with DCCP entry points.
 *
 * Initializers are positional; per qinit(9S) the order is put procedure,
 * service procedure, open, close, admin and module_info pointer.
 */

/* Read side of /dev/dccp: opens go through dccp_openv4() */
struct qinit dccp_rinitv4 = {
        NULL, (pfi_t)dccp_rsrv, dccp_openv4, dccp_tpi_close, NULL, &dccp_rinfo
};

/* Read side of /dev/dccp6: opens go through dccp_openv6() */
struct qinit dccp_rinitv6 = {
        NULL, (pfi_t)dccp_rsrv, dccp_openv6, dccp_tpi_close, NULL, &dccp_rinfo
};

/* Default write side, installed through both streamtabs */
struct qinit dccp_winit = {
        (pfi_t)dccp_wput, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo
};

/*
 * Initial entry point for DCCP in socket mode; dccp_open() switches the
 * write queue to this table when the open carries SO_SOCKSTR.
 */
struct qinit dccp_sock_winit = {
        (pfi_t)dccp_wput_sock, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo
};

/* Write side installed by dccp_open() for SO_FALLBACK opens */
struct qinit dccp_fallback_sock_winit = {
        (pfi_t)dccp_wput_fallback, NULL, NULL, NULL, NULL, &dccp_winfo
};
/*
 * DCCP as acceptor STREAM; both tables are installed by dccp_open() for
 * SO_ACCEPTOR opens.
 *
 * NOTE(review): the read-side table points at the write-side module_info
 * (dccp_winfo) rather than dccp_rinfo -- confirm this is intended.
 */
struct qinit dccp_acceptor_rinit = {
        NULL, (pfi_t)dccp_rsrv, NULL, dccp_tpi_close_accept, NULL, &dccp_winfo
};

struct qinit dccp_acceptor_winit = {
        (pfi_t)dccp_tpi_accept, NULL, NULL, NULL, NULL, &dccp_winfo
};
 132 
/*
 * AF_INET /dev/dccp
 * Pairs the IPv4 read queue table with the common write queue table.
 */
struct streamtab dccpinfov4 = {
        &dccp_rinitv4, &dccp_winit
};

/*
 * AF_INET6 /dev/dccp6
 * Pairs the IPv6 read queue table with the common write queue table.
 */
struct streamtab dccpinfov6 = {
        &dccp_rinitv6, &dccp_winit
};
 142 
/*
 * Template for response to info request (IPv4 endpoints).
 * The address size is that of a sin_t; OPT_size is left at zero here.
 */
struct T_info_ack dccp_g_t_info_ack = {
        T_INFO_ACK,             /* PRIM_type */
        0,                      /* TSDU_size */
        T_INFINITE,             /* ETSDU_size */
        T_INVALID,              /* CDATA_size */
        T_INVALID,              /* DDATA_size */
        sizeof (sin_t),         /* ADDR_size */
        0,                      /* OPT_size - not initialized here */
        TIDUSZ,                 /* TIDU_size */
        T_COTS_ORD,             /* SERV_type */
        DCCPS_CLOSED,           /* CURRENT_state */
        (XPG4_1|EXPINLINE)      /* PROVIDER_flag */
};

/*
 * Template for response to info request (IPv6 endpoints).
 * Identical to the IPv4 template except that the address size is that of
 * a sin6_t.
 */
struct T_info_ack dccp_g_t_info_ack_v6 = {
        T_INFO_ACK,             /* PRIM_type */
        0,                      /* TSDU_size */
        T_INFINITE,             /* ETSDU_size */
        T_INVALID,              /* CDATA_size */
        T_INVALID,              /* DDATA_size */
        sizeof (sin6_t),        /* ADDR_size */
        0,                      /* OPT_size - not initialized here */
        TIDUSZ,                 /* TIDU_size */
        T_COTS_ORD,             /* SERV_type */
        DCCPS_CLOSED,           /* CURRENT_state */
        (XPG4_1|EXPINLINE)      /* PROVIDER_flag */
};
 171 
/*
 * DCCP Tunables.
 *
 * The property table and its entry count are defined outside this file;
 * dccp_stack_init() copies the table into every stack instance.
 */
extern mod_prop_info_t dccp_propinfo_tbl[];
extern int dccp_propinfo_count;
 177 
/*
 * Register DCCP in ip netstack.
 *
 * Creates the global timer kmem cache first (each object is sized for a
 * dccp_timer_t plus an mblk_t) and then registers the per-stack init and
 * fini callbacks for NS_DCCP.
 */
void
dccp_ddi_g_init(void)
{
        /* Global timer cache */
        dccp_timercache = kmem_cache_create("dccp_timercache",
            sizeof (dccp_timer_t) + sizeof (mblk_t), 0,
            NULL, NULL, NULL, NULL, NULL, 0);

        /* No shutdown callback is registered (third argument is NULL) */
        netstack_register(NS_DCCP, dccp_stack_init, NULL, dccp_stack_fini);
}
 191 
/*
 * Unregister DCCP from ip netstack.
 *
 * Counterpart of dccp_ddi_g_init(): destroys the global timer cache and
 * unregisters the NS_DCCP netstack callbacks.
 */
void
dccp_ddi_g_destroy(void)
{
        /* Global timer cache */
        kmem_cache_destroy(dccp_timercache);

        netstack_unregister(NS_DCCP);
}
 203 
 204 #define INET_NAME       "ip"
 205 
 206 /*
 207  * Initialize this DCCP stack instance.
 208  */
 209 static void *
 210 dccp_stack_init(netstackid_t stackid, netstack_t *ns)
 211 {
 212         dccp_stack_t    *dccps;
 213         major_t         major;
 214         size_t          arrsz;
 215         int             error;
 216         int             i;
 217 
 218         dccps = kmem_zalloc(sizeof (*dccps), KM_SLEEP);
 219         if (dccps == NULL) {
 220                 return (NULL);
 221         }
 222         dccps->dccps_netstack = ns;
 223 
 224         /* Ports */
 225         mutex_init(&dccps->dccps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
 226         dccps->dccps_num_epriv_ports = DCCP_NUM_EPRIV_PORTS;
 227         dccps->dccps_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
 228         dccps->dccps_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
 229         dccps->dccps_min_anonpriv_port = 512;
 230 
 231         dccps->dccps_bind_fanout_size = dccp_bind_fanout_size;
 232 
 233         /* Bind fanout */
 234         dccps->dccps_bind_fanout = kmem_zalloc(dccps->dccps_bind_fanout_size *
 235             sizeof (dccp_df_t), KM_SLEEP);
 236         for (i = 0; i < dccps->dccps_bind_fanout_size; i++) {
 237                 mutex_init(&dccps->dccps_bind_fanout[i].df_lock, NULL,
 238                     MUTEX_DEFAULT, NULL);
 239         }
 240 
 241         /* Tunable properties */
 242         arrsz = dccp_propinfo_count * sizeof (mod_prop_info_t);
 243         dccps->dccps_propinfo_tbl = kmem_alloc(arrsz, KM_SLEEP);
 244         if (dccps->dccps_propinfo_tbl == NULL) {
 245                 kmem_free(dccps, sizeof (*dccps));
 246                 return (NULL);
 247         }
 248         bcopy(dccp_propinfo_tbl, dccps->dccps_propinfo_tbl, arrsz);
 249 
 250         /* Allocate per netstack cpu stats */
 251         mutex_enter(&cpu_lock);
 252         dccps->dccps_sc_cnt = MAX(ncpus, boot_ncpus);
 253         mutex_exit(&cpu_lock);
 254 
 255         dccps->dccps_sc = kmem_zalloc(max_ncpus * sizeof (dccp_stats_cpu_t *),
 256             KM_SLEEP);
 257         for (i = 0; i < dccps->dccps_sc_cnt; i++) {
 258                 dccps->dccps_sc[i] = kmem_zalloc(sizeof (dccp_stats_cpu_t),
 259                     KM_SLEEP);
 260         }
 261 
 262         /* Kernel statistics */
 263         dccps->dccps_kstat = dccp_kstat2_init(stackid);
 264         dccps->dccps_mibkp = dccp_kstat_init(stackid);
 265 
 266         /* Driver major number */
 267         major = mod_name_to_major(INET_NAME);
 268         error = ldi_ident_from_major(major, &dccps->dccps_ldi_ident);
 269         ASSERT(error == 0);
 270 
 271         return (dccps);
 272 }
 273 
 274 /*
 275  * Destroy this DCCP netstack instance.
 276  */
 277 static void
 278 dccp_stack_fini(netstackid_t stackid, void *arg)
 279 {
 280         dccp_stack_t    *dccps = (dccp_stack_t *)arg;
 281         int             i;
 282 
 283         /* Free cpu stats */
 284         for (i = 0; i < dccps->dccps_sc_cnt; i++) {
 285                 kmem_free(dccps->dccps_sc[i], sizeof (dccp_stats_cpu_t));
 286         }
 287         kmem_free(dccps->dccps_sc, max_ncpus * sizeof (dccp_stats_cpu_t *));
 288 
 289         /* Free tunable properties */
 290         kmem_free(dccps->dccps_propinfo_tbl,
 291             dccp_propinfo_count * sizeof (mod_prop_info_t));
 292         dccps->dccps_propinfo_tbl = NULL;
 293 
 294         /* Free bind fanout */
 295         for (i = 0; i < dccps->dccps_bind_fanout_size; i++) {
 296                 ASSERT(dccps->dccps_bind_fanout[i].df_dccp == NULL);
 297                 mutex_destroy(&dccps->dccps_bind_fanout[i].df_lock);
 298         }
 299         kmem_free(dccps->dccps_bind_fanout, dccps->dccps_bind_fanout_size *
 300             sizeof (dccp_df_t));
 301         dccps->dccps_bind_fanout = NULL;
 302 
 303         /* Kernel statistics */
 304         dccp_kstat_fini(stackid, dccps->dccps_mibkp);
 305         dccps->dccps_mibkp = NULL;
 306 
 307         ldi_ident_release(dccps->dccps_ldi_ident);
 308 
 309         kmem_free(dccps, sizeof (*dccps));
 310 }
 311 
 312 /* /dev/dccp */
 313 static int
 314 dccp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 315 {
 316         cmn_err(CE_NOTE, "dccp.c: dccp_openv4\n");
 317 
 318         return (dccp_open(q, devp, flag, sflag, credp, B_FALSE));
 319 }
 320 
 321 /* /dev/dccp6 */
 322 static int
 323 dccp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 324 {
 325         cmn_err(CE_NOTE, "dccp.c: dccp_openv6\n");
 326 
 327         return (dccp_open(q, devp, flag, sflag, credp, B_TRUE));
 328 }
 329 
/*
 * Common open function for v4 and v6 devices.
 *
 * q      read queue of the stream being opened
 * devp   in: device being opened; out: same major with the newly
 *        allocated minor number
 * flag   open flags; SO_SOCKSTR, SO_FALLBACK and SO_ACCEPTOR are examined
 * sflag  STREAMS open flag; MODOPEN is rejected
 * credp  credentials of the opener
 * isv6   B_TRUE for /dev/dccp6, B_FALSE for /dev/dccp
 *
 * Returns 0 on success (also when the stream is already open), EINVAL for
 * a module open, EBUSY when no minor number is available, or the error
 * reported by dccp_create_common().
 */
static int
dccp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
    boolean_t isv6)
{
        conn_t          *connp;
        dccp_t          *dccp;
        vmem_t          *minor_arena;
        dev_t           conn_dev;
        boolean_t       issocket;
        int             error;

        cmn_err(CE_NOTE, "dccp.c: dccp_open");

        /* If the stream is already open, return immediately */
        if (q->q_ptr != NULL) {
                return (0);
        }

        /* DCCP can only be opened as a device, not pushed as a module */
        if (sflag == MODOPEN) {
                return (EINVAL);
        }

        /*
         * Socket opens try the large minor arena first, if it exists;
         * everything else (or a failed allocation) uses the small arena.
         */
        if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
            ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
                minor_arena = ip_minor_arena_la;
        } else {
                /*
                 * Either minor numbers in the large arena were exhausted
                 * or a non socket application is doing the open.
                 * Try to allocate from the small arena.
                 */
                if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
                        return (EBUSY);
                }
                minor_arena = ip_minor_arena_sa;
        }

        ASSERT(minor_arena != NULL);

        /* Report the allocated minor number back to the caller */
        *devp = makedevice(getmajor(*devp), (minor_t)conn_dev);

        if (flag & SO_FALLBACK) {
                /*
                 * Non streams socket needs a stream to fallback to.
                 * No conn_t is created here: the read queue remembers the
                 * minor number and the write queue the arena it came from.
                 */
                RD(q)->q_ptr = (void *)conn_dev;
                WR(q)->q_qinfo = &dccp_fallback_sock_winit;
                WR(q)->q_ptr = (void *)minor_arena;
                qprocson(q);
                return (0);
        } else if (flag & SO_ACCEPTOR) {
                q->q_qinfo = &dccp_acceptor_rinit;
                /*
                 * The conn_dev and minor_arena will be subsequently used by
                 * the acceptor put routine (dccp_tpi_accept(), see
                 * dccp_acceptor_winit) and dccp_tpi_close_accept() to figure
                 * out the minor device number for this connection from the
                 * q_ptr.
                 */
                RD(q)->q_ptr = (void *)conn_dev;
                WR(q)->q_qinfo = &dccp_acceptor_winit;
                WR(q)->q_ptr = (void *)minor_arena;
                qprocson(q);
                return (0);
        }

        /* Regular open: create the connection state */
        issocket = flag & SO_SOCKSTR;
        connp = dccp_create_common(credp, isv6, issocket, &error);
        if (connp == NULL) {
                /* Give the minor number back and leave the queues unopened */
                inet_minor_free(minor_arena, conn_dev);
                q->q_ptr = WR(q)->q_ptr = NULL;
                return (error);
        }

        /* Tie the conn_t and the queue pair together */
        connp->conn_rq = q;
        connp->conn_wq = WR(q);
        q->q_ptr = WR(q)->q_ptr = connp;

        connp->conn_dev = conn_dev;
        connp->conn_minor_arena = minor_arena;

        ASSERT(q->q_qinfo == &dccp_rinitv4 || q->q_qinfo == &dccp_rinitv6);
        ASSERT(WR(q)->q_qinfo == &dccp_winit);

        dccp = connp->conn_dccp;

        if (issocket) {
                /* Socket opens use the socket-mode write entry points */
                WR(q)->q_qinfo = &dccp_sock_winit;
        } else {
                /*
                 * Acceptor id for non-socket streams: the read queue
                 * pointer when built with _ILP32, otherwise the minor
                 * number.
                 */
#ifdef  _ILP32
                dccp->dccp_acceptor_id = (t_uscalar_t)RD(q);
#else
                dccp->dccp_acceptor_id = conn_dev;
#endif  /* _ILP32 */
        }

        /*
         * Put the ref for DCCP. Ref for IP was already put
         * by ipcl_conn_create. Also Make the conn_t globally
         * visible to walkers.
         */
        mutex_enter(&connp->conn_lock);
        CONN_INC_REF_LOCKED(connp);
        ASSERT(connp->conn_ref == 2);
        connp->conn_state_flags &= ~CONN_INCIPIENT;
        mutex_exit(&connp->conn_lock);

        /* Enable the put and service procedures of the queue pair */
        qprocson(q);

        return (0);
}
 442 
/*
 * IXA notify
 *
 * Notification callback for IP transmit attribute (ixa) events.  Currently
 * a stub: it only logs that it was called and ignores all of its arguments.
 */
static void
dccp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
    ixa_notify_arg_t narg)
{
        cmn_err(CE_NOTE, "dccp.c: dccp_notify");
}
 452 
 453 /*
 454  * Build the template headers.
 455  */
 456 int
 457 dccp_build_hdrs(dccp_t *dccp)
 458 {
 459         dccp_stack_t    *dccps = dccp->dccp_dccps;
 460         conn_t          *connp = dccp->dccp_connp;
 461         dccpha_t        *dccpha;
 462         uint32_t        cksum;
 463         char            buf[DCCP_MAX_HDR_LENGTH];
 464         uint_t          buflen;
 465         uint_t          ulplen = 12;
 466         uint_t          extralen = 0;
 467         int             error;
 468 
 469         cmn_err(CE_NOTE, "dccp.c: dccp_build_hdrs");
 470 
 471         buflen = connp->conn_ht_ulp_len;
 472         if (buflen != 0) {
 473                 cmn_err(CE_NOTE, "buflen != 0");
 474                 bcopy(connp->conn_ht_ulp, buf, buflen);
 475                 extralen -= buflen - ulplen;
 476                 ulplen = buflen;
 477         }
 478 
 479         mutex_enter(&connp->conn_lock);
 480         error = conn_build_hdr_template(connp, ulplen, extralen,
 481             &connp->conn_laddr_v6, &connp->conn_faddr_v6, connp->conn_flowinfo);
 482         mutex_exit(&connp->conn_lock);
 483         if (error != 0) {
 484                 cmn_err(CE_NOTE, "conn_build_hdr_template failed");
 485                 return (error);
 486         }
 487 
 488         dccpha = (dccpha_t *)connp->conn_ht_ulp;
 489         dccp->dccp_dccpha = dccpha;
 490 
 491         if (buflen != 0) {
 492                 bcopy(buf, connp->conn_ht_ulp, buflen);
 493         } else {
 494                 dccpha->dha_sum = 0;
 495                 dccpha->dha_lport = connp->conn_lport;
 496                 dccpha->dha_fport = connp->conn_fport;
 497         }
 498 
 499         cksum = sizeof (dccpha_t) + connp->conn_sum;
 500         cksum = (cksum >> 16) + (cksum & 0xFFFF);
 501         dccpha->dha_sum = htons(cksum);
 502         dccpha->dha_offset = 7;
 503         dccpha->dha_x = 1;
 504 
 505         if (connp->conn_ipversion == IPV4_VERSION) {
 506                 dccp->dccp_ipha = (ipha_t *)connp->conn_ht_iphc;
 507         } else {
 508                 dccp->dccp_ip6h = (ip6_t *)connp->conn_ht_iphc;
 509         }
 510 
 511         /* XXX */
 512 
 513         return (0);
 514 }
 515 
 516 /*
 517  * DCCP write service routine.
 518  */
 519 static void
 520 dccp_wsrv(queue_t *q)
 521 {
 522         dccp_stack_t    *dccps = Q_TO_DCCP(q)->dccp_dccps;
 523 
 524         DCCP_STAT(dccps, dccp_wsrv_called);
 525 }
 526 
/*
 * Common create function for streams and sockets.
 *
 * credp     credentials of the creator; a hold is taken on them
 * isv6      B_TRUE for an AF_INET6 endpoint, B_FALSE for AF_INET
 * issocket  B_TRUE when created on behalf of a socket
 * errorp    out: error code when NULL is returned (must not be NULL)
 *
 * Returns the initialised conn_t in state DCCPS_CLOSED.  On failure NULL
 * is returned and *errorp holds the secpolicy_basic_net_access() error, or
 * ENOSR if no connection structure could be obtained.
 */
conn_t *
dccp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket,
    int *errorp)
{
        conn_t          *connp;
        dccp_t          *dccp;
        dccp_stack_t    *dccps;
        netstack_t      *ns;
        squeue_t        *sqp;
        zoneid_t        zoneid;
        int             error;

        cmn_err(CE_NOTE, "dccp.c: dccp_create_common\n");

        ASSERT(errorp != NULL);

        /* The caller must be allowed basic network access */
        error = secpolicy_basic_net_access(credp);
        if (error != 0) {
                *errorp = error;
                return (NULL);
        }

        /*
         * Find the right netstack.
         */
        ns = netstack_find_by_cred(credp);
        ASSERT(ns != NULL);
        dccps = ns->netstack_dccp;
        ASSERT(dccps != NULL);

        /*
         * For exclusive stacks we set the zoneid to zero
         * to make DCCP operate as if in the global zone.
         */
        if (ns->netstack_stackid != GLOBAL_NETSTACKID) {
                zoneid = GLOBAL_ZONEID;
        } else {
                zoneid = crgetzoneid(credp);
        }

        /* Pick a squeue (keyed on the current hrtime) and get a conn_t */
        sqp = IP_SQUEUE_GET((uint_t)gethrtime());
        connp = (conn_t *)dccp_get_conn(sqp, dccps);
        /*
         * Presumably drops the hold taken by netstack_find_by_cred(), with
         * dccp_get_conn() holding its own reference -- TODO confirm.
         */
        netstack_rele(dccps->dccps_netstack);
        if (connp == NULL) {
                *errorp = ENOSR;
                return (NULL);
        }
        ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);

        /* Bind the connection and its transmit attributes to the squeue */
        connp->conn_sqp = sqp;
        connp->conn_initial_sqp = connp->conn_sqp;
        connp->conn_ixa->ixa_sqp = connp->conn_sqp;
        dccp = connp->conn_dccp;

        /* Setting flags for ip output */
        connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE |
            IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO;

        ASSERT(connp->conn_proto == IPPROTO_DCCP);
        ASSERT(connp->conn_dccp == dccp);
        ASSERT(dccp->dccp_connp == connp);

        /* Address family specific defaults */
        if (isv6) {
                connp->conn_ixa->ixa_src_preferences = IPV6_PREFER_SRC_DEFAULT;
                connp->conn_ipversion = IPV6_VERSION;
                connp->conn_family = AF_INET6;
                /* XXX mms, ttl */
        } else {
                connp->conn_ipversion = IPV4_VERSION;
                connp->conn_family = AF_INET;
                /* XXX mms, ttl */
        }
        connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;

        /* Record who created the connection; the conn_t holds the cred */
        crhold(credp);
        connp->conn_cred = credp;
        connp->conn_cpid = curproc->p_pid;
        connp->conn_open_time = ddi_get_lbolt64();

        /* The ixa caches the cred without its own hold (no IXA_FREE_CRED) */
        ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
        connp->conn_ixa->ixa_cred = credp;
        connp->conn_ixa->ixa_cpid = connp->conn_cpid;

        connp->conn_zoneid = zoneid;
        connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
        connp->conn_ixa->ixa_zoneid = zoneid;
        connp->conn_mlp_type = mlptSingle;

        dccp->dccp_dccps = dccps;
        dccp->dccp_state = DCCPS_CLOSED;

        ASSERT(connp->conn_netstack == dccps->dccps_netstack);
        ASSERT(dccp->dccp_dccps == dccps);

        /*
         * If the caller has the process-wide flag set, then default to MAC
         * exempt mode.  This allows read-down to unlabeled hosts.
         */
        if (getpflags(NET_MAC_AWARE, credp) != 0) {
                connp->conn_mac_mode = CONN_MAC_AWARE;
        }

        if (issocket) {
                dccp->dccp_issocket = 1;
        }

        /* XXX rcvbuf, sndbuf etc */

        /*
         * NOTE(review): the socket type is set to SOCK_STREAM; confirm that
         * this is the intended type for DCCP endpoints.
         */
        connp->conn_so_type = SOCK_STREAM;

        SOCK_CONNID_INIT(dccp->dccp_connid);
        dccp_init_values(dccp, NULL);

        return (connp);
}
 645 
 646 /*
 647  * Common close function for streams and sockets.
 648  */
 649 void
 650 dccp_close_common(conn_t *connp, int flags)
 651 {
 652         dccp_t          *dccp = connp->conn_dccp;
 653         mblk_t          *mp;
 654         boolean_t       conn_ioctl_cleanup_reqd = B_FALSE;
 655 
 656         cmn_err(CE_NOTE, "dccp.c: dccp_close_common");
 657 
 658         ASSERT(connp->conn_ref >= 2);
 659 
 660         /*
 661          * Mark the conn as closing. ipsq_pending_mp_add will not
 662          * add any mp to the pending mp list, after this conn has
 663          * started closing.
 664          */
 665         mutex_enter(&connp->conn_lock);
 666         connp->conn_state_flags |= CONN_CLOSING;
 667 
 668         if (connp->conn_oper_pending_ill != NULL) {
 669                 conn_ioctl_cleanup_reqd = B_TRUE;
 670         }
 671 
 672         CONN_INC_REF_LOCKED(connp);
 673         mutex_exit(&connp->conn_lock);
 674 
 675         ASSERT(connp->conn_ref >= 3);
 676 
 677         /*
 678          * Cleanup any queued ioctls here. This must be done before the wq/rq
 679          * are re-written by dccp_close_output().
 680          */
 681         if (conn_ioctl_cleanup_reqd) {
 682                 conn_ioctl_cleanup(connp);
 683         }
 684 
 685         mutex_enter(&connp->conn_lock);
 686         while (connp->conn_ioctlref > 0) {
 687                 cv_wait(&connp->conn_cv, &connp->conn_lock);
 688         }
 689         ASSERT(connp->conn_ioctlref == 0);
 690         ASSERT(connp->conn_oper_pending_ill == NULL);
 691         mutex_exit(&connp->conn_lock);
 692 
 693         /* generate close */
 694 /*
 695         SQUEUE_ENTER_ONE(connp->conn_sqp, mp, dccp_close_output, connp,
 696             NULL, dccp_squeue_flag, SQTAG_IP_DCCP_CLOSE);
 697 
 698 */
 699 
 700 nowait:
 701         connp->conn_cpid = NOPID;
 702 }
 703 
 704 /*
 705  * Common bind function.
 706  */
 707 int
 708 dccp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
 709     boolean_t bind_to_req_port_only)
 710 {
 711         dccp_t  *dccp = connp->conn_dccp;
 712         int     error;
 713 
 714         cmn_err(CE_NOTE, "dccp.c: dccp_do_bind");
 715 
 716         if (dccp->dccp_state >= DCCPS_BOUND) {
 717                 if (connp->conn_debug) {
 718                         (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
 719                             "dccp_bind: bad state, %d", dccp->dccp_state);
 720                 }
 721                 return (-TOUTSTATE);
 722         }
 723 
 724         error = dccp_bind_check(connp, sa, len, cr, bind_to_req_port_only);
 725         if (error != 0) {
 726                 return (error);
 727         }
 728 
 729         ASSERT(dccp->dccp_state == DCCPS_LISTEN);
 730         /* XXX dccp_conn_req_max = 0 */ 
 731 
 732         return (0);
 733 }
 734 
 735 /*
 736  * Common unbind function.
 737  */
 738 int
 739 dccp_do_unbind(conn_t *connp)
 740 {
 741         dccp_t  *dccp = connp->conn_dccp;
 742         int32_t oldstate;
 743 
 744         cmn_err(CE_NOTE, "dccp.c: dccp_do_unbind");
 745 
 746         switch (dccp->dccp_state) {
 747         case DCCPS_OPEN:
 748         case DCCPS_LISTEN:
 749                 break;
 750         default:
 751                 return (-TOUTSTATE);
 752         }
 753 
 754         connp->conn_laddr_v6 = ipv6_all_zeros;
 755         connp->conn_saddr_v6 = ipv6_all_zeros;
 756 
 757         dccp_bind_hash_remove(dccp);
 758 
 759         oldstate = dccp->dccp_state;
 760         dccp->dccp_state = DCCPS_CLOSED;
 761         DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
 762             connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL,
 763             int32_t, oldstate);
 764 
 765         ip_unbind(connp);
 766         bzero(&connp->conn_ports, sizeof (connp->conn_ports));
 767 
 768         return (0);
 769 }
 770 
 771 /*
 772  * Common listen function.
 773  */
 774 int
 775 dccp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len,
 776     int backlog, cred_t *cr, boolean_t bind_to_req_port_only)
 777 {
 778         dccp_t          *dccp = connp->conn_dccp;
 779         dccp_stack_t    *dccps = dccp->dccp_dccps;
 780         int32_t         oldstate;
 781         int             error;
 782 
 783         cmn_err(CE_NOTE, "dccp.c: dccp_do_listen");
 784 
 785         /* All Solaris components should pass a cred for this operation */
 786         ASSERT(cr != NULL);
 787 
 788         if (dccp->dccp_state >= DCCPS_BOUND) {
 789 
 790                 if ((dccp->dccp_state == DCCPS_BOUND ||
 791                     dccp->dccp_state == DCCPS_LISTEN) && backlog > 0) {
 792                         goto do_listen;
 793                 }
 794                 cmn_err(CE_NOTE, "DCCPS_BOUND, bad state");
 795 
 796                 if (connp->conn_debug) {
 797                         (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
 798                             "dccp_listen: bad state, %d", dccp->dccp_state);
 799                 }
 800                 return (-TOUTSTATE);
 801         } else {
 802                 if (sa == NULL) {
 803                         sin6_t  addr;
 804                         sin6_t  *sin6;
 805                         sin_t   *sin;
 806 
 807                         ASSERT(IPCL_IS_NONSTR(connp));
 808 
 809                         if (connp->conn_family == AF_INET) {
 810                                 len = sizeof (sin_t);
 811                                 sin = (sin_t *)&addr;
 812                                 *sin = sin_null;
 813                                 sin->sin_family = AF_INET;
 814                         } else {
 815                                 ASSERT(connp->conn_family == AF_INET6);
 816 
 817                                 len = sizeof (sin6_t);
 818                                 sin6 = (sin6_t *)&addr;
 819                                 *sin6 = sin6_null;
 820                                 sin6->sin6_family = AF_INET6;
 821                         }
 822 
 823                         sa = (struct sockaddr *)&addr;
 824                 }
 825 
 826                 error = dccp_bind_check(connp, sa, len, cr,
 827                     bind_to_req_port_only);
 828                 if (error != 0) {
 829                         cmn_err(CE_NOTE, "dccp_bind_check failed");
 830                         return (error);
 831                 }
 832                 /* Fall through and do the fanout insertion */
 833         }
 834 
 835 do_listen:
 836         ASSERT(dccp->dccp_state == DCCPS_BOUND ||
 837             dccp->dccp_state == DCCPS_LISTEN);
 838 
 839         /* XXX backlog */
 840 
 841         connp->conn_recv = dccp_input_listener_unbound;
 842 
 843         /* Insert into the classifier table */
 844         error = ip_laddr_fanout_insert(connp);
 845         if (error != 0) {
 846                 /* Error - undo the bind */
 847                 oldstate = dccp->dccp_state;
 848                 dccp->dccp_state = DCCPS_CLOSED;
 849 
 850                 connp->conn_bound_addr_v6 = ipv6_all_zeros;
 851 
 852                 connp->conn_laddr_v6 = ipv6_all_zeros;
 853                 connp->conn_saddr_v6 = ipv6_all_zeros;
 854                 connp->conn_ports = 0;
 855 
 856                 if (connp->conn_anon_port) {
 857                         zone_t  *zone;
 858 
 859                         zone = crgetzone(cr);
 860                         connp->conn_anon_port = B_FALSE;
 861                         (void) tsol_mlp_anon(zone, connp->conn_mlp_type,
 862                             connp->conn_proto, connp->conn_lport, B_FALSE);
 863                 }
 864                 connp->conn_mlp_type = mlptSingle;
 865 
 866                 /* XXX dccp_bind_hash_remove */
 867 
 868                 return (error);
 869         } else {
 870                 /* XXX connection limits */
 871         }
 872 
 873         return (error);
 874 }
 875 
 876 /*
 877  * Common connect function.
 878  */
 879 int
 880 dccp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
 881     cred_t *cr, pid_t pid)
 882 {
 883         dccp_t          *dccp = connp->conn_dccp;
 884         dccp_stack_t    *dccps = dccp->dccp_dccps;
 885         ip_xmit_attr_t  *ixa = connp->conn_ixa;
 886         mblk_t          *req_mp;
 887         sin_t           *sin = (sin_t *)sa;
 888         sin6_t          *sin6 = (sin6_t *)sa;
 889         ipaddr_t        *dstaddrp;
 890         in_port_t       dstport;
 891         uint_t          srcid;
 892         int32_t         oldstate;
 893         int             error;
 894 
 895         cmn_err(CE_NOTE, "dccp.c: dccp_do_connect");
 896 
 897         oldstate = dccp->dccp_state;
 898 
 899         switch (len) {
 900         case sizeof (sin_t):
 901                 sin = (sin_t *)sa;
 902                 if (sin->sin_port == 0) {
 903                         return (-TBADADDR);
 904                 }
 905                 if (connp->conn_ipv6_v6only) {
 906                         return (EAFNOSUPPORT);
 907                 }
 908                 break;
 909 
 910         case sizeof (sin6_t):
 911                 sin6 = (sin6_t *)sa;
 912                 if (sin6->sin6_port == 0) {
 913                         return (-TBADADDR);
 914                 }
 915                 break;
 916 
 917         default:
 918                 return (EINVAL);
 919         }
 920 
 921         if (connp->conn_family == AF_INET6 &&
 922             connp->conn_ipversion == IPV6_VERSION &&
 923             IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 924                 if (connp->conn_ipv6_v6only) {
 925                         return (EADDRNOTAVAIL);
 926                 }
 927 
 928                 connp->conn_ipversion = IPV4_VERSION;
 929         }
 930 
 931         switch (dccp->dccp_state) {
 932         case DCCPS_LISTEN:
 933                 /*
 934                  * Listening sockets are not allowed to issue connect().
 935                  */
 936                 if (IPCL_IS_NONSTR(connp)) {
 937                         return (EOPNOTSUPP);
 938                 }
 939 
 940         case DCCPS_CLOSED:
 941                 /*
 942                  * We support quick connect.
 943                  */
 944                 /* FALLTHRU */
 945         case DCCPS_OPEN:
 946                 break;
 947 
 948         default:
 949                 return (-TOUTSTATE);
 950         }
 951 
 952         /*
 953          * We update our cred/cpid based on the caller of connect.
 954          */
 955         if (connp->conn_cred != cr) {
 956                 crhold(cr);
 957                 crfree(connp->conn_cred);
 958                 connp->conn_cred = cr;
 959         }
 960         connp->conn_cpid = pid;
 961 
 962         /* Cache things in the ixa without any refhold */
 963         ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
 964         ixa->ixa_cred = cr;
 965         ixa->ixa_cpid = pid;
 966 
 967         if (is_system_labeled()) {
 968                 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
 969         }
 970 
 971         if (connp->conn_family == AF_INET6) {
 972                 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 973                         error = dccp_connect_ipv6(dccp, &sin6->sin6_addr,
 974                             sin6->sin6_port, sin6->sin6_flowinfo,
 975                             sin6->__sin6_src_id, sin6->sin6_scope_id);
 976                 } else {
 977                         /*
 978                          * Destination adress is mapped IPv6 address.
 979                          * Source bound address should be unspecified or
 980                          * IPv6 mapped address as well.
 981                          */
 982                         if (!IN6_IS_ADDR_UNSPECIFIED(
 983                             &connp->conn_bound_addr_v6) &&
 984                             !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
 985                                 return (EADDRNOTAVAIL);
 986                         }
 987 
 988                         dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr));
 989                         dstport = sin6->sin6_port;
 990                         srcid = sin6->__sin6_src_id;
 991                         error = dccp_connect_ipv4(dccp, dstaddrp, dstport,
 992                             srcid);
 993                 }
 994         } else {
 995                 dstaddrp = &sin->sin_addr.s_addr;
 996                 dstport = sin->sin_port;
 997                 srcid = 0;
 998                 error = dccp_connect_ipv4(dccp, dstaddrp, dstport, srcid);
 999         }
1000 
1001         if (error != 0) {
1002                 cmn_err(CE_NOTE, "dccp_connect_ip failed");
1003                 goto connect_failed;
1004         }
1005 
1006         /* XXX cluster */
1007 
1008         /* Connect succeeded */
1009         DCCPS_BUMP_MIB(dccps, dccpActiveOpens);
1010         dccp->dccp_active_open = 1;
1011 
1012         DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
1013             connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL,
1014             int32_t, DCCPS_BOUND);
1015 
1016         DCCP_TIMER_RESTART(dccp, 100);
1017         req_mp = dccp_generate_request(connp);
1018         if (req_mp != NULL) {
1019                 /*
1020                  * We must bump the generation before sending the request
1021                  * to ensure that we use the right generation in case
1022                  * this thread issues a "connected" up call.
1023                  */
1024                 SOCK_CONNID_BUMP(dccp->dccp_connid);
1025 
1026                 DTRACE_DCCP5(connect__request, mblk_t *, NULL,
1027                     ip_xmit_attr_t *, connp->conn_ixa,
1028                     void_ip_t *, req_mp->b_rptr, dccp_t *, dccp,
1029                     dccpha_t *,
1030                     &req_mp->b_rptr[connp->conn_ixa->ixa_ip_hdr_length]);
1031 
1032                 dccp_send_data(dccp, req_mp);
1033         }
1034 
1035         return (0);
1036 
1037 connect_failed:
1038         cmn_err(CE_NOTE, "dccp_do_connect failed");
1039 
1040         connp->conn_faddr_v6 = ipv6_all_zeros;
1041         connp->conn_fport = 0;
1042         dccp->dccp_state = oldstate;
1043 
1044         /* XXX */
1045         return (error);
1046 }
1047 
1048 /*
1049  * Init values of a connection.
1050  */
1051 void
1052 dccp_init_values(dccp_t *dccp, dccp_t *parent)
1053 {
1054         conn_t          *connp = dccp->dccp_connp;
1055         dccp_stack_t    *dccps = dccp->dccp_dccps;
1056 
1057         connp->conn_mlp_type = mlptSingle;
1058 }
1059 
1060 /*
1061  * Free dccp structure.
1062  */
1063 void
1064 dccp_free(dccp_t *dccp)
1065 {
1066         conn_t  *connp = dccp->dccp_connp;
1067 
1068         cmn_err(CE_NOTE, "dccp.c: dccp_free");
1069 
1070         connp->conn_rq = NULL;
1071         connp->conn_wq = NULL;
1072 
1073         if (connp->conn_upper_handle != NULL) {
1074                 if (IPCL_IS_NONSTR(connp)) {
1075                         (*connp->conn_upcalls->su_closed)(
1076                             connp->conn_upper_handle);
1077                         dccp->dccp_detached = B_TRUE;
1078                 }
1079 
1080                 connp->conn_upper_handle = NULL;
1081                 connp->conn_upcalls = NULL;
1082         }
1083 }
1084 
1085 void *
1086 dccp_get_conn(void *arg, dccp_stack_t *dccps)
1087 {
1088         dccp_t          *dccp = NULL;
1089         conn_t          *connp;
1090         squeue_t        *sqp = (squeue_t *)arg;
1091         netstack_t      *ns;
1092 
1093         /* XXX timewait */
1094 
1095         connp = ipcl_conn_create(IPCL_DCCPCONN, KM_NOSLEEP,
1096             dccps->dccps_netstack);
1097         if (connp == NULL) {
1098                 return (NULL);
1099         }
1100 
1101         dccp = connp->conn_dccp;
1102         dccp->dccp_dccps = dccps;
1103 
1104         /* List of features being negotated */
1105         list_create(&dccp->dccp_features, sizeof (dccp_feature_t),
1106             offsetof(dccp_feature_t, df_next));
1107 
1108         connp->conn_recv = dccp_input_data;
1109         connp->conn_recvicmp = dccp_icmp_input;
1110         connp->conn_verifyicmp = dccp_verifyicmp;
1111 
1112         connp->conn_ixa->ixa_notify = dccp_notify;
1113         connp->conn_ixa->ixa_notify_cookie = dccp;
1114 
1115         return ((void *)connp);
1116 }
1117 
1118 /*
1119  * IPv4 connect.
1120  */
1121 static int
1122 dccp_connect_ipv4(dccp_t *dccp, ipaddr_t *dstaddrp, in_port_t dstport,
1123     uint_t srcid)
1124 {
1125         conn_t          *connp = dccp->dccp_connp;
1126         dccp_stack_t    *dccps = dccp->dccp_dccps;
1127         ipaddr_t        dstaddr = *dstaddrp;
1128         uint16_t        lport;
1129         int             error;
1130 
1131         cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv4");
1132 
1133         ASSERT(connp->conn_ipversion == IPV4_VERSION);
1134 
1135         if (dstaddr == INADDR_ANY) {
1136                 dstaddr = htonl(INADDR_LOOPBACK);
1137                 *dstaddrp = dstaddr;
1138         }
1139 
1140         /* Handle __sin6_src_id if socket not bound to an IP address */
1141         if (srcid != 0 && connp->conn_laddr_v4 == INADDR_ANY) {
1142                 ip_srcid_find_id(srcid, &connp->conn_laddr_v6,
1143                     IPCL_ZONEID(connp), dccps->dccps_netstack);
1144                 connp->conn_saddr_v6 = connp->conn_laddr_v6;
1145         }
1146 
1147         IN6_IPADDR_TO_V4MAPPED(dstaddr, &connp->conn_faddr_v6);
1148         connp->conn_fport = dstport;
1149 
1150         if (dccp->dccp_state == DCCPS_CLOSED) {
1151                 lport = dccp_update_next_port(dccps->dccps_next_port_to_try,
1152                     dccp, B_TRUE);
1153                 lport = dccp_bindi(dccp, lport, &connp->conn_laddr_v6, 0,
1154                     B_TRUE, B_FALSE, B_FALSE);
1155                 if (lport == 0) {
1156                         return (-TNOADDR);
1157                 }
1158         }
1159 
1160         error = dccp_set_destination(dccp);
1161         if (error != 0) {
1162                 return (error);
1163         }
1164 
1165         /*
1166          * Don't connect to oneself.
1167          */
1168         if (connp->conn_faddr_v4 == connp->conn_laddr_v4 &&
1169             connp->conn_fport == connp->conn_lport) {
1170                 return (-TBADADDR);
1171         }
1172 
1173         dccp->dccp_state = DCCPS_REQUEST;
1174 
1175         return (ipcl_conn_insert_v4(connp));
1176 }
1177 
1178 /*
1179  * IPv6 connect.
1180  */
1181 static int
1182 dccp_connect_ipv6(dccp_t *dccp, in6_addr_t *dstaddrp, in_port_t dstport,
1183     uint32_t flowinfo, uint_t srcid, uint32_t scope_id)
1184 {
1185         cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv6");
1186 
1187         return (0);
1188 }
1189 
1190 /*
1191  * Set the ports via conn_connect and build the template
1192  * header.
1193  */
1194 int
1195 dccp_set_destination(dccp_t *dccp)
1196 {
1197         conn_t          *connp = dccp->dccp_connp;
1198         dccp_stack_t    *dccps = dccp->dccp_dccps;
1199         iulp_t          uinfo;
1200         uint32_t        flags;
1201         int             error;
1202 
1203         flags = IPDF_LSO | IPDF_ZCOPY;
1204         flags |= IPDF_UNIQUE_DCE;
1205 
1206         mutex_enter(&connp->conn_lock);
1207         error = conn_connect(connp, &uinfo, flags);
1208         mutex_exit(&connp->conn_lock);
1209         if (error != 0) {
1210                 cmn_err(CE_NOTE, "conn_connect failed");
1211                 return (error);
1212         }
1213 
1214         error = dccp_build_hdrs(dccp);
1215         if (error != 0) {
1216                 cmn_err(CE_NOTE, "dccp_build_hdrs failed");
1217                 return (error);
1218         }
1219 
1220         /* XXX */
1221 
1222         /* Initialise the ISS */
1223         dccp_iss_init(dccp);
1224 
1225         mutex_enter(&connp->conn_lock);
1226         connp->conn_state_flags &= ~CONN_INCIPIENT;
1227         mutex_exit(&connp->conn_lock);
1228 
1229         return (0);
1230 }
1231 
1232 /*
1233  * Init the ISS.
1234  */
1235 static void
1236 dccp_iss_init(dccp_t *dccp)
1237 {
1238         cmn_err(CE_NOTE, "dccp.c: dccp_iss_init");
1239 
1240         dccp->dccp_iss += gethrtime();
1241         dccp->dccp_gss = dccp->dccp_iss;
1242 }