1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2012 David Hoeppner.  All rights reserved.
  29  */
  30 
  31 /*
 * This file implements the Datagram Congestion Control Protocol (DCCP).
  33  */
  34 
  35 #include <sys/types.h>
  36 #include <sys/stream.h>
  37 #include <sys/stropts.h>
  38 #include <sys/strlog.h>
  39 #include <sys/strsun.h>
  40 #define _SUN_TPI_VERSION 2
  41 #include <sys/tihdr.h>
  42 #include <sys/socket.h>
  43 #include <sys/socketvar.h>
  44 #include <sys/sockio.h>
  45 #include <sys/priv.h>
  46 #include <sys/vtrace.h>
  47 #include <sys/sdt.h>
  48 #include <sys/debug.h>
  49 #include <sys/ddi.h>
  50 #include <sys/isa_defs.h>
  51 #include <sys/policy.h>
  52 #include <sys/tsol/label.h>
  53 #include <sys/tsol/tnet.h>
  54 #include <inet/dccp_impl.h>
  55 #include <inet/dccp_stack.h>
  56 #include <inet/kstatcom.h>
  57 #include <inet/snmpcom.h>
  58 
  59 #include <sys/cmn_err.h>
  60 
  61 int dccp_squeue_flag;
  62 
  63 /* Setable in /etc/system */
  64 uint_t dccp_bind_fanout_size = DCCP_BIND_FANOUT_SIZE;
  65 
  66 static void     dccp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
  67                     ixa_notify_arg_t);
  68 
  69 /* Functions to register netstack */
  70 static void     *dccp_stack_init(netstackid_t, netstack_t *);
  71 static void     dccp_stack_fini(netstackid_t, void *);
  72 
  73 /* Stream device open functions */
  74 static int      dccp_openv4(queue_t *, dev_t *, int, int, cred_t *);
  75 static int      dccp_openv6(queue_t *, dev_t *, int, int, cred_t *);
  76 static int      dccp_open(queue_t *, dev_t *, int, int, cred_t *,
  77                     boolean_t);
  78 
  79 /* Write service routine */
  80 static void     dccp_wsrv(queue_t *);
  81 
  82 /* Connection related functions */
  83 static int      dccp_connect_ipv4(dccp_t *, ipaddr_t *, in_port_t, uint_t);
  84 static int      dccp_connect_ipv6(dccp_t *, in6_addr_t *, in_port_t, uint32_t,
  85     uint_t, uint32_t);
  86 
  87 /* Initialise ISS */
  88 static void     dccp_iss_init(dccp_t *);
  89 
  90 struct module_info dccp_rinfo = {
  91         DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, DCCP_RECV_HIWATER,
  92         DCCP_RECV_LOWATER
  93 };
  94 
  95 static struct module_info dccp_winfo = {
  96         DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, 127, 16
  97 };
  98 
  99 /*
 100  * Queue information structure with DCCP entry points.
 101  */
 102 struct qinit dccp_rinitv4 = {
 103         NULL, (pfi_t)dccp_rsrv, dccp_openv4, dccp_tpi_close, NULL, &dccp_rinfo
 104 };
 105 
 106 struct qinit dccp_rinitv6 = {
 107         NULL, (pfi_t)dccp_rsrv, dccp_openv6, dccp_tpi_close, NULL, &dccp_rinfo
 108 };
 109 
 110 struct qinit dccp_winit = {
 111         (pfi_t)dccp_wput, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo
 112 };
 113 
 114 /* Initial entry point for TCP in socket mode */
 115 struct qinit dccp_sock_winit = {
 116         (pfi_t)dccp_wput_sock, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo
 117 };
 118 
 119 struct qinit dccp_fallback_sock_winit = {
 120         (pfi_t)dccp_wput_fallback, NULL, NULL, NULL, NULL, &dccp_winfo
 121 };
 122 /*
 123  * DCCP as acceptor STREAM.
 124  */
 125 struct qinit dccp_acceptor_rinit = {
 126         NULL, (pfi_t)dccp_rsrv, NULL, dccp_tpi_close_accept, NULL, &dccp_winfo
 127 };
 128 
 129 struct qinit dccp_acceptor_winit = {
 130         (pfi_t)dccp_tpi_accept, NULL, NULL, NULL, NULL, &dccp_winfo
 131 };
 132 
 133 /* AF_INET /dev/dccp */
 134 struct streamtab dccpinfov4 = {
 135         &dccp_rinitv4, &dccp_winit
 136 };
 137 
 138 /* AF_INET6 /dev/dccp6 */
 139 struct streamtab dccpinfov6 = {
 140         &dccp_rinitv6, &dccp_winit
 141 };
 142 
 143 /* Template for response to info request */
 144 struct T_info_ack dccp_g_t_info_ack = {
 145         T_INFO_ACK,             /* PRIM_type */
 146         0,                      /* TSDU_size */
 147         T_INFINITE,             /* ETSDU_size */
 148         T_INVALID,              /* CDATA_size */
 149         T_INVALID,              /* DDATA_size */
 150         sizeof (sin_t),         /* ADDR_size */
 151         0,                      /* OPT_size - not initialized here */
 152         TIDUSZ,                 /* TIDU_size */
 153         T_COTS_ORD,             /* SERV_type */
 154         DCCPS_CLOSED,           /* CURRENT_state */
 155         (XPG4_1|EXPINLINE)      /* PROVIDER_flag */
 156 };
 157 
 158 struct T_info_ack dccp_g_t_info_ack_v6 = {
 159         T_INFO_ACK,             /* PRIM_type */
 160         0,                      /* TSDU_size */
 161         T_INFINITE,             /* ETSDU_size */
 162         T_INVALID,              /* CDATA_size */
 163         T_INVALID,              /* DDATA_size */
 164         sizeof (sin6_t),        /* ADDR_size */
 165         0,                      /* OPT_size - not initialized here */
 166         TIDUSZ,                 /* TIDU_size */
 167         T_COTS_ORD,             /* SERV_type */
 168         DCCPS_CLOSED,           /* CURRENT_state */
 169         (XPG4_1|EXPINLINE)      /* PROVIDER_flag */
 170 };
 171 
 172 /*
 173  * DCCP Tunables.
 174  */
 175 extern mod_prop_info_t dccp_propinfo_tbl[];
 176 extern int dccp_propinfo_count;
 177 
 178 /*
 179  * Register DCCP in ip netstack.
 180  */
 181 void
 182 dccp_ddi_g_init(void)
 183 {
 184         /* Global timer cache */
 185         dccp_timercache = kmem_cache_create("dccp_timercache",
 186             sizeof (dccp_timer_t) + sizeof (mblk_t), 0,
 187             NULL, NULL, NULL, NULL, NULL, 0);
 188  
 189         netstack_register(NS_DCCP, dccp_stack_init, NULL, dccp_stack_fini);
 190 }
 191 
 192 /*
 193  * Unregister DCCP from ip netstack.
 194  */
 195 void
 196 dccp_ddi_g_destroy(void)
 197 {
 198         /* Global timer cache */
 199         kmem_cache_destroy(dccp_timercache);
 200 
 201         netstack_unregister(NS_DCCP);
 202 }
 203 
 204 #define INET_NAME       "ip"
 205 
 206 /*
 207  * Initialize this DCCP stack instance.
 208  */
 209 static void *
 210 dccp_stack_init(netstackid_t stackid, netstack_t *ns)
 211 {
 212         dccp_stack_t    *dccps;
 213         major_t         major;
 214         size_t          arrsz;
 215         int             error;
 216         int             i;
 217 
 218         dccps = kmem_zalloc(sizeof (*dccps), KM_SLEEP);
 219         if (dccps == NULL) {
 220                 return (NULL);
 221         }
 222         dccps->dccps_netstack = ns;
 223 
 224         /* Ports */
 225         mutex_init(&dccps->dccps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
 226         dccps->dccps_num_epriv_ports = DCCP_NUM_EPRIV_PORTS;
 227         dccps->dccps_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
 228         dccps->dccps_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
 229         dccps->dccps_min_anonpriv_port = 512;
 230 
 231         dccps->dccps_bind_fanout_size = dccp_bind_fanout_size;
 232 
 233         /* Bind fanout */
 234         dccps->dccps_bind_fanout = kmem_zalloc(dccps->dccps_bind_fanout_size *
 235             sizeof (dccp_df_t), KM_SLEEP);
 236         for (i = 0; i < dccps->dccps_bind_fanout_size; i++) {
 237                 mutex_init(&dccps->dccps_bind_fanout[i].df_lock, NULL,
 238                     MUTEX_DEFAULT, NULL);
 239         }
 240 
 241         /* Tunable properties */
 242         arrsz = dccp_propinfo_count * sizeof (mod_prop_info_t);
 243         dccps->dccps_propinfo_tbl = kmem_alloc(arrsz, KM_SLEEP);
 244         if (dccps->dccps_propinfo_tbl == NULL) {
 245                 kmem_free(dccps, sizeof (*dccps));
 246                 return (NULL);
 247         }
 248         bcopy(dccp_propinfo_tbl, dccps->dccps_propinfo_tbl, arrsz);
 249 
 250         /* Allocate per netstack cpu stats */
 251         mutex_enter(&cpu_lock);
 252         dccps->dccps_sc_cnt = MAX(ncpus, boot_ncpus);
 253         mutex_exit(&cpu_lock);
 254 
 255         dccps->dccps_sc = kmem_zalloc(max_ncpus * sizeof (dccp_stats_cpu_t *),
 256             KM_SLEEP);
 257         for (i = 0; i < dccps->dccps_sc_cnt; i++) {
 258                 dccps->dccps_sc[i] = kmem_zalloc(sizeof (dccp_stats_cpu_t),
 259                     KM_SLEEP);
 260         }
 261 
 262         /* Kernel statistics */
 263         //dccps->dccps_kstat = dccp_kstat2_init(stackid);
 264         //dccps->dccps_mibkp = dccp_kstat_init(stackid);
 265 
 266         /* Driver major number */
 267         major = mod_name_to_major(INET_NAME);
 268         error = ldi_ident_from_major(major, &dccps->dccps_ldi_ident);
 269         ASSERT(error == 0);
 270 
 271         return (dccps);
 272 }
 273 
 274 /*
 275  * Destroy this DCCP netstack instance.
 276  */
 277 static void
 278 dccp_stack_fini(netstackid_t stackid, void *arg)
 279 {
 280         dccp_stack_t    *dccps = (dccp_stack_t *)arg;
 281         int             i;
 282 
 283         /* Free cpu stats */
 284         for (i = 0; i < dccps->dccps_sc_cnt; i++) {
 285                 kmem_free(dccps->dccps_sc[i], sizeof (dccp_stats_cpu_t));
 286         }
 287         kmem_free(dccps->dccps_sc, max_ncpus * sizeof (dccp_stats_cpu_t *));
 288 
 289         /* Free tunable properties */
 290         kmem_free(dccps->dccps_propinfo_tbl,
 291             dccp_propinfo_count * sizeof (mod_prop_info_t));
 292         dccps->dccps_propinfo_tbl = NULL;
 293 
 294         /* Free bind fanout */
 295         for (i = 0; i < dccps->dccps_bind_fanout_size; i++) {
 296                 ASSERT(dccps->dccps_bind_fanout[i].df_dccp == NULL);
 297                 mutex_destroy(&dccps->dccps_bind_fanout[i].df_lock);
 298         }
 299         kmem_free(dccps->dccps_bind_fanout, dccps->dccps_bind_fanout_size *
 300             sizeof (dccp_df_t));
 301         dccps->dccps_bind_fanout = NULL;
 302 
 303         /* Kernel statistics */
 304         dccp_kstat_fini(stackid, dccps->dccps_mibkp);
 305         dccps->dccps_mibkp = NULL;
 306         dccp_kstat2_fini(stackid, dccps->dccps_kstat);
 307         dccps->dccps_kstat = NULL;
 308 
 309         ldi_ident_release(dccps->dccps_ldi_ident);
 310 
 311         kmem_free(dccps, sizeof (*dccps));
 312 }
 313 
 314 /* /dev/dccp */
 315 static int
 316 dccp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 317 {
 318         cmn_err(CE_NOTE, "dccp.c: dccp_openv4\n");
 319 
 320         return (dccp_open(q, devp, flag, sflag, credp, B_FALSE));
 321 }
 322 
 323 /* /dev/dccp6 */
 324 static int
 325 dccp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
 326 {
 327         cmn_err(CE_NOTE, "dccp.c: dccp_openv6\n");
 328 
 329         return (dccp_open(q, devp, flag, sflag, credp, B_TRUE));
 330 }
 331 
 332 /*
 333  * Common open function for v4 and v6 devices.
 334  */
 335 static int
 336 dccp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
 337     boolean_t isv6)
 338 {
 339         conn_t          *connp;
 340         dccp_t          *dccp;
 341         vmem_t          *minor_arena;
 342         dev_t           conn_dev;
 343         boolean_t       issocket;
 344         int             error;
 345 
 346         cmn_err(CE_NOTE, "dccp.c: dccp_open");
 347 
 348         /* If the stream is already open, return immediately */
 349         if (q->q_ptr != NULL) {
 350                 return (0);
 351         }
 352 
 353         if (sflag == MODOPEN) {
 354                 return (EINVAL);
 355         }
 356 
 357         if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
 358             ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
 359                 minor_arena = ip_minor_arena_la;
 360         } else {
 361                 /*
 362                  * Either minor numbers in the large arena were exhausted
 363                  * or a non socket application is doing the open.
 364                  * Try to allocate from the small arena.
 365                  */
 366                 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
 367                         return (EBUSY);
 368                 }
 369                 minor_arena = ip_minor_arena_sa;
 370         }
 371 
 372         ASSERT(minor_arena != NULL);
 373 
 374         *devp = makedevice(getmajor(*devp), (minor_t)conn_dev);
 375 
 376         if (flag & SO_FALLBACK) {
 377                 /*
 378                  * Non streams socket needs a stream to fallback to.
 379                  */
 380                 RD(q)->q_ptr = (void *)conn_dev;
 381                 WR(q)->q_qinfo = &dccp_fallback_sock_winit;
 382                 WR(q)->q_ptr = (void *)minor_arena;
 383                 qprocson(q);
 384                 return (0);
 385         } else if (flag & SO_ACCEPTOR) {
 386                 q->q_qinfo = &dccp_acceptor_rinit;
 387                 /*
 388                  * The conn_dev and minor_arena will be subsequently used by
 389                  * dccp_tli_accept() and dccp_tpi_close_accept() to figure out
 390                  * the minor device number for this connection from the q_ptr.
 391                  */
 392                 RD(q)->q_ptr = (void *)conn_dev;
 393                 WR(q)->q_qinfo = &dccp_acceptor_winit;
 394                 WR(q)->q_ptr = (void *)minor_arena;
 395                 qprocson(q);
 396                 return (0);
 397         }
 398 
 399         issocket = flag & SO_SOCKSTR;
 400         connp = dccp_create_common(credp, isv6, issocket, &error);
 401         if (connp == NULL) {
 402                 inet_minor_free(minor_arena, conn_dev);
 403                 q->q_ptr = WR(q)->q_ptr = NULL;
 404                 return (error);
 405         }
 406 
 407         connp->conn_rq = q;
 408         connp->conn_wq = WR(q);
 409         q->q_ptr = WR(q)->q_ptr = connp;
 410 
 411         connp->conn_dev = conn_dev;
 412         connp->conn_minor_arena = minor_arena;
 413 
 414         ASSERT(q->q_qinfo == &dccp_rinitv4 || q->q_qinfo == &dccp_rinitv6);
 415         ASSERT(WR(q)->q_qinfo == &dccp_winit);
 416 
 417         dccp = connp->conn_dccp;
 418 
 419         if (issocket) {
 420                 WR(q)->q_qinfo = &dccp_sock_winit;
 421         } else {
 422 #ifdef  _ILP32
 423                 dccp->dccp_acceptor_id = (t_uscalar_t)RD(q);
 424 #else
 425                 dccp->dccp_acceptor_id = conn_dev;
 426 #endif  /* _ILP32 */
 427         }
 428 
 429         /*
 430          * Put the ref for DCCP. Ref for IP was already put
 431          * by ipcl_conn_create. Also Make the conn_t globally
 432          * visible to walkers.
 433          */
 434         mutex_enter(&connp->conn_lock);
 435         CONN_INC_REF_LOCKED(connp);
 436         ASSERT(connp->conn_ref == 2);
 437         connp->conn_state_flags &= ~CONN_INCIPIENT;
 438         mutex_exit(&connp->conn_lock);
 439 
 440         qprocson(q);
 441 
 442         return (0);
 443 }
 444 
 445 /*
 446  * IXA notify
 447  */
 448 static void
 449 dccp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
 450     ixa_notify_arg_t narg)
 451 {
 452         cmn_err(CE_NOTE, "dccp.c: dccp_notify");
 453 }
 454 
 455 /*
 456  * Build the template headers.
 457  */
 458 int
 459 dccp_build_hdrs(dccp_t *dccp)
 460 {
 461         dccp_stack_t    *dccps = dccp->dccp_dccps;
 462         conn_t          *connp = dccp->dccp_connp;
 463         dccpha_t        *dccpha;
 464         uint32_t        cksum;
 465         char            buf[DCCP_MAX_HDR_LENGTH];
 466         uint_t          buflen;
 467         uint_t          ulplen = 12;
 468         uint_t          extralen = 0;
 469         int             error;
 470 
 471         cmn_err(CE_NOTE, "dccp.c: dccp_build_hdrs");
 472 
 473         buflen = connp->conn_ht_ulp_len;
 474         if (buflen != 0) {
 475                 cmn_err(CE_NOTE, "buflen != 0");
 476                 bcopy(connp->conn_ht_ulp, buf, buflen);
 477                 extralen -= buflen - ulplen;
 478                 ulplen = buflen;
 479         }
 480 
 481         mutex_enter(&connp->conn_lock);
 482         error = conn_build_hdr_template(connp, ulplen, extralen,
 483             &connp->conn_laddr_v6, &connp->conn_faddr_v6, connp->conn_flowinfo);
 484         mutex_exit(&connp->conn_lock);
 485         if (error != 0) {
 486                 cmn_err(CE_NOTE, "conn_build_hdr_template failed");
 487                 return (error);
 488         }
 489 
 490         dccpha = (dccpha_t *)connp->conn_ht_ulp;
 491         dccp->dccp_dccpha = dccpha;
 492 
 493         if (buflen != 0) {
 494                 bcopy(buf, connp->conn_ht_ulp, buflen);
 495         } else {
 496                 dccpha->dha_sum = 0;
 497                 dccpha->dha_lport = connp->conn_lport;
 498                 dccpha->dha_fport = connp->conn_fport;
 499         }
 500 
 501         cksum = sizeof (dccpha_t) + connp->conn_sum;
 502         cksum = (cksum >> 16) + (cksum & 0xFFFF);
 503         dccpha->dha_sum = htons(cksum);
 504         dccpha->dha_offset = 7;
 505         dccpha->dha_x = 1;
 506 
 507         if (connp->conn_ipversion == IPV4_VERSION) {
 508                 dccp->dccp_ipha = (ipha_t *)connp->conn_ht_iphc;
 509         } else {
 510                 dccp->dccp_ip6h = (ip6_t *)connp->conn_ht_iphc;
 511         }
 512 
 513         /* XXX */
 514 
 515         return (0);
 516 }
 517 
 518 /*
 519  * DCCP write service routine.
 520  */
 521 static void
 522 dccp_wsrv(queue_t *q)
 523 {
 524         dccp_stack_t    *dccps = Q_TO_DCCP(q)->dccp_dccps;
 525 
 526         DCCP_STAT(dccps, dccp_wsrv_called);
 527 }
 528 
 529 /*
 530  * Common create function for streams and sockets.
 531  */
 532 conn_t *
 533 dccp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket,
 534     int *errorp)
 535 {
 536         conn_t          *connp;
 537         dccp_t          *dccp;
 538         dccp_stack_t    *dccps;
 539         netstack_t      *ns;
 540         squeue_t        *sqp;
 541         zoneid_t        zoneid;
 542         int             error;
 543 
 544         cmn_err(CE_NOTE, "dccp.c: dccp_create_common\n");
 545 
 546         ASSERT(errorp != NULL);
 547 
 548         error = secpolicy_basic_net_access(credp);
 549         if (error != 0) {
 550                 *errorp = error;
 551                 return (NULL);
 552         }
 553 
 554         /*
 555          * Find the right netstack.
 556          */
 557         ns = netstack_find_by_cred(credp);
 558         ASSERT(ns != NULL);
 559         dccps = ns->netstack_dccp;
 560         ASSERT(dccps != NULL);
 561 
 562         /*
 563          * For exclusive stacks we set the zoneid to zero
 564          * to make TCP operate as if in the global zone.
 565          */
 566         if (ns->netstack_stackid != GLOBAL_NETSTACKID) {
 567                 zoneid = GLOBAL_ZONEID;
 568         } else {
 569                 zoneid = crgetzoneid(credp);
 570         }
 571 
 572         sqp = IP_SQUEUE_GET((uint_t)gethrtime());
 573         connp = (conn_t *)dccp_get_conn(sqp, dccps);
 574         netstack_rele(dccps->dccps_netstack);
 575         if (connp == NULL) {
 576                 *errorp = ENOSR;
 577                 return (NULL);
 578         }
 579         ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
 580 
 581         connp->conn_sqp = sqp;
 582         connp->conn_initial_sqp = connp->conn_sqp;
 583         connp->conn_ixa->ixa_sqp = connp->conn_sqp;
 584         dccp = connp->conn_dccp;
 585 
 586         /* Setting flags for ip output */
 587         connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE |
 588             IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO;
 589 
 590         ASSERT(connp->conn_proto == IPPROTO_DCCP);
 591         ASSERT(connp->conn_dccp == dccp);
 592         ASSERT(dccp->dccp_connp == connp);
 593 
 594         if (isv6) {
 595                 connp->conn_ixa->ixa_src_preferences = IPV6_PREFER_SRC_DEFAULT;
 596                 connp->conn_ipversion = IPV6_VERSION;
 597                 connp->conn_family = AF_INET6;
 598                 /* XXX mms, ttl */
 599         } else {
 600                 connp->conn_ipversion = IPV4_VERSION;
 601                 connp->conn_family = AF_INET;
 602                 /* XXX mms, ttl */
 603         }
 604         connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
 605 
 606         crhold(credp);
 607         connp->conn_cred = credp;
 608         connp->conn_cpid = curproc->p_pid;
 609         connp->conn_open_time = ddi_get_lbolt64();
 610 
 611         ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
 612         connp->conn_ixa->ixa_cred = credp;
 613         connp->conn_ixa->ixa_cpid = connp->conn_cpid;
 614 
 615         connp->conn_zoneid = zoneid;
 616         connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
 617         connp->conn_ixa->ixa_zoneid = zoneid;
 618         connp->conn_mlp_type = mlptSingle;
 619 
 620         dccp->dccp_dccps = dccps;
 621         dccp->dccp_state = DCCPS_CLOSED;
 622 
 623         ASSERT(connp->conn_netstack == dccps->dccps_netstack);
 624         ASSERT(dccp->dccp_dccps == dccps);
 625 
 626         /*
 627          * If the caller has the process-wide flag set, then default to MAC
 628          * exempt mode.  This allows read-down to unlabeled hosts.
 629          */
 630         if (getpflags(NET_MAC_AWARE, credp) != 0) {
 631                 connp->conn_mac_mode = CONN_MAC_AWARE;
 632         }
 633 
 634         if (issocket) {
 635                 dccp->dccp_issocket = 1;
 636         }
 637 
 638         /* XXX rcvbuf, sndbuf etc */
 639 
 640         connp->conn_so_type = SOCK_STREAM;
 641 
 642         SOCK_CONNID_INIT(dccp->dccp_connid);
 643         dccp_init_values(dccp, NULL);
 644 
 645         return (connp);
 646 }
 647 
 648 /*
 649  * Common close function for streams and sockets.
 650  */
 651 void
 652 dccp_close_common(conn_t *connp, int flags)
 653 {
 654         dccp_t          *dccp = connp->conn_dccp;
 655         mblk_t          *mp;
 656         boolean_t       conn_ioctl_cleanup_reqd = B_FALSE;
 657 
 658         cmn_err(CE_NOTE, "dccp.c: dccp_close_common");
 659 
 660         ASSERT(connp->conn_ref >= 2);
 661 
 662         /*
 663          * Mark the conn as closing. ipsq_pending_mp_add will not
 664          * add any mp to the pending mp list, after this conn has
 665          * started closing.
 666          */
 667         mutex_enter(&connp->conn_lock);
 668         connp->conn_state_flags |= CONN_CLOSING;
 669 
 670         if (connp->conn_oper_pending_ill != NULL) {
 671                 conn_ioctl_cleanup_reqd = B_TRUE;
 672         }
 673 
 674         CONN_INC_REF_LOCKED(connp);
 675         mutex_exit(&connp->conn_lock);
 676 
 677         ASSERT(connp->conn_ref >= 3);
 678 
 679         /*
 680          * Cleanup any queued ioctls here. This must be done before the wq/rq
 681          * are re-written by dccp_close_output().
 682          */
 683         if (conn_ioctl_cleanup_reqd) {
 684                 conn_ioctl_cleanup(connp);
 685         }
 686 
 687         mutex_enter(&connp->conn_lock);
 688         while (connp->conn_ioctlref > 0) {
 689                 cv_wait(&connp->conn_cv, &connp->conn_lock);
 690         }
 691         ASSERT(connp->conn_ioctlref == 0);
 692         ASSERT(connp->conn_oper_pending_ill == NULL);
 693         mutex_exit(&connp->conn_lock);
 694 
 695         /* generate close */
 696 /*
 697         SQUEUE_ENTER_ONE(connp->conn_sqp, mp, dccp_close_output, connp,
 698             NULL, dccp_squeue_flag, SQTAG_IP_DCCP_CLOSE);
 699 
 700 */
 701 
 702 nowait:
 703         connp->conn_cpid = NOPID;
 704 }
 705 
 706 /*
 707  * Common bind function.
 708  */
 709 int
 710 dccp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
 711     boolean_t bind_to_req_port_only)
 712 {
 713         dccp_t  *dccp = connp->conn_dccp;
 714         int     error;
 715 
 716         cmn_err(CE_NOTE, "dccp.c: dccp_do_bind");
 717 
 718         if (dccp->dccp_state >= DCCPS_BOUND) {
 719                 if (connp->conn_debug) {
 720                         (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
 721                             "dccp_bind: bad state, %d", dccp->dccp_state);
 722                 }
 723                 return (-TOUTSTATE);
 724         }
 725 
 726         error = dccp_bind_check(connp, sa, len, cr, bind_to_req_port_only);
 727         if (error != 0) {
 728                 return (error);
 729         }
 730 
 731         ASSERT(dccp->dccp_state == DCCPS_LISTEN);
 732         /* XXX dccp_conn_req_max = 0 */ 
 733 
 734         return (0);
 735 }
 736 
 737 /*
 738  * Common unbind function.
 739  */
 740 int
 741 dccp_do_unbind(conn_t *connp)
 742 {
 743         dccp_t  *dccp = connp->conn_dccp;
 744         int32_t oldstate;
 745 
 746         cmn_err(CE_NOTE, "dccp.c: dccp_do_unbind");
 747 
 748         switch (dccp->dccp_state) {
 749         case DCCPS_OPEN:
 750         case DCCPS_LISTEN:
 751                 break;
 752         default:
 753                 return (-TOUTSTATE);
 754         }
 755 
 756         connp->conn_laddr_v6 = ipv6_all_zeros;
 757         connp->conn_saddr_v6 = ipv6_all_zeros;
 758 
 759         dccp_bind_hash_remove(dccp);
 760 
 761         oldstate = dccp->dccp_state;
 762         dccp->dccp_state = DCCPS_CLOSED;
 763         DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
 764             connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL,
 765             int32_t, oldstate);
 766 
 767         ip_unbind(connp);
 768         bzero(&connp->conn_ports, sizeof (connp->conn_ports));
 769 
 770         return (0);
 771 }
 772 
 773 /*
 774  * Common listen function.
 775  */
 776 int
 777 dccp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len,
 778     int backlog, cred_t *cr, boolean_t bind_to_req_port_only)
 779 {
 780         dccp_t          *dccp = connp->conn_dccp;
 781         dccp_stack_t    *dccps = dccp->dccp_dccps;
 782         int32_t         oldstate;
 783         int             error;
 784 
 785         cmn_err(CE_NOTE, "dccp.c: dccp_do_listen");
 786 
 787         /* All Solaris components should pass a cred for this operation */
 788         ASSERT(cr != NULL);
 789 
 790         if (dccp->dccp_state >= DCCPS_BOUND) {
 791 
 792                 if ((dccp->dccp_state == DCCPS_BOUND ||
 793                     dccp->dccp_state == DCCPS_LISTEN) && backlog > 0) {
 794                         goto do_listen;
 795                 }
 796                 cmn_err(CE_NOTE, "DCCPS_BOUND, bad state");
 797 
 798                 if (connp->conn_debug) {
 799                         (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
 800                             "dccp_listen: bad state, %d", dccp->dccp_state);
 801                 }
 802                 return (-TOUTSTATE);
 803         } else {
 804                 if (sa == NULL) {
 805                         sin6_t  addr;
 806                         sin6_t  *sin6;
 807                         sin_t   *sin;
 808 
 809                         ASSERT(IPCL_IS_NONSTR(connp));
 810 
 811                         if (connp->conn_family == AF_INET) {
 812                                 len = sizeof (sin_t);
 813                                 sin = (sin_t *)&addr;
 814                                 *sin = sin_null;
 815                                 sin->sin_family = AF_INET;
 816                         } else {
 817                                 ASSERT(connp->conn_family == AF_INET6);
 818 
 819                                 len = sizeof (sin6_t);
 820                                 sin6 = (sin6_t *)&addr;
 821                                 *sin6 = sin6_null;
 822                                 sin6->sin6_family = AF_INET6;
 823                         }
 824 
 825                         sa = (struct sockaddr *)&addr;
 826                 }
 827 
 828                 error = dccp_bind_check(connp, sa, len, cr,
 829                     bind_to_req_port_only);
 830                 if (error != 0) {
 831                         cmn_err(CE_NOTE, "dccp_bind_check failed");
 832                         return (error);
 833                 }
 834                 /* Fall through and do the fanout insertion */
 835         }
 836 
 837 do_listen:
 838         ASSERT(dccp->dccp_state == DCCPS_BOUND ||
 839             dccp->dccp_state == DCCPS_LISTEN);
 840 
 841         /* XXX backlog */
 842 
 843         connp->conn_recv = dccp_input_listener_unbound;
 844 
 845         /* Insert into the classifier table */
 846         error = ip_laddr_fanout_insert(connp);
 847         if (error != 0) {
 848                 /* Error - undo the bind */
 849                 oldstate = dccp->dccp_state;
 850                 dccp->dccp_state = DCCPS_CLOSED;
 851 
 852                 connp->conn_bound_addr_v6 = ipv6_all_zeros;
 853 
 854                 connp->conn_laddr_v6 = ipv6_all_zeros;
 855                 connp->conn_saddr_v6 = ipv6_all_zeros;
 856                 connp->conn_ports = 0;
 857 
 858                 if (connp->conn_anon_port) {
 859                         zone_t  *zone;
 860 
 861                         zone = crgetzone(cr);
 862                         connp->conn_anon_port = B_FALSE;
 863                         (void) tsol_mlp_anon(zone, connp->conn_mlp_type,
 864                             connp->conn_proto, connp->conn_lport, B_FALSE);
 865                 }
 866                 connp->conn_mlp_type = mlptSingle;
 867 
 868                 /* XXX dccp_bind_hash_remove */
 869 
 870                 return (error);
 871         } else {
 872                 /* XXX connection limits */
 873         }
 874 
 875         return (error);
 876 }
 877 
 878 /*
 879  * Common connect function.
 880  */
 881 int
 882 dccp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
 883     cred_t *cr, pid_t pid)
 884 {
 885         dccp_t          *dccp = connp->conn_dccp;
 886         dccp_stack_t    *dccps = dccp->dccp_dccps;
 887         ip_xmit_attr_t  *ixa = connp->conn_ixa;
 888         mblk_t          *req_mp;
 889         sin_t           *sin = (sin_t *)sa;
 890         sin6_t          *sin6 = (sin6_t *)sa;
 891         ipaddr_t        *dstaddrp;
 892         in_port_t       dstport;
 893         uint_t          srcid;
 894         int32_t         oldstate;
 895         int             error;
 896 
 897         cmn_err(CE_NOTE, "dccp.c: dccp_do_connect");
 898 
 899         oldstate = dccp->dccp_state;
 900 
 901         switch (len) {
 902         case sizeof (sin_t):
 903                 sin = (sin_t *)sa;
 904                 if (sin->sin_port == 0) {
 905                         return (-TBADADDR);
 906                 }
 907                 if (connp->conn_ipv6_v6only) {
 908                         return (EAFNOSUPPORT);
 909                 }
 910                 break;
 911 
 912         case sizeof (sin6_t):
 913                 sin6 = (sin6_t *)sa;
 914                 if (sin6->sin6_port == 0) {
 915                         return (-TBADADDR);
 916                 }
 917                 break;
 918 
 919         default:
 920                 return (EINVAL);
 921         }
 922 
 923         if (connp->conn_family == AF_INET6 &&
 924             connp->conn_ipversion == IPV6_VERSION &&
 925             IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 926                 if (connp->conn_ipv6_v6only) {
 927                         return (EADDRNOTAVAIL);
 928                 }
 929 
 930                 connp->conn_ipversion = IPV4_VERSION;
 931         }
 932 
 933         switch (dccp->dccp_state) {
 934         case DCCPS_LISTEN:
 935                 /*
 936                  * Listening sockets are not allowed to issue connect().
 937                  */
 938                 if (IPCL_IS_NONSTR(connp)) {
 939                         return (EOPNOTSUPP);
 940                 }
 941 
 942         case DCCPS_CLOSED:
 943                 /*
 944                  * We support quick connect.
 945                  */
 946                 /* FALLTHRU */
 947         case DCCPS_OPEN:
 948                 break;
 949 
 950         default:
 951                 return (-TOUTSTATE);
 952         }
 953 
 954         /*
 955          * We update our cred/cpid based on the caller of connect.
 956          */
 957         if (connp->conn_cred != cr) {
 958                 crhold(cr);
 959                 crfree(connp->conn_cred);
 960                 connp->conn_cred = cr;
 961         }
 962         connp->conn_cpid = pid;
 963 
 964         /* Cache things in the ixa without any refhold */
 965         ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
 966         ixa->ixa_cred = cr;
 967         ixa->ixa_cpid = pid;
 968 
 969         if (is_system_labeled()) {
 970                 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
 971         }
 972 
 973         if (connp->conn_family == AF_INET6) {
 974                 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 975                         error = dccp_connect_ipv6(dccp, &sin6->sin6_addr,
 976                             sin6->sin6_port, sin6->sin6_flowinfo,
 977                             sin6->__sin6_src_id, sin6->sin6_scope_id);
 978                 } else {
 979                         /*
 980                          * Destination adress is mapped IPv6 address.
 981                          * Source bound address should be unspecified or
 982                          * IPv6 mapped address as well.
 983                          */
 984                         if (!IN6_IS_ADDR_UNSPECIFIED(
 985                             &connp->conn_bound_addr_v6) &&
 986                             !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
 987                                 return (EADDRNOTAVAIL);
 988                         }
 989 
 990                         dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr));
 991                         dstport = sin6->sin6_port;
 992                         srcid = sin6->__sin6_src_id;
 993                         error = dccp_connect_ipv4(dccp, dstaddrp, dstport,
 994                             srcid);
 995                 }
 996         } else {
 997                 dstaddrp = &sin->sin_addr.s_addr;
 998                 dstport = sin->sin_port;
 999                 srcid = 0;
1000                 error = dccp_connect_ipv4(dccp, dstaddrp, dstport, srcid);
1001         }
1002 
1003         if (error != 0) {
1004                 cmn_err(CE_NOTE, "dccp_connect_ip failed");
1005                 goto connect_failed;
1006         }
1007 
1008         /* XXX cluster */
1009 
1010         /* Connect succeeded */
1011         DCCPS_BUMP_MIB(dccps, dccpActiveOpens);
1012         dccp->dccp_active_open = 1;
1013 
1014         DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
1015             connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL,
1016             int32_t, DCCPS_BOUND);
1017 
1018         DCCP_TIMER_RESTART(dccp, 100);
1019         req_mp = dccp_generate_request(connp);
1020         if (req_mp != NULL) {
1021                 /*
1022                  * We must bump the generation before sending the request
1023                  * to ensure that we use the right generation in case
1024                  * this thread issues a "connected" up call.
1025                  */
1026                 SOCK_CONNID_BUMP(dccp->dccp_connid);
1027 
1028                 DTRACE_DCCP5(connect__request, mblk_t *, NULL,
1029                     ip_xmit_attr_t *, connp->conn_ixa,
1030                     void_ip_t *, req_mp->b_rptr, dccp_t *, dccp,
1031                     dccpha_t *,
1032                     &req_mp->b_rptr[connp->conn_ixa->ixa_ip_hdr_length]);
1033 
1034                 dccp_send_data(dccp, req_mp);
1035         }
1036 
1037         return (0);
1038 
1039 connect_failed:
1040         cmn_err(CE_NOTE, "dccp_do_connect failed");
1041 
1042         connp->conn_faddr_v6 = ipv6_all_zeros;
1043         connp->conn_fport = 0;
1044         dccp->dccp_state = oldstate;
1045 
1046         /* XXX */
1047         return (error);
1048 }
1049 
1050 /*
1051  * Init values of a connection.
1052  */
1053 void
1054 dccp_init_values(dccp_t *dccp, dccp_t *parent)
1055 {
1056         conn_t          *connp = dccp->dccp_connp;
1057         dccp_stack_t    *dccps = dccp->dccp_dccps;
1058 
1059         connp->conn_mlp_type = mlptSingle;
1060 }
1061 
1062 /*
1063  * Free dccp structure.
1064  */
1065 void
1066 dccp_free(dccp_t *dccp)
1067 {
1068         conn_t  *connp = dccp->dccp_connp;
1069 
1070         cmn_err(CE_NOTE, "dccp.c: dccp_free");
1071 
1072         connp->conn_rq = NULL;
1073         connp->conn_wq = NULL;
1074 
1075         if (connp->conn_upper_handle != NULL) {
1076                 if (IPCL_IS_NONSTR(connp)) {
1077                         (*connp->conn_upcalls->su_closed)(
1078                             connp->conn_upper_handle);
1079                         dccp->dccp_detached = B_TRUE;
1080                 }
1081 
1082                 connp->conn_upper_handle = NULL;
1083                 connp->conn_upcalls = NULL;
1084         }
1085 }
1086 
1087 void *
1088 dccp_get_conn(void *arg, dccp_stack_t *dccps)
1089 {
1090         dccp_t          *dccp = NULL;
1091         conn_t          *connp;
1092         squeue_t        *sqp = (squeue_t *)arg;
1093         netstack_t      *ns;
1094 
1095         /* XXX timewait */
1096 
1097         connp = ipcl_conn_create(IPCL_DCCPCONN, KM_NOSLEEP,
1098             dccps->dccps_netstack);
1099         if (connp == NULL) {
1100                 return (NULL);
1101         }
1102 
1103         dccp = connp->conn_dccp;
1104         dccp->dccp_dccps = dccps;
1105 
1106         /* List of features being negotated */
1107         list_create(&dccp->dccp_features, sizeof (dccp_feature_t),
1108             offsetof(dccp_feature_t, df_next));
1109 
1110         connp->conn_recv = dccp_input_data;
1111         connp->conn_recvicmp = dccp_icmp_input;
1112         connp->conn_verifyicmp = dccp_verifyicmp;
1113 
1114         connp->conn_ixa->ixa_notify = dccp_notify;
1115         connp->conn_ixa->ixa_notify_cookie = dccp;
1116 
1117         return ((void *)connp);
1118 }
1119 
1120 /*
1121  * IPv4 connect.
1122  */
1123 static int
1124 dccp_connect_ipv4(dccp_t *dccp, ipaddr_t *dstaddrp, in_port_t dstport,
1125     uint_t srcid)
1126 {
1127         conn_t          *connp = dccp->dccp_connp;
1128         dccp_stack_t    *dccps = dccp->dccp_dccps;
1129         ipaddr_t        dstaddr = *dstaddrp;
1130         uint16_t        lport;
1131         int             error;
1132 
1133         cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv4");
1134 
1135         ASSERT(connp->conn_ipversion == IPV4_VERSION);
1136 
1137         if (dstaddr == INADDR_ANY) {
1138                 dstaddr = htonl(INADDR_LOOPBACK);
1139                 *dstaddrp = dstaddr;
1140         }
1141 
1142         /* Handle __sin6_src_id if socket not bound to an IP address */
1143         if (srcid != 0 && connp->conn_laddr_v4 == INADDR_ANY) {
1144                 ip_srcid_find_id(srcid, &connp->conn_laddr_v6,
1145                     IPCL_ZONEID(connp), dccps->dccps_netstack);
1146                 connp->conn_saddr_v6 = connp->conn_laddr_v6;
1147         }
1148 
1149         IN6_IPADDR_TO_V4MAPPED(dstaddr, &connp->conn_faddr_v6);
1150         connp->conn_fport = dstport;
1151 
1152         if (dccp->dccp_state == DCCPS_CLOSED) {
1153                 lport = dccp_update_next_port(dccps->dccps_next_port_to_try,
1154                     dccp, B_TRUE);
1155                 lport = dccp_bindi(dccp, lport, &connp->conn_laddr_v6, 0,
1156                     B_TRUE, B_FALSE, B_FALSE);
1157                 if (lport == 0) {
1158                         return (-TNOADDR);
1159                 }
1160         }
1161 
1162         error = dccp_set_destination(dccp);
1163         if (error != 0) {
1164                 return (error);
1165         }
1166 
1167         /*
1168          * Don't connect to oneself.
1169          */
1170         if (connp->conn_faddr_v4 == connp->conn_laddr_v4 &&
1171             connp->conn_fport == connp->conn_lport) {
1172                 return (-TBADADDR);
1173         }
1174 
1175         dccp->dccp_state = DCCPS_REQUEST;
1176 
1177         return (ipcl_conn_insert_v4(connp));
1178 }
1179 
1180 /*
1181  * IPv6 connect.
1182  */
1183 static int
1184 dccp_connect_ipv6(dccp_t *dccp, in6_addr_t *dstaddrp, in_port_t dstport,
1185     uint32_t flowinfo, uint_t srcid, uint32_t scope_id)
1186 {
1187         cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv6");
1188 
1189         return (0);
1190 }
1191 
1192 /*
1193  * Set the ports via conn_connect and build the template
1194  * header.
1195  */
1196 int
1197 dccp_set_destination(dccp_t *dccp)
1198 {
1199         conn_t          *connp = dccp->dccp_connp;
1200         dccp_stack_t    *dccps = dccp->dccp_dccps;
1201         iulp_t          uinfo;
1202         uint32_t        flags;
1203         int             error;
1204 
1205         flags = IPDF_LSO | IPDF_ZCOPY;
1206         flags |= IPDF_UNIQUE_DCE;
1207 
1208         mutex_enter(&connp->conn_lock);
1209         error = conn_connect(connp, &uinfo, flags);
1210         mutex_exit(&connp->conn_lock);
1211         if (error != 0) {
1212                 cmn_err(CE_NOTE, "conn_connect failed");
1213                 return (error);
1214         }
1215 
1216         error = dccp_build_hdrs(dccp);
1217         if (error != 0) {
1218                 cmn_err(CE_NOTE, "dccp_build_hdrs failed");
1219                 return (error);
1220         }
1221 
1222         /* XXX */
1223 
1224         /* Initialise the ISS */
1225         dccp_iss_init(dccp);
1226 
1227         mutex_enter(&connp->conn_lock);
1228         connp->conn_state_flags &= ~CONN_INCIPIENT;
1229         mutex_exit(&connp->conn_lock);
1230 
1231         return (0);
1232 }
1233 
1234 /*
1235  * Init the ISS.
1236  */
1237 static void
1238 dccp_iss_init(dccp_t *dccp)
1239 {
1240         cmn_err(CE_NOTE, "dccp.c: dccp_iss_init");
1241 
1242         dccp->dccp_iss += gethrtime();
1243         dccp->dccp_gss = dccp->dccp_iss;
1244 }