1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2012 David Hoeppner.  All rights reserved.
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/stream.h>
  33 #include <sys/strsun.h>
  34 #include <sys/strsubr.h>
  35 #include <sys/stropts.h>
  36 #include <sys/strlog.h>
  37 #define _SUN_TPI_VERSION 2
  38 #include <sys/tihdr.h>
  39 #include <sys/suntpi.h>
  40 #include <sys/xti_inet.h>
  41 #include <sys/squeue_impl.h>
  42 #include <sys/squeue.h>
  43 #include <sys/tsol/tnet.h>
  44 
  45 #include <inet/common.h>
  46 #include <inet/ip.h>
  47 
  48 #include <sys/cmn_err.h>
  49 
  50 #include "dccp_impl.h"
  51 
  52 static mblk_t   *dccp_conn_create_v4(conn_t *, conn_t *, mblk_t *,
  53                     ip_recv_attr_t *);
  54 static mblk_t   *dccp_conn_create_v6(conn_t *, conn_t *, mblk_t *,
  55                     ip_recv_attr_t *);
  56 static void     dccp_input_listener(void *, mblk_t *, void *, ip_recv_attr_t *);
  57 static void     dccp_icmp_error_ipv6(dccp_t *, mblk_t *, ip_recv_attr_t *);
  58 
  59 void
  60 dccp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
  61 {
  62         conn_t  *connp = (conn_t *)arg1;
  63         dccp_t  *dccp = connp->conn_dccp;
  64 
  65         cmn_err(CE_NOTE, "dccp_input.c: dccp_icmp_input");
  66 
  67         /* Assume IP provides aligned packets */
  68         ASSERT(OK_32PTR(mp->b_rptr));
  69         ASSERT((MBLKL(mp) >= sizeof (ipha_t)));
  70 
  71         /* Verify IP version. */
  72         if (!(ira->ira_flags & IRAF_IS_IPV4)) {
  73                 dccp_icmp_error_ipv6(dccp, mp, ira);
  74                 return;
  75         }
  76 
  77 }
  78 
  79 static void
  80 dccp_icmp_error_ipv6(dccp_t *dccp, mblk_t *mp, ip_recv_attr_t *ira)
  81 {
  82         cmn_err(CE_NOTE, "dccp_input.c: dccp_icmp_error_ipv6");
  83 }
  84 
  85 void
  86 dccp_rsrv(queue_t *q)
  87 {
  88         cmn_err(CE_NOTE, "dccp_input.c: dccp_rsrv");
  89 }
  90 
  91 /*
  92  * Handle a REQUEST on an AF_INET6 socket; can be either IPv4 or IPv6.
  93  */
  94 static mblk_t *
  95 dccp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp,
  96     ip_recv_attr_t *ira)
  97 {
  98         dccp_t          *ldccp = lconnp->conn_dccp;
  99         dccp_t          *dccp = connp->conn_dccp;
 100         dccp_stack_t    *dccps = dccp->dccp_dccps;
 101         ipha_t          *ipha;
 102         ip6_t           *ip6h;
 103         mblk_t          *tpi_mp;
 104         sin6_t          sin6;
 105         uint_t          ifindex = ira->ira_ruifindex;
 106 
 107         if (ira->ira_flags & IRAF_IS_IPV4) {
 108                 ipha = (ipha_t *)mp->b_rptr;
 109 
 110                 connp->conn_ipversion = IPV4_VERSION;
 111                 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_laddr_v6);
 112                 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_faddr_v6);
 113                 connp->conn_saddr_v6 = connp->conn_laddr_v6;
 114 
 115                 sin6 = sin6_null;
 116                 sin6.sin6_addr = connp->conn_faddr_v6;
 117                 sin6.sin6_port = connp->conn_fport;
 118                 sin6.sin6_family = AF_INET6;
 119                 sin6.__sin6_src_id = ip_srcid_find_addr(&connp->conn_laddr_v6,
 120                     IPCL_ZONEID(lconnp), dccps->dccps_netstack);
 121 
 122                 if (connp->conn_recv_ancillary.crb_recvdstaddr) {
 123                         sin6_t  sin6d;
 124 
 125                         sin6d = sin6_null;
 126                         sin6d.sin6_addr = connp->conn_laddr_v6;
 127                         sin6d.sin6_port = connp->conn_lport;
 128                         sin6d.sin6_family = AF_INET;
 129                         tpi_mp = mi_tpi_extconn_ind(NULL,
 130                             (char *)&sin6d, sizeof (sin6_t),
 131                             (char *)&dccp,
 132                             (t_scalar_t)sizeof (intptr_t),
 133                             (char *)&sin6d, sizeof (sin6_t),
 134                             (t_scalar_t)ldccp->dccp_conn_req_seqnum);
 135                 } else {
 136                         tpi_mp = mi_tpi_conn_ind(NULL,
 137                             (char *)&sin6, sizeof (sin6_t),
 138                             (char *)&dccp, (t_scalar_t)sizeof (intptr_t),
 139                             (t_scalar_t)ldccp->dccp_conn_req_seqnum);
 140                 }
 141         } else {
 142                 ip6h = (ip6_t *)mp->b_rptr;
 143 
 144                 connp->conn_ipversion = IPV6_VERSION;
 145                 connp->conn_laddr_v6 = ip6h->ip6_dst;
 146                 connp->conn_faddr_v6 = ip6h->ip6_src;
 147                 connp->conn_saddr_v6 = connp->conn_laddr_v6;
 148 
 149                 sin6 = sin6_null;
 150                 sin6.sin6_addr = connp->conn_faddr_v6;
 151                 sin6.sin6_port = connp->conn_fport;
 152                 sin6.sin6_family = AF_INET6;
 153                 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
 154                 sin6.__sin6_src_id = ip_srcid_find_addr(&connp->conn_laddr_v6,
 155                     IPCL_ZONEID(lconnp), dccps->dccps_netstack);
 156 
 157                 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
 158                         /* Pass up the scope_id of remote addr */
 159                         sin6.sin6_scope_id = ifindex;
 160                 } else {
 161                         sin6.sin6_scope_id = 0;
 162                 }
 163                 if (connp->conn_recv_ancillary.crb_recvdstaddr) {
 164                         sin6_t  sin6d;
 165 
 166                         sin6d = sin6_null;
 167                         sin6.sin6_addr = connp->conn_laddr_v6;
 168                         sin6d.sin6_port = connp->conn_lport;
 169                         sin6d.sin6_family = AF_INET6;
 170                         if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_laddr_v6))
 171                                 sin6d.sin6_scope_id = ifindex;
 172 
 173                         tpi_mp = mi_tpi_extconn_ind(NULL,
 174                             (char *)&sin6d, sizeof (sin6_t),
 175                             (char *)&dccp, (t_scalar_t)sizeof (intptr_t),
 176                             (char *)&sin6d, sizeof (sin6_t),
 177                             (t_scalar_t)ldccp->dccp_conn_req_seqnum);
 178                 } else {
 179                         tpi_mp = mi_tpi_conn_ind(NULL,
 180                             (char *)&sin6, sizeof (sin6_t),
 181                             (char *)&dccp, (t_scalar_t)sizeof (intptr_t),
 182                             (t_scalar_t)ldccp->dccp_conn_req_seqnum);
 183                 }
 184         }
 185 
 186         /* XXX mss */
 187         return (tpi_mp);
 188 }
 189 
 190 /*
 191  * Handle a REQUEST on an AF_INET socket.
 192  */
 193 static mblk_t *
 194 dccp_conn_create_v4(conn_t *lconnp, conn_t *connp, mblk_t *mp,
 195     ip_recv_attr_t *ira)
 196 {
 197         dccp_t          *ldccp = lconnp->conn_dccp;
 198         dccp_t          *dccp = connp->conn_dccp;
 199         dccp_stack_t    *dccps = dccp->dccp_dccps;
 200         ipha_t          *ipha;
 201         mblk_t          *tpi_mp;
 202         sin_t           sin;
 203 
 204         ASSERT(ira->ira_flags & IRAF_IS_IPV4);
 205         ipha = (ipha_t *)mp->b_rptr;
 206 
 207         connp->conn_ipversion = IPV4_VERSION;
 208         IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_laddr_v6);
 209         IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_faddr_v6);
 210         connp->conn_saddr_v6 = connp->conn_laddr_v6;
 211 
 212         sin = sin_null;
 213         sin.sin_addr.s_addr = connp->conn_faddr_v4;
 214         sin.sin_port = connp->conn_fport;
 215         sin.sin_family = AF_INET;
 216 
 217         if (lconnp->conn_recv_ancillary.crb_recvdstaddr) {
 218                 cmn_err(CE_NOTE, "ancillary");
 219 
 220                 sin_t   sind;
 221 
 222                 sind = sin_null;
 223                 sind.sin_addr.s_addr = connp->conn_laddr_v4;
 224                 sind.sin_port = connp->conn_lport;
 225                 sind.sin_family = AF_INET;
 226 
 227                 tpi_mp = mi_tpi_extconn_ind(NULL,
 228                     (char *)&sind, sizeof (sin_t), (char *)&dccp,
 229                     (t_scalar_t)sizeof (intptr_t), (char *)&sind,
 230                     sizeof (sin_t), (t_scalar_t)ldccp->dccp_conn_req_seqnum);
 231 
 232         } else {
 233                 tpi_mp = mi_tpi_conn_ind(NULL,
 234                 (char *)&sin, sizeof (sin_t),
 235                 (char *)&dccp, (t_scalar_t)sizeof (intptr_t),
 236                 (t_scalar_t)ldccp->dccp_conn_req_seqnum);
 237         }
 238 
 239         /* XXX mss */
 240 
 241         return (tpi_mp);
 242 }
 243 
 244 static void
 245 dccp_input_listener(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
 246 {
 247         conn_t          *lconnp = (conn_t *)arg;
 248         conn_t          *econnp;
 249         dccp_t          *listener = lconnp->conn_dccp;
 250         dccp_t          *eager;
 251         dccp_stack_t    *dccps = listener->dccp_dccps;
 252         ip_stack_t      *ipst = dccps->dccps_netstack->netstack_ip;
 253         dccpha_t        *dccpha;
 254         squeue_t        *new_sqp;
 255         mblk_t          *tpi_mp;
 256         mblk_t          *mp1;
 257         uint_t          ifindex = ira->ira_ruifindex;
 258         uint_t          ip_hdr_len;
 259         uint_t          type;
 260         int             error;
 261 
 262         cmn_err(CE_NOTE, "dccp_input.c: dccp_input_listener");
 263 
 264         ip_hdr_len = ira->ira_ip_hdr_length;
 265         dccpha = (dccpha_t *)&mp->b_rptr[ip_hdr_len];
 266         type = (uint_t)dccpha->dha_type;
 267 
 268         DTRACE_DCCP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, lconnp->conn_ixa,
 269             __dtrace_dccp_void_ip_t *, mp->b_rptr, dccp_t *, listener,
 270             __dtrace_dccp_dccph_t *, dccpha);
 271 
 272         if (type != DCCP_PKT_REQUEST) {
 273                 cmn_err(CE_NOTE, "not request pkt");
 274 
 275                 /* XXX do something with a reset packet sent? */
 276                 freemsg(mp);
 277                 return;
 278         }
 279 
 280         /* XXX memory pressure */
 281 
 282         /* XXX request defense */
 283 
 284         /* XXX number of connections per listener */
 285 
 286         ASSERT(ira->ira_sqp != NULL);
 287         new_sqp = ira->ira_sqp;
 288 
 289         econnp = (conn_t *)dccp_get_conn(arg2, dccps);
 290         if (econnp == NULL) {
 291                 cmn_err(CE_NOTE, "econnp not found (eager)");
 292                 goto error2;
 293         }
 294 
 295         ASSERT(econnp->conn_netstack == lconnp->conn_netstack);
 296         econnp->conn_sqp = new_sqp;
 297         econnp->conn_initial_sqp = new_sqp;
 298         econnp->conn_ixa->ixa_sqp = new_sqp;
 299 
 300         econnp->conn_fport = dccpha->dha_lport;
 301         econnp->conn_lport = dccpha->dha_fport;
 302 
 303         error = conn_inherit_parent(lconnp, econnp);
 304         if (error != 0) {
 305                 cmn_err(CE_NOTE, "conn_inherit_parent failed");
 306                 goto error3;
 307         }
 308 
 309         /* We already know the laddr of the new connection is ours */
 310         econnp->conn_ixa->ixa_src_generation = ipst->ips_src_generation;
 311 
 312         ASSERT(OK_32PTR(mp->b_rptr));
 313         ASSERT(IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ||
 314             IPH_HDR_VERSION(mp->b_rptr) == IPV6_VERSION);
 315 
 316         if (lconnp->conn_family == AF_INET) {
 317                 ASSERT(IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION);
 318                 tpi_mp = dccp_conn_create_v4(lconnp, econnp, mp, ira);
 319         } else {
 320                 tpi_mp = dccp_conn_create_v6(lconnp, econnp, mp, ira);
 321         }
 322 
 323         if (tpi_mp == NULL) {
 324                 cmn_err(CE_NOTE, "tpi_mo == NULL");
 325                 goto error3;
 326         }
 327 
 328         eager = econnp->conn_dccp;
 329         SOCK_CONNID_INIT(eager->dccp_connid);
 330 
 331         dccp_init_values(eager, listener);
 332 
 333         ASSERT((econnp->conn_ixa->ixa_flags &
 334             (IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE |
 335             IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO)) ==
 336             (IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE |
 337             IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO));
 338 
 339         if (!(ira->ira_flags & IRAF_IS_IPV4) && econnp->conn_bound_if == 0) {
 340                 if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_faddr_v6) ||
 341                     IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6)) {
 342                         econnp->conn_incoming_ifindex = ifindex;
 343                         econnp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
 344                         econnp->conn_ixa->ixa_scopeid = ifindex;
 345                 }
 346         }
 347 
 348         if (ira->ira_cred != NULL) {
 349                 mblk_setcred(tpi_mp, ira->ira_cred, ira->ira_cpid);
 350         }
 351 
 352         if (IPCL_IS_NONSTR(lconnp)) {
 353                 econnp->conn_flags |= IPCL_NONSTR;
 354         }
 355 
 356         /* XXX dccps is right? */
 357         dccp_bind_hash_insert(&dccps->dccps_bind_fanout[
 358             DCCP_BIND_HASH(econnp->conn_lport, dccps->dccps_bind_fanout_size)], eager, 0);
 359 
 360         /* XXX CLUSTER */
 361 
 362         SOCK_CONNID_BUMP(eager->dccp_connid);
 363 
 364         error = dccp_set_destination(eager);
 365         if (error != 0) {
 366                 cmn_err(CE_NOTE, "dccp_set_destination failed.");
 367                 dccp_bind_hash_remove(eager);
 368                 goto error3;
 369         }
 370 
 371         /* Process all DCCP options */
 372         dccp_process_options(eager, dccpha);
 373 
 374         CONN_INC_REF(lconnp);
 375         eager->dccp_conn_req_seqnum = listener->dccp_conn_req_seqnum;
 376         if (++listener->dccp_conn_req_seqnum == -1) {
 377                 /*
 378                  * -1 is "special" and defined in TPI as something
 379                  * that should never be used in T_CONN_IND
 380                  */
 381                 ++listener->dccp_conn_req_seqnum;
 382         }
 383 
 384         /* XXX SYN DEFENSE */
 385 
 386         eager->dccp_state = DCCPS_REQUEST_RCVD;
 387         DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
 388             econnp->conn_ixa, void, NULL, dccp_t *, eager, void, NULL,
 389             int32_t, DCCPS_LISTEN);
 390 
 391 /*
 392         mp1 = dccp_xmit_mp(eager, eager->dccp_xmit_head, 0,
 393             NULL, NULL, 0, B_FALSE, NULL, B_FALSE);
 394 */
 395         mp1 = dccp_generate_packet(econnp, mp);
 396         if (mp1 == NULL) {
 397                 cmn_err(CE_NOTE, "dccp_generate_packet failed");
 398                 /*
 399                  * Increment the ref count as we are going to
 400                  * enqueueing an mp in squeue
 401                  */
 402                 CONN_INC_REF(econnp);
 403                 goto error;
 404         }
 405 
 406         CONN_INC_REF(econnp);
 407 
 408         error = ipcl_conn_insert(econnp);
 409         if (error != 0) {
 410                 cmn_err(CE_NOTE, "ipcl_conn_insert(econnp) failed");
 411                 goto error;
 412         }
 413 
 414         ASSERT(econnp->conn_ixa->ixa_notify_cookie == econnp->conn_dccp);
 415         freemsg(mp);
 416 
 417         /*
 418          * Send the SYN-ACK. Use the right squeue so that conn_ixa is
 419          * only used by one thread at a time.
 420          */
 421         if (econnp->conn_sqp == lconnp->conn_sqp) {
 422                 DTRACE_TCP5(send, mblk_t *, NULL, ip_xmit_attr_t *,
 423                     econnp->conn_ixa, __dtrace_dccp_void_ip_t *, mp1->b_rptr,
 424                     dccp_t *, eager, __dtrace_dccp_dccph_t *,
 425                     &mp1->b_rptr[econnp->conn_ixa->ixa_ip_hdr_length]);
 426                 (void) conn_ip_output(mp1, econnp->conn_ixa);
 427                 CONN_DEC_REF(econnp);
 428         } else {
 429                 SQUEUE_ENTER_ONE(econnp->conn_sqp, mp1, dccp_send_synack,
 430                     econnp, NULL, SQ_PROCESS, SQTAG_TCP_SEND_SYNACK); /* XXX */
 431         }
 432 
 433         return;
 434 error:
 435         freemsg(mp1);
 436 error2:
 437         CONN_DEC_REF(econnp);
 438 error3:
 439         freemsg(mp);
 440 }
 441 
 442 void
 443 dccp_input_listener_unbound(void *arg, mblk_t *mp, void *arg2,
 444     ip_recv_attr_t *ira)
 445 {
 446         conn_t          *connp = (conn_t *)arg;
 447         squeue_t        *sqp = (squeue_t *)arg2;
 448         squeue_t        *new_sqp;
 449         uint32_t        conn_flags;
 450 
 451         cmn_err(CE_NOTE, "dccp_input.c: dccp_input_listener_unbound");
 452 
 453         ASSERT(ira->ira_sqp != NULL);
 454         new_sqp = ira->ira_sqp;
 455 
 456         if (connp->conn_fanout == NULL) {
 457                 goto done;
 458         }
 459 
 460         /*
 461          * Bind to correct squeue.
 462          */
 463         if (!(connp->conn_flags & IPCL_FULLY_BOUND)) {
 464                 cmn_err(CE_NOTE, "not fully bound");
 465 
 466                 mutex_enter(&connp->conn_fanout->connf_lock);
 467                 mutex_enter(&connp->conn_lock);
 468 
 469                 if (connp->conn_ref != 4 ||
 470                     connp->conn_dccp->dccp_state != DCCPS_LISTEN) {
 471                         mutex_exit(&connp->conn_lock);
 472                         mutex_exit(&connp->conn_fanout->connf_lock);
 473                         goto done;
 474                 }
 475 
 476                 if (connp->conn_sqp != new_sqp) {
 477                         while (connp->conn_sqp != new_sqp) {
 478                                 (void) casptr(&connp->conn_sqp, sqp, new_sqp);
 479                         }
 480                         connp->conn_ixa->ixa_sqp = new_sqp;
 481                 }
 482 
 483                 do {
 484                         conn_flags = connp->conn_flags;
 485                         conn_flags |= IPCL_FULLY_BOUND;
 486                         (void) cas32(&connp->conn_flags, connp->conn_flags,
 487                             conn_flags);
 488                 } while (!(connp->conn_flags & IPCL_FULLY_BOUND));
 489 
 490                 mutex_exit(&connp->conn_lock);
 491                 mutex_exit(&connp->conn_fanout->connf_lock);
 492 
 493                 connp->conn_recv = dccp_input_listener;
 494         }
 495 
 496 done:
 497         if (connp->conn_sqp != sqp) {
 498                 CONN_INC_REF(connp);
 499                 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, connp,
 500                     ira, SQ_FILL, SQTAG_DCCP_CONN_REQ_UNBOUND);
 501         } else {
 502                 dccp_input_listener(connp, mp, sqp, ira);
 503         }
 504 }
 505 
 506 boolean_t
 507 dccp_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6,
 508     ip_recv_attr_t *ira)
 509 {
 510         cmn_err(CE_NOTE, "dccp_input.c: dccp_verifyicmp");
 511 
 512         return (B_TRUE);
 513 }
 514 
 515 /*
 516  * After a request-response-ack all packets end up here.
 517  */
 518 void
 519 dccp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
 520 {
 521         conn_t          *connp = (conn_t *)arg;
 522         squeue_t        *sqp = (squeue_t *)arg2;
 523         dccp_t          *dccp = connp->conn_dccp;
 524         dccp_stack_t    *dccps = dccp->dccp_dccps;
 525         dccpha_t        *dccpha;
 526         uchar_t         *iphdr;
 527         uchar_t         *rptr;
 528         sock_upcalls_t  *sockupcalls;
 529         uint_t          ip_hdr_len;
 530 
 531         cmn_err(CE_NOTE, "dccp_input.c: dccp_input_data");
 532 
 533         iphdr = mp->b_rptr;
 534         rptr = mp->b_rptr;
 535         ASSERT(OK_32PTR(rptr));
 536 
 537         ip_hdr_len = ira->ira_ip_hdr_length;
 538 
 539         ASSERT(DB_TYPE(mp) == M_DATA);
 540         ASSERT(mp->b_next == NULL);
 541 
 542         dccpha = (dccpha_t *)&rptr[ip_hdr_len];
 543 }
 544