1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2012 David Hoeppner. All rights reserved. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/strsun.h> 34 #include <sys/strsubr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/suntpi.h> 40 #include <sys/xti_inet.h> 41 #include <sys/squeue_impl.h> 42 #include <sys/squeue.h> 43 #include <sys/tsol/tnet.h> 44 45 #include <inet/common.h> 46 #include <inet/ip.h> 47 48 #include <sys/cmn_err.h> 49 50 #include "dccp_impl.h" 51 52 static mblk_t *dccp_conn_create_v4(conn_t *, conn_t *, mblk_t *, 53 ip_recv_attr_t *); 54 static mblk_t *dccp_conn_create_v6(conn_t *, conn_t *, mblk_t *, 55 ip_recv_attr_t *); 56 static void dccp_input_listener(void *, mblk_t *, void *, ip_recv_attr_t *); 57 static void dccp_icmp_error_ipv6(dccp_t *, mblk_t *, ip_recv_attr_t *); 58 59 void 60 dccp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 61 { 62 conn_t *connp = (conn_t *)arg1; 63 dccp_t *dccp = connp->conn_dccp; 64 65 cmn_err(CE_NOTE, "dccp_input.c: dccp_icmp_input"); 66 67 /* Assume IP provides aligned packets */ 68 ASSERT(OK_32PTR(mp->b_rptr)); 69 ASSERT((MBLKL(mp) >= sizeof (ipha_t))); 70 71 /* Verify IP version. */ 72 if (!(ira->ira_flags & IRAF_IS_IPV4)) { 73 dccp_icmp_error_ipv6(dccp, mp, ira); 74 return; 75 } 76 77 } 78 79 static void 80 dccp_icmp_error_ipv6(dccp_t *dccp, mblk_t *mp, ip_recv_attr_t *ira) 81 { 82 cmn_err(CE_NOTE, "dccp_input.c: dccp_icmp_error_ipv6"); 83 } 84 85 void 86 dccp_rsrv(queue_t *q) 87 { 88 cmn_err(CE_NOTE, "dccp_input.c: dccp_rsrv"); 89 } 90 91 /* 92 * Handle a REQUEST on an AF_INET6 socket; can be either IPv4 or IPv6. 93 */ 94 static mblk_t * 95 dccp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp, 96 ip_recv_attr_t *ira) 97 { 98 dccp_t *ldccp = lconnp->conn_dccp; 99 dccp_t *dccp = connp->conn_dccp; 100 dccp_stack_t *dccps = dccp->dccp_dccps; 101 ipha_t *ipha; 102 ip6_t *ip6h; 103 mblk_t *tpi_mp; 104 sin6_t sin6; 105 uint_t ifindex = ira->ira_ruifindex; 106 107 if (ira->ira_flags & IRAF_IS_IPV4) { 108 ipha = (ipha_t *)mp->b_rptr; 109 110 connp->conn_ipversion = IPV4_VERSION; 111 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_laddr_v6); 112 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_faddr_v6); 113 connp->conn_saddr_v6 = connp->conn_laddr_v6; 114 115 sin6 = sin6_null; 116 sin6.sin6_addr = connp->conn_faddr_v6; 117 sin6.sin6_port = connp->conn_fport; 118 sin6.sin6_family = AF_INET6; 119 sin6.__sin6_src_id = ip_srcid_find_addr(&connp->conn_laddr_v6, 120 IPCL_ZONEID(lconnp), dccps->dccps_netstack); 121 122 if (connp->conn_recv_ancillary.crb_recvdstaddr) { 123 sin6_t sin6d; 124 125 sin6d = sin6_null; 126 sin6d.sin6_addr = connp->conn_laddr_v6; 127 sin6d.sin6_port = connp->conn_lport; 128 sin6d.sin6_family = AF_INET; 129 tpi_mp = mi_tpi_extconn_ind(NULL, 130 (char *)&sin6d, sizeof (sin6_t), 131 (char *)&dccp, 132 (t_scalar_t)sizeof (intptr_t), 133 (char *)&sin6d, sizeof (sin6_t), 134 (t_scalar_t)ldccp->dccp_conn_req_seqnum); 135 } else { 136 tpi_mp = mi_tpi_conn_ind(NULL, 137 (char *)&sin6, sizeof (sin6_t), 138 (char *)&dccp, (t_scalar_t)sizeof (intptr_t), 139 (t_scalar_t)ldccp->dccp_conn_req_seqnum); 140 } 141 } else { 142 ip6h = (ip6_t *)mp->b_rptr; 143 144 connp->conn_ipversion = IPV6_VERSION; 145 connp->conn_laddr_v6 = ip6h->ip6_dst; 146 connp->conn_faddr_v6 = ip6h->ip6_src; 147 connp->conn_saddr_v6 = connp->conn_laddr_v6; 148 149 sin6 = sin6_null; 150 sin6.sin6_addr = connp->conn_faddr_v6; 151 sin6.sin6_port = connp->conn_fport; 152 sin6.sin6_family = AF_INET6; 153 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 154 sin6.__sin6_src_id = ip_srcid_find_addr(&connp->conn_laddr_v6, 155 IPCL_ZONEID(lconnp), dccps->dccps_netstack); 156 157 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) { 158 /* Pass up the scope_id of remote addr */ 159 sin6.sin6_scope_id = ifindex; 160 } else { 161 sin6.sin6_scope_id = 0; 162 } 163 if (connp->conn_recv_ancillary.crb_recvdstaddr) { 164 sin6_t sin6d; 165 166 sin6d = sin6_null; 167 sin6.sin6_addr = connp->conn_laddr_v6; 168 sin6d.sin6_port = connp->conn_lport; 169 sin6d.sin6_family = AF_INET6; 170 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_laddr_v6)) 171 sin6d.sin6_scope_id = ifindex; 172 173 tpi_mp = mi_tpi_extconn_ind(NULL, 174 (char *)&sin6d, sizeof (sin6_t), 175 (char *)&dccp, (t_scalar_t)sizeof (intptr_t), 176 (char *)&sin6d, sizeof (sin6_t), 177 (t_scalar_t)ldccp->dccp_conn_req_seqnum); 178 } else { 179 tpi_mp = mi_tpi_conn_ind(NULL, 180 (char *)&sin6, sizeof (sin6_t), 181 (char *)&dccp, (t_scalar_t)sizeof (intptr_t), 182 (t_scalar_t)ldccp->dccp_conn_req_seqnum); 183 } 184 } 185 186 /* XXX mss */ 187 return (tpi_mp); 188 } 189 190 /* 191 * Handle a REQUEST on an AF_INET socket. 192 */ 193 static mblk_t * 194 dccp_conn_create_v4(conn_t *lconnp, conn_t *connp, mblk_t *mp, 195 ip_recv_attr_t *ira) 196 { 197 dccp_t *ldccp = lconnp->conn_dccp; 198 dccp_t *dccp = connp->conn_dccp; 199 dccp_stack_t *dccps = dccp->dccp_dccps; 200 ipha_t *ipha; 201 mblk_t *tpi_mp; 202 sin_t sin; 203 204 ASSERT(ira->ira_flags & IRAF_IS_IPV4); 205 ipha = (ipha_t *)mp->b_rptr; 206 207 connp->conn_ipversion = IPV4_VERSION; 208 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_laddr_v6); 209 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_faddr_v6); 210 connp->conn_saddr_v6 = connp->conn_laddr_v6; 211 212 sin = sin_null; 213 sin.sin_addr.s_addr = connp->conn_faddr_v4; 214 sin.sin_port = connp->conn_fport; 215 sin.sin_family = AF_INET; 216 217 if (lconnp->conn_recv_ancillary.crb_recvdstaddr) { 218 cmn_err(CE_NOTE, "ancillary"); 219 220 sin_t sind; 221 222 sind = sin_null; 223 sind.sin_addr.s_addr = connp->conn_laddr_v4; 224 sind.sin_port = connp->conn_lport; 225 sind.sin_family = AF_INET; 226 227 tpi_mp = mi_tpi_extconn_ind(NULL, 228 (char *)&sind, sizeof (sin_t), (char *)&dccp, 229 (t_scalar_t)sizeof (intptr_t), (char *)&sind, 230 sizeof (sin_t), (t_scalar_t)ldccp->dccp_conn_req_seqnum); 231 232 } else { 233 tpi_mp = mi_tpi_conn_ind(NULL, 234 (char *)&sin, sizeof (sin_t), 235 (char *)&dccp, (t_scalar_t)sizeof (intptr_t), 236 (t_scalar_t)ldccp->dccp_conn_req_seqnum); 237 } 238 239 /* XXX mss */ 240 241 return (tpi_mp); 242 } 243 244 static void 245 dccp_input_listener(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 246 { 247 conn_t *lconnp = (conn_t *)arg; 248 conn_t *econnp; 249 dccp_t *listener = lconnp->conn_dccp; 250 dccp_t *eager; 251 dccp_stack_t *dccps = listener->dccp_dccps; 252 ip_stack_t *ipst = dccps->dccps_netstack->netstack_ip; 253 dccpha_t *dccpha; 254 squeue_t *new_sqp; 255 mblk_t *tpi_mp; 256 mblk_t *mp1; 257 uint_t ifindex = ira->ira_ruifindex; 258 uint_t ip_hdr_len; 259 uint_t type; 260 int error; 261 262 cmn_err(CE_NOTE, "dccp_input.c: dccp_input_listener"); 263 264 ip_hdr_len = ira->ira_ip_hdr_length; 265 dccpha = (dccpha_t *)&mp->b_rptr[ip_hdr_len]; 266 type = (uint_t)dccpha->dha_type; 267 268 DTRACE_DCCP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, lconnp->conn_ixa, 269 __dtrace_dccp_void_ip_t *, mp->b_rptr, dccp_t *, listener, 270 __dtrace_dccp_dccph_t *, dccpha); 271 272 if (type != DCCP_PKT_REQUEST) { 273 cmn_err(CE_NOTE, "not request pkt"); 274 275 /* XXX do something with a reset packet sent? */ 276 freemsg(mp); 277 return; 278 } 279 280 /* XXX memory pressure */ 281 282 /* XXX request defense */ 283 284 /* XXX number of connections per listener */ 285 286 ASSERT(ira->ira_sqp != NULL); 287 new_sqp = ira->ira_sqp; 288 289 econnp = (conn_t *)dccp_get_conn(arg2, dccps); 290 if (econnp == NULL) { 291 cmn_err(CE_NOTE, "econnp not found (eager)"); 292 goto error2; 293 } 294 295 ASSERT(econnp->conn_netstack == lconnp->conn_netstack); 296 econnp->conn_sqp = new_sqp; 297 econnp->conn_initial_sqp = new_sqp; 298 econnp->conn_ixa->ixa_sqp = new_sqp; 299 300 econnp->conn_fport = dccpha->dha_lport; 301 econnp->conn_lport = dccpha->dha_fport; 302 303 error = conn_inherit_parent(lconnp, econnp); 304 if (error != 0) { 305 cmn_err(CE_NOTE, "conn_inherit_parent failed"); 306 goto error3; 307 } 308 309 /* We already know the laddr of the new connection is ours */ 310 econnp->conn_ixa->ixa_src_generation = ipst->ips_src_generation; 311 312 ASSERT(OK_32PTR(mp->b_rptr)); 313 ASSERT(IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION || 314 IPH_HDR_VERSION(mp->b_rptr) == IPV6_VERSION); 315 316 if (lconnp->conn_family == AF_INET) { 317 ASSERT(IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION); 318 tpi_mp = dccp_conn_create_v4(lconnp, econnp, mp, ira); 319 } else { 320 tpi_mp = dccp_conn_create_v6(lconnp, econnp, mp, ira); 321 } 322 323 if (tpi_mp == NULL) { 324 cmn_err(CE_NOTE, "tpi_mo == NULL"); 325 goto error3; 326 } 327 328 eager = econnp->conn_dccp; 329 SOCK_CONNID_INIT(eager->dccp_connid); 330 331 dccp_init_values(eager, listener); 332 333 ASSERT((econnp->conn_ixa->ixa_flags & 334 (IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 335 IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO)) == 336 (IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 337 IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO)); 338 339 if (!(ira->ira_flags & IRAF_IS_IPV4) && econnp->conn_bound_if == 0) { 340 if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_faddr_v6) || 341 IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6)) { 342 econnp->conn_incoming_ifindex = ifindex; 343 econnp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 344 econnp->conn_ixa->ixa_scopeid = ifindex; 345 } 346 } 347 348 if (ira->ira_cred != NULL) { 349 mblk_setcred(tpi_mp, ira->ira_cred, ira->ira_cpid); 350 } 351 352 if (IPCL_IS_NONSTR(lconnp)) { 353 econnp->conn_flags |= IPCL_NONSTR; 354 } 355 356 /* XXX dccps is right? */ 357 dccp_bind_hash_insert(&dccps->dccps_bind_fanout[ 358 DCCP_BIND_HASH(econnp->conn_lport, dccps->dccps_bind_fanout_size)], eager, 0); 359 360 /* XXX CLUSTER */ 361 362 SOCK_CONNID_BUMP(eager->dccp_connid); 363 364 error = dccp_set_destination(eager); 365 if (error != 0) { 366 cmn_err(CE_NOTE, "dccp_set_destination failed."); 367 dccp_bind_hash_remove(eager); 368 goto error3; 369 } 370 371 /* Process all DCCP options */ 372 dccp_process_options(eager, dccpha); 373 374 CONN_INC_REF(lconnp); 375 eager->dccp_conn_req_seqnum = listener->dccp_conn_req_seqnum; 376 if (++listener->dccp_conn_req_seqnum == -1) { 377 /* 378 * -1 is "special" and defined in TPI as something 379 * that should never be used in T_CONN_IND 380 */ 381 ++listener->dccp_conn_req_seqnum; 382 } 383 384 /* XXX SYN DEFENSE */ 385 386 eager->dccp_state = DCCPS_REQUEST_RCVD; 387 DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *, 388 econnp->conn_ixa, void, NULL, dccp_t *, eager, void, NULL, 389 int32_t, DCCPS_LISTEN); 390 391 /* 392 mp1 = dccp_xmit_mp(eager, eager->dccp_xmit_head, 0, 393 NULL, NULL, 0, B_FALSE, NULL, B_FALSE); 394 */ 395 mp1 = dccp_generate_packet(econnp, mp); 396 if (mp1 == NULL) { 397 cmn_err(CE_NOTE, "dccp_generate_packet failed"); 398 /* 399 * Increment the ref count as we are going to 400 * enqueueing an mp in squeue 401 */ 402 CONN_INC_REF(econnp); 403 goto error; 404 } 405 406 CONN_INC_REF(econnp); 407 408 error = ipcl_conn_insert(econnp); 409 if (error != 0) { 410 cmn_err(CE_NOTE, "ipcl_conn_insert(econnp) failed"); 411 goto error; 412 } 413 414 ASSERT(econnp->conn_ixa->ixa_notify_cookie == econnp->conn_dccp); 415 freemsg(mp); 416 417 /* 418 * Send the SYN-ACK. Use the right squeue so that conn_ixa is 419 * only used by one thread at a time. 420 */ 421 if (econnp->conn_sqp == lconnp->conn_sqp) { 422 DTRACE_TCP5(send, mblk_t *, NULL, ip_xmit_attr_t *, 423 econnp->conn_ixa, __dtrace_dccp_void_ip_t *, mp1->b_rptr, 424 dccp_t *, eager, __dtrace_dccp_dccph_t *, 425 &mp1->b_rptr[econnp->conn_ixa->ixa_ip_hdr_length]); 426 (void) conn_ip_output(mp1, econnp->conn_ixa); 427 CONN_DEC_REF(econnp); 428 } else { 429 SQUEUE_ENTER_ONE(econnp->conn_sqp, mp1, dccp_send_synack, 430 econnp, NULL, SQ_PROCESS, SQTAG_TCP_SEND_SYNACK); /* XXX */ 431 } 432 433 return; 434 error: 435 freemsg(mp1); 436 error2: 437 CONN_DEC_REF(econnp); 438 error3: 439 freemsg(mp); 440 } 441 442 void 443 dccp_input_listener_unbound(void *arg, mblk_t *mp, void *arg2, 444 ip_recv_attr_t *ira) 445 { 446 conn_t *connp = (conn_t *)arg; 447 squeue_t *sqp = (squeue_t *)arg2; 448 squeue_t *new_sqp; 449 uint32_t conn_flags; 450 451 cmn_err(CE_NOTE, "dccp_input.c: dccp_input_listener_unbound"); 452 453 ASSERT(ira->ira_sqp != NULL); 454 new_sqp = ira->ira_sqp; 455 456 if (connp->conn_fanout == NULL) { 457 goto done; 458 } 459 460 /* 461 * Bind to correct squeue. 462 */ 463 if (!(connp->conn_flags & IPCL_FULLY_BOUND)) { 464 cmn_err(CE_NOTE, "not fully bound"); 465 466 mutex_enter(&connp->conn_fanout->connf_lock); 467 mutex_enter(&connp->conn_lock); 468 469 if (connp->conn_ref != 4 || 470 connp->conn_dccp->dccp_state != DCCPS_LISTEN) { 471 mutex_exit(&connp->conn_lock); 472 mutex_exit(&connp->conn_fanout->connf_lock); 473 goto done; 474 } 475 476 if (connp->conn_sqp != new_sqp) { 477 while (connp->conn_sqp != new_sqp) { 478 (void) casptr(&connp->conn_sqp, sqp, new_sqp); 479 } 480 connp->conn_ixa->ixa_sqp = new_sqp; 481 } 482 483 do { 484 conn_flags = connp->conn_flags; 485 conn_flags |= IPCL_FULLY_BOUND; 486 (void) cas32(&connp->conn_flags, connp->conn_flags, 487 conn_flags); 488 } while (!(connp->conn_flags & IPCL_FULLY_BOUND)); 489 490 mutex_exit(&connp->conn_lock); 491 mutex_exit(&connp->conn_fanout->connf_lock); 492 493 connp->conn_recv = dccp_input_listener; 494 } 495 496 done: 497 if (connp->conn_sqp != sqp) { 498 CONN_INC_REF(connp); 499 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, connp, 500 ira, SQ_FILL, SQTAG_DCCP_CONN_REQ_UNBOUND); 501 } else { 502 dccp_input_listener(connp, mp, sqp, ira); 503 } 504 } 505 506 boolean_t 507 dccp_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6, 508 ip_recv_attr_t *ira) 509 { 510 cmn_err(CE_NOTE, "dccp_input.c: dccp_verifyicmp"); 511 512 return (B_TRUE); 513 } 514 515 /* 516 * After a request-response-ack all packets end up here. 517 */ 518 void 519 dccp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 520 { 521 conn_t *connp = (conn_t *)arg; 522 squeue_t *sqp = (squeue_t *)arg2; 523 dccp_t *dccp = connp->conn_dccp; 524 dccp_stack_t *dccps = dccp->dccp_dccps; 525 dccpha_t *dccpha; 526 uchar_t *iphdr; 527 uchar_t *rptr; 528 sock_upcalls_t *sockupcalls; 529 uint_t ip_hdr_len; 530 531 cmn_err(CE_NOTE, "dccp_input.c: dccp_input_data"); 532 533 iphdr = mp->b_rptr; 534 rptr = mp->b_rptr; 535 ASSERT(OK_32PTR(rptr)); 536 537 ip_hdr_len = ira->ira_ip_hdr_length; 538 539 ASSERT(DB_TYPE(mp) == M_DATA); 540 ASSERT(mp->b_next == NULL); 541 542 dccpha = (dccpha_t *)&rptr[ip_hdr_len]; 543 } 544