1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2012 David Hoeppner. All rights reserved. 29 */ 30 31 /* 32 * This file implements the Datagram Congestion Control Protocol (DCCP). 
33 */ 34 35 #include <sys/types.h> 36 #include <sys/stream.h> 37 #include <sys/stropts.h> 38 #include <sys/strlog.h> 39 #include <sys/strsun.h> 40 #define _SUN_TPI_VERSION 2 41 #include <sys/tihdr.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/sockio.h> 45 #include <sys/priv.h> 46 #include <sys/vtrace.h> 47 #include <sys/sdt.h> 48 #include <sys/debug.h> 49 #include <sys/ddi.h> 50 #include <sys/isa_defs.h> 51 #include <sys/policy.h> 52 #include <sys/tsol/label.h> 53 #include <sys/tsol/tnet.h> 54 #include <inet/dccp_impl.h> 55 #include <inet/dccp_stack.h> 56 #include <inet/kstatcom.h> 57 #include <inet/snmpcom.h> 58 59 #include <sys/cmn_err.h> 60 61 int dccp_squeue_flag; 62 63 /* Setable in /etc/system */ 64 uint_t dccp_bind_fanout_size = DCCP_BIND_FANOUT_SIZE; 65 66 static void dccp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t, 67 ixa_notify_arg_t); 68 69 /* Functions to register netstack */ 70 static void *dccp_stack_init(netstackid_t, netstack_t *); 71 static void dccp_stack_fini(netstackid_t, void *); 72 73 /* Stream device open functions */ 74 static int dccp_openv4(queue_t *, dev_t *, int, int, cred_t *); 75 static int dccp_openv6(queue_t *, dev_t *, int, int, cred_t *); 76 static int dccp_open(queue_t *, dev_t *, int, int, cred_t *, 77 boolean_t); 78 79 /* Write service routine */ 80 static void dccp_wsrv(queue_t *); 81 82 /* Connection related functions */ 83 static int dccp_connect_ipv4(dccp_t *, ipaddr_t *, in_port_t, uint_t); 84 static int dccp_connect_ipv6(dccp_t *, in6_addr_t *, in_port_t, uint32_t, 85 uint_t, uint32_t); 86 87 /* Initialise ISS */ 88 static void dccp_iss_init(dccp_t *); 89 90 struct module_info dccp_rinfo = { 91 DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, DCCP_RECV_HIWATER, 92 DCCP_RECV_LOWATER 93 }; 94 95 static struct module_info dccp_winfo = { 96 DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, 127, 16 97 }; 98 99 /* 100 * Queue information structure with DCCP entry points. 
101 */ 102 struct qinit dccp_rinitv4 = { 103 NULL, (pfi_t)dccp_rsrv, dccp_openv4, dccp_tpi_close, NULL, &dccp_rinfo 104 }; 105 106 struct qinit dccp_rinitv6 = { 107 NULL, (pfi_t)dccp_rsrv, dccp_openv6, dccp_tpi_close, NULL, &dccp_rinfo 108 }; 109 110 struct qinit dccp_winit = { 111 (pfi_t)dccp_wput, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo 112 }; 113 114 /* Initial entry point for TCP in socket mode */ 115 struct qinit dccp_sock_winit = { 116 (pfi_t)dccp_wput_sock, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo 117 }; 118 119 struct qinit dccp_fallback_sock_winit = { 120 (pfi_t)dccp_wput_fallback, NULL, NULL, NULL, NULL, &dccp_winfo 121 }; 122 /* 123 * DCCP as acceptor STREAM. 124 */ 125 struct qinit dccp_acceptor_rinit = { 126 NULL, (pfi_t)dccp_rsrv, NULL, dccp_tpi_close_accept, NULL, &dccp_winfo 127 }; 128 129 struct qinit dccp_acceptor_winit = { 130 (pfi_t)dccp_tpi_accept, NULL, NULL, NULL, NULL, &dccp_winfo 131 }; 132 133 /* AF_INET /dev/dccp */ 134 struct streamtab dccpinfov4 = { 135 &dccp_rinitv4, &dccp_winit 136 }; 137 138 /* AF_INET6 /dev/dccp6 */ 139 struct streamtab dccpinfov6 = { 140 &dccp_rinitv6, &dccp_winit 141 }; 142 143 /* Template for response to info request */ 144 struct T_info_ack dccp_g_t_info_ack = { 145 T_INFO_ACK, /* PRIM_type */ 146 0, /* TSDU_size */ 147 T_INFINITE, /* ETSDU_size */ 148 T_INVALID, /* CDATA_size */ 149 T_INVALID, /* DDATA_size */ 150 sizeof (sin_t), /* ADDR_size */ 151 0, /* OPT_size - not initialized here */ 152 TIDUSZ, /* TIDU_size */ 153 T_COTS_ORD, /* SERV_type */ 154 DCCPS_CLOSED, /* CURRENT_state */ 155 (XPG4_1|EXPINLINE) /* PROVIDER_flag */ 156 }; 157 158 struct T_info_ack dccp_g_t_info_ack_v6 = { 159 T_INFO_ACK, /* PRIM_type */ 160 0, /* TSDU_size */ 161 T_INFINITE, /* ETSDU_size */ 162 T_INVALID, /* CDATA_size */ 163 T_INVALID, /* DDATA_size */ 164 sizeof (sin6_t), /* ADDR_size */ 165 0, /* OPT_size - not initialized here */ 166 TIDUSZ, /* TIDU_size */ 167 T_COTS_ORD, /* SERV_type */ 168 DCCPS_CLOSED, /* 
CURRENT_state */ 169 (XPG4_1|EXPINLINE) /* PROVIDER_flag */ 170 }; 171 172 /* 173 * DCCP Tunables. 174 */ 175 extern mod_prop_info_t dccp_propinfo_tbl[]; 176 extern int dccp_propinfo_count; 177 178 /* 179 * Register DCCP in ip netstack. 180 */ 181 void 182 dccp_ddi_g_init(void) 183 { 184 /* Global timer cache */ 185 dccp_timercache = kmem_cache_create("dccp_timercache", 186 sizeof (dccp_timer_t) + sizeof (mblk_t), 0, 187 NULL, NULL, NULL, NULL, NULL, 0); 188 189 netstack_register(NS_DCCP, dccp_stack_init, NULL, dccp_stack_fini); 190 } 191 192 /* 193 * Unregister DCCP from ip netstack. 194 */ 195 void 196 dccp_ddi_g_destroy(void) 197 { 198 /* Global timer cache */ 199 kmem_cache_destroy(dccp_timercache); 200 201 netstack_unregister(NS_DCCP); 202 } 203 204 #define INET_NAME "ip" 205 206 /* 207 * Initialize this DCCP stack instance. 208 */ 209 static void * 210 dccp_stack_init(netstackid_t stackid, netstack_t *ns) 211 { 212 dccp_stack_t *dccps; 213 major_t major; 214 size_t arrsz; 215 int error; 216 int i; 217 218 dccps = kmem_zalloc(sizeof (*dccps), KM_SLEEP); 219 if (dccps == NULL) { 220 return (NULL); 221 } 222 dccps->dccps_netstack = ns; 223 224 /* Ports */ 225 mutex_init(&dccps->dccps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 226 dccps->dccps_num_epriv_ports = DCCP_NUM_EPRIV_PORTS; 227 dccps->dccps_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 228 dccps->dccps_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 229 dccps->dccps_min_anonpriv_port = 512; 230 231 dccps->dccps_bind_fanout_size = dccp_bind_fanout_size; 232 233 /* Bind fanout */ 234 dccps->dccps_bind_fanout = kmem_zalloc(dccps->dccps_bind_fanout_size * 235 sizeof (dccp_df_t), KM_SLEEP); 236 for (i = 0; i < dccps->dccps_bind_fanout_size; i++) { 237 mutex_init(&dccps->dccps_bind_fanout[i].df_lock, NULL, 238 MUTEX_DEFAULT, NULL); 239 } 240 241 /* Tunable properties */ 242 arrsz = dccp_propinfo_count * sizeof (mod_prop_info_t); 243 dccps->dccps_propinfo_tbl = kmem_alloc(arrsz, KM_SLEEP); 244 if (dccps->dccps_propinfo_tbl == 
NULL) { 245 kmem_free(dccps, sizeof (*dccps)); 246 return (NULL); 247 } 248 bcopy(dccp_propinfo_tbl, dccps->dccps_propinfo_tbl, arrsz); 249 250 /* Allocate per netstack cpu stats */ 251 mutex_enter(&cpu_lock); 252 dccps->dccps_sc_cnt = MAX(ncpus, boot_ncpus); 253 mutex_exit(&cpu_lock); 254 255 dccps->dccps_sc = kmem_zalloc(max_ncpus * sizeof (dccp_stats_cpu_t *), 256 KM_SLEEP); 257 for (i = 0; i < dccps->dccps_sc_cnt; i++) { 258 dccps->dccps_sc[i] = kmem_zalloc(sizeof (dccp_stats_cpu_t), 259 KM_SLEEP); 260 } 261 262 /* Kernel statistics */ 263 //dccps->dccps_kstat = dccp_kstat2_init(stackid); 264 //dccps->dccps_mibkp = dccp_kstat_init(stackid); 265 266 /* Driver major number */ 267 major = mod_name_to_major(INET_NAME); 268 error = ldi_ident_from_major(major, &dccps->dccps_ldi_ident); 269 ASSERT(error == 0); 270 271 return (dccps); 272 } 273 274 /* 275 * Destroy this DCCP netstack instance. 276 */ 277 static void 278 dccp_stack_fini(netstackid_t stackid, void *arg) 279 { 280 dccp_stack_t *dccps = (dccp_stack_t *)arg; 281 int i; 282 283 /* Free cpu stats */ 284 for (i = 0; i < dccps->dccps_sc_cnt; i++) { 285 kmem_free(dccps->dccps_sc[i], sizeof (dccp_stats_cpu_t)); 286 } 287 kmem_free(dccps->dccps_sc, max_ncpus * sizeof (dccp_stats_cpu_t *)); 288 289 /* Free tunable properties */ 290 kmem_free(dccps->dccps_propinfo_tbl, 291 dccp_propinfo_count * sizeof (mod_prop_info_t)); 292 dccps->dccps_propinfo_tbl = NULL; 293 294 /* Free bind fanout */ 295 for (i = 0; i < dccps->dccps_bind_fanout_size; i++) { 296 ASSERT(dccps->dccps_bind_fanout[i].df_dccp == NULL); 297 mutex_destroy(&dccps->dccps_bind_fanout[i].df_lock); 298 } 299 kmem_free(dccps->dccps_bind_fanout, dccps->dccps_bind_fanout_size * 300 sizeof (dccp_df_t)); 301 dccps->dccps_bind_fanout = NULL; 302 303 /* Kernel statistics */ 304 dccp_kstat_fini(stackid, dccps->dccps_mibkp); 305 dccps->dccps_mibkp = NULL; 306 dccp_kstat2_fini(stackid, dccps->dccps_kstat); 307 dccps->dccps_kstat = NULL; 308 309 
ldi_ident_release(dccps->dccps_ldi_ident); 310 311 kmem_free(dccps, sizeof (*dccps)); 312 } 313 314 /* /dev/dccp */ 315 static int 316 dccp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 317 { 318 cmn_err(CE_NOTE, "dccp.c: dccp_openv4\n"); 319 320 return (dccp_open(q, devp, flag, sflag, credp, B_FALSE)); 321 } 322 323 /* /dev/dccp6 */ 324 static int 325 dccp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 326 { 327 cmn_err(CE_NOTE, "dccp.c: dccp_openv6\n"); 328 329 return (dccp_open(q, devp, flag, sflag, credp, B_TRUE)); 330 } 331 332 /* 333 * Common open function for v4 and v6 devices. 334 */ 335 static int 336 dccp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 337 boolean_t isv6) 338 { 339 conn_t *connp; 340 dccp_t *dccp; 341 vmem_t *minor_arena; 342 dev_t conn_dev; 343 boolean_t issocket; 344 int error; 345 346 cmn_err(CE_NOTE, "dccp.c: dccp_open"); 347 348 /* If the stream is already open, return immediately */ 349 if (q->q_ptr != NULL) { 350 return (0); 351 } 352 353 if (sflag == MODOPEN) { 354 return (EINVAL); 355 } 356 357 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 358 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 359 minor_arena = ip_minor_arena_la; 360 } else { 361 /* 362 * Either minor numbers in the large arena were exhausted 363 * or a non socket application is doing the open. 364 * Try to allocate from the small arena. 365 */ 366 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 367 return (EBUSY); 368 } 369 minor_arena = ip_minor_arena_sa; 370 } 371 372 ASSERT(minor_arena != NULL); 373 374 *devp = makedevice(getmajor(*devp), (minor_t)conn_dev); 375 376 if (flag & SO_FALLBACK) { 377 /* 378 * Non streams socket needs a stream to fallback to. 
379 */ 380 RD(q)->q_ptr = (void *)conn_dev; 381 WR(q)->q_qinfo = &dccp_fallback_sock_winit; 382 WR(q)->q_ptr = (void *)minor_arena; 383 qprocson(q); 384 return (0); 385 } else if (flag & SO_ACCEPTOR) { 386 q->q_qinfo = &dccp_acceptor_rinit; 387 /* 388 * The conn_dev and minor_arena will be subsequently used by 389 * dccp_tli_accept() and dccp_tpi_close_accept() to figure out 390 * the minor device number for this connection from the q_ptr. 391 */ 392 RD(q)->q_ptr = (void *)conn_dev; 393 WR(q)->q_qinfo = &dccp_acceptor_winit; 394 WR(q)->q_ptr = (void *)minor_arena; 395 qprocson(q); 396 return (0); 397 } 398 399 issocket = flag & SO_SOCKSTR; 400 connp = dccp_create_common(credp, isv6, issocket, &error); 401 if (connp == NULL) { 402 inet_minor_free(minor_arena, conn_dev); 403 q->q_ptr = WR(q)->q_ptr = NULL; 404 return (error); 405 } 406 407 connp->conn_rq = q; 408 connp->conn_wq = WR(q); 409 q->q_ptr = WR(q)->q_ptr = connp; 410 411 connp->conn_dev = conn_dev; 412 connp->conn_minor_arena = minor_arena; 413 414 ASSERT(q->q_qinfo == &dccp_rinitv4 || q->q_qinfo == &dccp_rinitv6); 415 ASSERT(WR(q)->q_qinfo == &dccp_winit); 416 417 dccp = connp->conn_dccp; 418 419 if (issocket) { 420 WR(q)->q_qinfo = &dccp_sock_winit; 421 } else { 422 #ifdef _ILP32 423 dccp->dccp_acceptor_id = (t_uscalar_t)RD(q); 424 #else 425 dccp->dccp_acceptor_id = conn_dev; 426 #endif /* _ILP32 */ 427 } 428 429 /* 430 * Put the ref for DCCP. Ref for IP was already put 431 * by ipcl_conn_create. Also Make the conn_t globally 432 * visible to walkers. 
433 */ 434 mutex_enter(&connp->conn_lock); 435 CONN_INC_REF_LOCKED(connp); 436 ASSERT(connp->conn_ref == 2); 437 connp->conn_state_flags &= ~CONN_INCIPIENT; 438 mutex_exit(&connp->conn_lock); 439 440 qprocson(q); 441 442 return (0); 443 } 444 445 /* 446 * IXA notify 447 */ 448 static void 449 dccp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 450 ixa_notify_arg_t narg) 451 { 452 cmn_err(CE_NOTE, "dccp.c: dccp_notify"); 453 } 454 455 /* 456 * Build the template headers. 457 */ 458 int 459 dccp_build_hdrs(dccp_t *dccp) 460 { 461 dccp_stack_t *dccps = dccp->dccp_dccps; 462 conn_t *connp = dccp->dccp_connp; 463 dccpha_t *dccpha; 464 uint32_t cksum; 465 char buf[DCCP_MAX_HDR_LENGTH]; 466 uint_t buflen; 467 uint_t ulplen = 12; 468 uint_t extralen = 0; 469 int error; 470 471 cmn_err(CE_NOTE, "dccp.c: dccp_build_hdrs"); 472 473 buflen = connp->conn_ht_ulp_len; 474 if (buflen != 0) { 475 cmn_err(CE_NOTE, "buflen != 0"); 476 bcopy(connp->conn_ht_ulp, buf, buflen); 477 extralen -= buflen - ulplen; 478 ulplen = buflen; 479 } 480 481 mutex_enter(&connp->conn_lock); 482 error = conn_build_hdr_template(connp, ulplen, extralen, 483 &connp->conn_laddr_v6, &connp->conn_faddr_v6, connp->conn_flowinfo); 484 mutex_exit(&connp->conn_lock); 485 if (error != 0) { 486 cmn_err(CE_NOTE, "conn_build_hdr_template failed"); 487 return (error); 488 } 489 490 dccpha = (dccpha_t *)connp->conn_ht_ulp; 491 dccp->dccp_dccpha = dccpha; 492 493 if (buflen != 0) { 494 bcopy(buf, connp->conn_ht_ulp, buflen); 495 } else { 496 dccpha->dha_sum = 0; 497 dccpha->dha_lport = connp->conn_lport; 498 dccpha->dha_fport = connp->conn_fport; 499 } 500 501 cksum = sizeof (dccpha_t) + connp->conn_sum; 502 cksum = (cksum >> 16) + (cksum & 0xFFFF); 503 dccpha->dha_sum = htons(cksum); 504 dccpha->dha_offset = 7; 505 dccpha->dha_x = 1; 506 507 if (connp->conn_ipversion == IPV4_VERSION) { 508 dccp->dccp_ipha = (ipha_t *)connp->conn_ht_iphc; 509 } else { 510 dccp->dccp_ip6h = (ip6_t *)connp->conn_ht_iphc; 
511 } 512 513 /* XXX */ 514 515 return (0); 516 } 517 518 /* 519 * DCCP write service routine. 520 */ 521 static void 522 dccp_wsrv(queue_t *q) 523 { 524 dccp_stack_t *dccps = Q_TO_DCCP(q)->dccp_dccps; 525 526 DCCP_STAT(dccps, dccp_wsrv_called); 527 } 528 529 /* 530 * Common create function for streams and sockets. 531 */ 532 conn_t * 533 dccp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket, 534 int *errorp) 535 { 536 conn_t *connp; 537 dccp_t *dccp; 538 dccp_stack_t *dccps; 539 netstack_t *ns; 540 squeue_t *sqp; 541 zoneid_t zoneid; 542 int error; 543 544 cmn_err(CE_NOTE, "dccp.c: dccp_create_common\n"); 545 546 ASSERT(errorp != NULL); 547 548 error = secpolicy_basic_net_access(credp); 549 if (error != 0) { 550 *errorp = error; 551 return (NULL); 552 } 553 554 /* 555 * Find the right netstack. 556 */ 557 ns = netstack_find_by_cred(credp); 558 ASSERT(ns != NULL); 559 dccps = ns->netstack_dccp; 560 ASSERT(dccps != NULL); 561 562 /* 563 * For exclusive stacks we set the zoneid to zero 564 * to make TCP operate as if in the global zone. 
565 */ 566 if (ns->netstack_stackid != GLOBAL_NETSTACKID) { 567 zoneid = GLOBAL_ZONEID; 568 } else { 569 zoneid = crgetzoneid(credp); 570 } 571 572 sqp = IP_SQUEUE_GET((uint_t)gethrtime()); 573 connp = (conn_t *)dccp_get_conn(sqp, dccps); 574 netstack_rele(dccps->dccps_netstack); 575 if (connp == NULL) { 576 *errorp = ENOSR; 577 return (NULL); 578 } 579 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 580 581 connp->conn_sqp = sqp; 582 connp->conn_initial_sqp = connp->conn_sqp; 583 connp->conn_ixa->ixa_sqp = connp->conn_sqp; 584 dccp = connp->conn_dccp; 585 586 /* Setting flags for ip output */ 587 connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 588 IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO; 589 590 ASSERT(connp->conn_proto == IPPROTO_DCCP); 591 ASSERT(connp->conn_dccp == dccp); 592 ASSERT(dccp->dccp_connp == connp); 593 594 if (isv6) { 595 connp->conn_ixa->ixa_src_preferences = IPV6_PREFER_SRC_DEFAULT; 596 connp->conn_ipversion = IPV6_VERSION; 597 connp->conn_family = AF_INET6; 598 /* XXX mms, ttl */ 599 } else { 600 connp->conn_ipversion = IPV4_VERSION; 601 connp->conn_family = AF_INET; 602 /* XXX mms, ttl */ 603 } 604 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 605 606 crhold(credp); 607 connp->conn_cred = credp; 608 connp->conn_cpid = curproc->p_pid; 609 connp->conn_open_time = ddi_get_lbolt64(); 610 611 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 612 connp->conn_ixa->ixa_cred = credp; 613 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 614 615 connp->conn_zoneid = zoneid; 616 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 617 connp->conn_ixa->ixa_zoneid = zoneid; 618 connp->conn_mlp_type = mlptSingle; 619 620 dccp->dccp_dccps = dccps; 621 dccp->dccp_state = DCCPS_CLOSED; 622 623 ASSERT(connp->conn_netstack == dccps->dccps_netstack); 624 ASSERT(dccp->dccp_dccps == dccps); 625 626 /* 627 * If the caller has the process-wide flag set, then default to MAC 628 * exempt mode. 
This allows read-down to unlabeled hosts. 629 */ 630 if (getpflags(NET_MAC_AWARE, credp) != 0) { 631 connp->conn_mac_mode = CONN_MAC_AWARE; 632 } 633 634 if (issocket) { 635 dccp->dccp_issocket = 1; 636 } 637 638 /* XXX rcvbuf, sndbuf etc */ 639 640 connp->conn_so_type = SOCK_STREAM; 641 642 SOCK_CONNID_INIT(dccp->dccp_connid); 643 dccp_init_values(dccp, NULL); 644 645 return (connp); 646 } 647 648 /* 649 * Common close function for streams and sockets. 650 */ 651 void 652 dccp_close_common(conn_t *connp, int flags) 653 { 654 dccp_t *dccp = connp->conn_dccp; 655 mblk_t *mp; 656 boolean_t conn_ioctl_cleanup_reqd = B_FALSE; 657 658 cmn_err(CE_NOTE, "dccp.c: dccp_close_common"); 659 660 ASSERT(connp->conn_ref >= 2); 661 662 /* 663 * Mark the conn as closing. ipsq_pending_mp_add will not 664 * add any mp to the pending mp list, after this conn has 665 * started closing. 666 */ 667 mutex_enter(&connp->conn_lock); 668 connp->conn_state_flags |= CONN_CLOSING; 669 670 if (connp->conn_oper_pending_ill != NULL) { 671 conn_ioctl_cleanup_reqd = B_TRUE; 672 } 673 674 CONN_INC_REF_LOCKED(connp); 675 mutex_exit(&connp->conn_lock); 676 677 ASSERT(connp->conn_ref >= 3); 678 679 /* 680 * Cleanup any queued ioctls here. This must be done before the wq/rq 681 * are re-written by dccp_close_output(). 682 */ 683 if (conn_ioctl_cleanup_reqd) { 684 conn_ioctl_cleanup(connp); 685 } 686 687 mutex_enter(&connp->conn_lock); 688 while (connp->conn_ioctlref > 0) { 689 cv_wait(&connp->conn_cv, &connp->conn_lock); 690 } 691 ASSERT(connp->conn_ioctlref == 0); 692 ASSERT(connp->conn_oper_pending_ill == NULL); 693 mutex_exit(&connp->conn_lock); 694 695 /* generate close */ 696 /* 697 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, dccp_close_output, connp, 698 NULL, dccp_squeue_flag, SQTAG_IP_DCCP_CLOSE); 699 700 */ 701 702 nowait: 703 connp->conn_cpid = NOPID; 704 } 705 706 /* 707 * Common bind function. 
708 */ 709 int 710 dccp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 711 boolean_t bind_to_req_port_only) 712 { 713 dccp_t *dccp = connp->conn_dccp; 714 int error; 715 716 cmn_err(CE_NOTE, "dccp.c: dccp_do_bind"); 717 718 if (dccp->dccp_state >= DCCPS_BOUND) { 719 if (connp->conn_debug) { 720 (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 721 "dccp_bind: bad state, %d", dccp->dccp_state); 722 } 723 return (-TOUTSTATE); 724 } 725 726 error = dccp_bind_check(connp, sa, len, cr, bind_to_req_port_only); 727 if (error != 0) { 728 return (error); 729 } 730 731 ASSERT(dccp->dccp_state == DCCPS_LISTEN); 732 /* XXX dccp_conn_req_max = 0 */ 733 734 return (0); 735 } 736 737 /* 738 * Common unbind function. 739 */ 740 int 741 dccp_do_unbind(conn_t *connp) 742 { 743 dccp_t *dccp = connp->conn_dccp; 744 int32_t oldstate; 745 746 cmn_err(CE_NOTE, "dccp.c: dccp_do_unbind"); 747 748 switch (dccp->dccp_state) { 749 case DCCPS_OPEN: 750 case DCCPS_LISTEN: 751 break; 752 default: 753 return (-TOUTSTATE); 754 } 755 756 connp->conn_laddr_v6 = ipv6_all_zeros; 757 connp->conn_saddr_v6 = ipv6_all_zeros; 758 759 dccp_bind_hash_remove(dccp); 760 761 oldstate = dccp->dccp_state; 762 dccp->dccp_state = DCCPS_CLOSED; 763 DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *, 764 connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL, 765 int32_t, oldstate); 766 767 ip_unbind(connp); 768 bzero(&connp->conn_ports, sizeof (connp->conn_ports)); 769 770 return (0); 771 } 772 773 /* 774 * Common listen function. 
775 */ 776 int 777 dccp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len, 778 int backlog, cred_t *cr, boolean_t bind_to_req_port_only) 779 { 780 dccp_t *dccp = connp->conn_dccp; 781 dccp_stack_t *dccps = dccp->dccp_dccps; 782 int32_t oldstate; 783 int error; 784 785 cmn_err(CE_NOTE, "dccp.c: dccp_do_listen"); 786 787 /* All Solaris components should pass a cred for this operation */ 788 ASSERT(cr != NULL); 789 790 if (dccp->dccp_state >= DCCPS_BOUND) { 791 792 if ((dccp->dccp_state == DCCPS_BOUND || 793 dccp->dccp_state == DCCPS_LISTEN) && backlog > 0) { 794 goto do_listen; 795 } 796 cmn_err(CE_NOTE, "DCCPS_BOUND, bad state"); 797 798 if (connp->conn_debug) { 799 (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 800 "dccp_listen: bad state, %d", dccp->dccp_state); 801 } 802 return (-TOUTSTATE); 803 } else { 804 if (sa == NULL) { 805 sin6_t addr; 806 sin6_t *sin6; 807 sin_t *sin; 808 809 ASSERT(IPCL_IS_NONSTR(connp)); 810 811 if (connp->conn_family == AF_INET) { 812 len = sizeof (sin_t); 813 sin = (sin_t *)&addr; 814 *sin = sin_null; 815 sin->sin_family = AF_INET; 816 } else { 817 ASSERT(connp->conn_family == AF_INET6); 818 819 len = sizeof (sin6_t); 820 sin6 = (sin6_t *)&addr; 821 *sin6 = sin6_null; 822 sin6->sin6_family = AF_INET6; 823 } 824 825 sa = (struct sockaddr *)&addr; 826 } 827 828 error = dccp_bind_check(connp, sa, len, cr, 829 bind_to_req_port_only); 830 if (error != 0) { 831 cmn_err(CE_NOTE, "dccp_bind_check failed"); 832 return (error); 833 } 834 /* Fall through and do the fanout insertion */ 835 } 836 837 do_listen: 838 ASSERT(dccp->dccp_state == DCCPS_BOUND || 839 dccp->dccp_state == DCCPS_LISTEN); 840 841 /* XXX backlog */ 842 843 connp->conn_recv = dccp_input_listener_unbound; 844 845 /* Insert into the classifier table */ 846 error = ip_laddr_fanout_insert(connp); 847 if (error != 0) { 848 /* Error - undo the bind */ 849 oldstate = dccp->dccp_state; 850 dccp->dccp_state = DCCPS_CLOSED; 851 852 connp->conn_bound_addr_v6 = 
ipv6_all_zeros; 853 854 connp->conn_laddr_v6 = ipv6_all_zeros; 855 connp->conn_saddr_v6 = ipv6_all_zeros; 856 connp->conn_ports = 0; 857 858 if (connp->conn_anon_port) { 859 zone_t *zone; 860 861 zone = crgetzone(cr); 862 connp->conn_anon_port = B_FALSE; 863 (void) tsol_mlp_anon(zone, connp->conn_mlp_type, 864 connp->conn_proto, connp->conn_lport, B_FALSE); 865 } 866 connp->conn_mlp_type = mlptSingle; 867 868 /* XXX dccp_bind_hash_remove */ 869 870 return (error); 871 } else { 872 /* XXX connection limits */ 873 } 874 875 return (error); 876 } 877 878 /* 879 * Common connect function. 880 */ 881 int 882 dccp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 883 cred_t *cr, pid_t pid) 884 { 885 dccp_t *dccp = connp->conn_dccp; 886 dccp_stack_t *dccps = dccp->dccp_dccps; 887 ip_xmit_attr_t *ixa = connp->conn_ixa; 888 mblk_t *req_mp; 889 sin_t *sin = (sin_t *)sa; 890 sin6_t *sin6 = (sin6_t *)sa; 891 ipaddr_t *dstaddrp; 892 in_port_t dstport; 893 uint_t srcid; 894 int32_t oldstate; 895 int error; 896 897 cmn_err(CE_NOTE, "dccp.c: dccp_do_connect"); 898 899 oldstate = dccp->dccp_state; 900 901 switch (len) { 902 case sizeof (sin_t): 903 sin = (sin_t *)sa; 904 if (sin->sin_port == 0) { 905 return (-TBADADDR); 906 } 907 if (connp->conn_ipv6_v6only) { 908 return (EAFNOSUPPORT); 909 } 910 break; 911 912 case sizeof (sin6_t): 913 sin6 = (sin6_t *)sa; 914 if (sin6->sin6_port == 0) { 915 return (-TBADADDR); 916 } 917 break; 918 919 default: 920 return (EINVAL); 921 } 922 923 if (connp->conn_family == AF_INET6 && 924 connp->conn_ipversion == IPV6_VERSION && 925 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 926 if (connp->conn_ipv6_v6only) { 927 return (EADDRNOTAVAIL); 928 } 929 930 connp->conn_ipversion = IPV4_VERSION; 931 } 932 933 switch (dccp->dccp_state) { 934 case DCCPS_LISTEN: 935 /* 936 * Listening sockets are not allowed to issue connect(). 
937 */ 938 if (IPCL_IS_NONSTR(connp)) { 939 return (EOPNOTSUPP); 940 } 941 942 case DCCPS_CLOSED: 943 /* 944 * We support quick connect. 945 */ 946 /* FALLTHRU */ 947 case DCCPS_OPEN: 948 break; 949 950 default: 951 return (-TOUTSTATE); 952 } 953 954 /* 955 * We update our cred/cpid based on the caller of connect. 956 */ 957 if (connp->conn_cred != cr) { 958 crhold(cr); 959 crfree(connp->conn_cred); 960 connp->conn_cred = cr; 961 } 962 connp->conn_cpid = pid; 963 964 /* Cache things in the ixa without any refhold */ 965 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 966 ixa->ixa_cred = cr; 967 ixa->ixa_cpid = pid; 968 969 if (is_system_labeled()) { 970 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 971 } 972 973 if (connp->conn_family == AF_INET6) { 974 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 975 error = dccp_connect_ipv6(dccp, &sin6->sin6_addr, 976 sin6->sin6_port, sin6->sin6_flowinfo, 977 sin6->__sin6_src_id, sin6->sin6_scope_id); 978 } else { 979 /* 980 * Destination adress is mapped IPv6 address. 981 * Source bound address should be unspecified or 982 * IPv6 mapped address as well. 
983 */ 984 if (!IN6_IS_ADDR_UNSPECIFIED( 985 &connp->conn_bound_addr_v6) && 986 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { 987 return (EADDRNOTAVAIL); 988 } 989 990 dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr)); 991 dstport = sin6->sin6_port; 992 srcid = sin6->__sin6_src_id; 993 error = dccp_connect_ipv4(dccp, dstaddrp, dstport, 994 srcid); 995 } 996 } else { 997 dstaddrp = &sin->sin_addr.s_addr; 998 dstport = sin->sin_port; 999 srcid = 0; 1000 error = dccp_connect_ipv4(dccp, dstaddrp, dstport, srcid); 1001 } 1002 1003 if (error != 0) { 1004 cmn_err(CE_NOTE, "dccp_connect_ip failed"); 1005 goto connect_failed; 1006 } 1007 1008 /* XXX cluster */ 1009 1010 /* Connect succeeded */ 1011 DCCPS_BUMP_MIB(dccps, dccpActiveOpens); 1012 dccp->dccp_active_open = 1; 1013 1014 DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *, 1015 connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL, 1016 int32_t, DCCPS_BOUND); 1017 1018 DCCP_TIMER_RESTART(dccp, 100); 1019 req_mp = dccp_generate_request(connp); 1020 if (req_mp != NULL) { 1021 /* 1022 * We must bump the generation before sending the request 1023 * to ensure that we use the right generation in case 1024 * this thread issues a "connected" up call. 1025 */ 1026 SOCK_CONNID_BUMP(dccp->dccp_connid); 1027 1028 DTRACE_DCCP5(connect__request, mblk_t *, NULL, 1029 ip_xmit_attr_t *, connp->conn_ixa, 1030 void_ip_t *, req_mp->b_rptr, dccp_t *, dccp, 1031 dccpha_t *, 1032 &req_mp->b_rptr[connp->conn_ixa->ixa_ip_hdr_length]); 1033 1034 dccp_send_data(dccp, req_mp); 1035 } 1036 1037 return (0); 1038 1039 connect_failed: 1040 cmn_err(CE_NOTE, "dccp_do_connect failed"); 1041 1042 connp->conn_faddr_v6 = ipv6_all_zeros; 1043 connp->conn_fport = 0; 1044 dccp->dccp_state = oldstate; 1045 1046 /* XXX */ 1047 return (error); 1048 } 1049 1050 /* 1051 * Init values of a connection. 
1052 */ 1053 void 1054 dccp_init_values(dccp_t *dccp, dccp_t *parent) 1055 { 1056 conn_t *connp = dccp->dccp_connp; 1057 dccp_stack_t *dccps = dccp->dccp_dccps; 1058 1059 connp->conn_mlp_type = mlptSingle; 1060 } 1061 1062 /* 1063 * Free dccp structure. 1064 */ 1065 void 1066 dccp_free(dccp_t *dccp) 1067 { 1068 conn_t *connp = dccp->dccp_connp; 1069 1070 cmn_err(CE_NOTE, "dccp.c: dccp_free"); 1071 1072 connp->conn_rq = NULL; 1073 connp->conn_wq = NULL; 1074 1075 if (connp->conn_upper_handle != NULL) { 1076 if (IPCL_IS_NONSTR(connp)) { 1077 (*connp->conn_upcalls->su_closed)( 1078 connp->conn_upper_handle); 1079 dccp->dccp_detached = B_TRUE; 1080 } 1081 1082 connp->conn_upper_handle = NULL; 1083 connp->conn_upcalls = NULL; 1084 } 1085 } 1086 1087 void * 1088 dccp_get_conn(void *arg, dccp_stack_t *dccps) 1089 { 1090 dccp_t *dccp = NULL; 1091 conn_t *connp; 1092 squeue_t *sqp = (squeue_t *)arg; 1093 netstack_t *ns; 1094 1095 /* XXX timewait */ 1096 1097 connp = ipcl_conn_create(IPCL_DCCPCONN, KM_NOSLEEP, 1098 dccps->dccps_netstack); 1099 if (connp == NULL) { 1100 return (NULL); 1101 } 1102 1103 dccp = connp->conn_dccp; 1104 dccp->dccp_dccps = dccps; 1105 1106 /* List of features being negotated */ 1107 list_create(&dccp->dccp_features, sizeof (dccp_feature_t), 1108 offsetof(dccp_feature_t, df_next)); 1109 1110 connp->conn_recv = dccp_input_data; 1111 connp->conn_recvicmp = dccp_icmp_input; 1112 connp->conn_verifyicmp = dccp_verifyicmp; 1113 1114 connp->conn_ixa->ixa_notify = dccp_notify; 1115 connp->conn_ixa->ixa_notify_cookie = dccp; 1116 1117 return ((void *)connp); 1118 } 1119 1120 /* 1121 * IPv4 connect. 
1122 */ 1123 static int 1124 dccp_connect_ipv4(dccp_t *dccp, ipaddr_t *dstaddrp, in_port_t dstport, 1125 uint_t srcid) 1126 { 1127 conn_t *connp = dccp->dccp_connp; 1128 dccp_stack_t *dccps = dccp->dccp_dccps; 1129 ipaddr_t dstaddr = *dstaddrp; 1130 uint16_t lport; 1131 int error; 1132 1133 cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv4"); 1134 1135 ASSERT(connp->conn_ipversion == IPV4_VERSION); 1136 1137 if (dstaddr == INADDR_ANY) { 1138 dstaddr = htonl(INADDR_LOOPBACK); 1139 *dstaddrp = dstaddr; 1140 } 1141 1142 /* Handle __sin6_src_id if socket not bound to an IP address */ 1143 if (srcid != 0 && connp->conn_laddr_v4 == INADDR_ANY) { 1144 ip_srcid_find_id(srcid, &connp->conn_laddr_v6, 1145 IPCL_ZONEID(connp), dccps->dccps_netstack); 1146 connp->conn_saddr_v6 = connp->conn_laddr_v6; 1147 } 1148 1149 IN6_IPADDR_TO_V4MAPPED(dstaddr, &connp->conn_faddr_v6); 1150 connp->conn_fport = dstport; 1151 1152 if (dccp->dccp_state == DCCPS_CLOSED) { 1153 lport = dccp_update_next_port(dccps->dccps_next_port_to_try, 1154 dccp, B_TRUE); 1155 lport = dccp_bindi(dccp, lport, &connp->conn_laddr_v6, 0, 1156 B_TRUE, B_FALSE, B_FALSE); 1157 if (lport == 0) { 1158 return (-TNOADDR); 1159 } 1160 } 1161 1162 error = dccp_set_destination(dccp); 1163 if (error != 0) { 1164 return (error); 1165 } 1166 1167 /* 1168 * Don't connect to oneself. 1169 */ 1170 if (connp->conn_faddr_v4 == connp->conn_laddr_v4 && 1171 connp->conn_fport == connp->conn_lport) { 1172 return (-TBADADDR); 1173 } 1174 1175 dccp->dccp_state = DCCPS_REQUEST; 1176 1177 return (ipcl_conn_insert_v4(connp)); 1178 } 1179 1180 /* 1181 * IPv6 connect. 1182 */ 1183 static int 1184 dccp_connect_ipv6(dccp_t *dccp, in6_addr_t *dstaddrp, in_port_t dstport, 1185 uint32_t flowinfo, uint_t srcid, uint32_t scope_id) 1186 { 1187 cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv6"); 1188 1189 return (0); 1190 } 1191 1192 /* 1193 * Set the ports via conn_connect and build the template 1194 * header. 
1195 */ 1196 int 1197 dccp_set_destination(dccp_t *dccp) 1198 { 1199 conn_t *connp = dccp->dccp_connp; 1200 dccp_stack_t *dccps = dccp->dccp_dccps; 1201 iulp_t uinfo; 1202 uint32_t flags; 1203 int error; 1204 1205 flags = IPDF_LSO | IPDF_ZCOPY; 1206 flags |= IPDF_UNIQUE_DCE; 1207 1208 mutex_enter(&connp->conn_lock); 1209 error = conn_connect(connp, &uinfo, flags); 1210 mutex_exit(&connp->conn_lock); 1211 if (error != 0) { 1212 cmn_err(CE_NOTE, "conn_connect failed"); 1213 return (error); 1214 } 1215 1216 error = dccp_build_hdrs(dccp); 1217 if (error != 0) { 1218 cmn_err(CE_NOTE, "dccp_build_hdrs failed"); 1219 return (error); 1220 } 1221 1222 /* XXX */ 1223 1224 /* Initialise the ISS */ 1225 dccp_iss_init(dccp); 1226 1227 mutex_enter(&connp->conn_lock); 1228 connp->conn_state_flags &= ~CONN_INCIPIENT; 1229 mutex_exit(&connp->conn_lock); 1230 1231 return (0); 1232 } 1233 1234 /* 1235 * Init the ISS. 1236 */ 1237 static void 1238 dccp_iss_init(dccp_t *dccp) 1239 { 1240 cmn_err(CE_NOTE, "dccp.c: dccp_iss_init"); 1241 1242 dccp->dccp_iss += gethrtime(); 1243 dccp->dccp_gss = dccp->dccp_iss; 1244 }