/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2012 David Hoeppner. All rights reserved.
 */

/*
 * This file implements the Datagram Congestion Control Protocol (DCCP).
33 */ 34 35 #include <sys/types.h> 36 #include <sys/stream.h> 37 #include <sys/stropts.h> 38 #include <sys/strlog.h> 39 #include <sys/strsun.h> 40 #define _SUN_TPI_VERSION 2 41 #include <sys/tihdr.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/sockio.h> 45 #include <sys/priv.h> 46 #include <sys/vtrace.h> 47 #include <sys/sdt.h> 48 #include <sys/debug.h> 49 #include <sys/ddi.h> 50 #include <sys/isa_defs.h> 51 #include <sys/policy.h> 52 #include <sys/tsol/label.h> 53 #include <sys/tsol/tnet.h> 54 #include <inet/dccp_impl.h> 55 #include <inet/dccp_stack.h> 56 #include <inet/kstatcom.h> 57 #include <inet/snmpcom.h> 58 59 #include <sys/cmn_err.h> 60 61 int dccp_squeue_flag; 62 63 /* Setable in /etc/system */ 64 uint_t dccp_bind_fanout_size = DCCP_BIND_FANOUT_SIZE; 65 66 static void dccp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t, 67 ixa_notify_arg_t); 68 69 /* Functions to register netstack */ 70 static void *dccp_stack_init(netstackid_t, netstack_t *); 71 static void dccp_stack_fini(netstackid_t, void *); 72 73 /* Stream device open functions */ 74 static int dccp_openv4(queue_t *, dev_t *, int, int, cred_t *); 75 static int dccp_openv6(queue_t *, dev_t *, int, int, cred_t *); 76 static int dccp_open(queue_t *, dev_t *, int, int, cred_t *, 77 boolean_t); 78 79 /* Write service routine */ 80 static void dccp_wsrv(queue_t *); 81 82 /* Connection related functions */ 83 static int dccp_connect_ipv4(dccp_t *, ipaddr_t *, in_port_t, uint_t); 84 static int dccp_connect_ipv6(dccp_t *, in6_addr_t *, in_port_t, uint32_t, 85 uint_t, uint32_t); 86 87 /* Initialise ISS */ 88 static void dccp_iss_init(dccp_t *); 89 90 struct module_info dccp_rinfo = { 91 DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, DCCP_RECV_HIWATER, 92 DCCP_RECV_LOWATER 93 }; 94 95 static struct module_info dccp_winfo = { 96 DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, 127, 16 97 }; 98 99 /* 100 * Queue information structure with DCCP entry points. 
101 */ 102 struct qinit dccp_rinitv4 = { 103 NULL, (pfi_t)dccp_rsrv, dccp_openv4, dccp_tpi_close, NULL, &dccp_rinfo 104 }; 105 106 struct qinit dccp_rinitv6 = { 107 NULL, (pfi_t)dccp_rsrv, dccp_openv6, dccp_tpi_close, NULL, &dccp_rinfo 108 }; 109 110 struct qinit dccp_winit = { 111 (pfi_t)dccp_wput, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo 112 }; 113 114 /* Initial entry point for TCP in socket mode */ 115 struct qinit dccp_sock_winit = { 116 (pfi_t)dccp_wput_sock, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo 117 }; 118 119 struct qinit dccp_fallback_sock_winit = { 120 (pfi_t)dccp_wput_fallback, NULL, NULL, NULL, NULL, &dccp_winfo 121 }; 122 /* 123 * DCCP as acceptor STREAM. 124 */ 125 struct qinit dccp_acceptor_rinit = { 126 NULL, (pfi_t)dccp_rsrv, NULL, dccp_tpi_close_accept, NULL, &dccp_winfo 127 }; 128 129 struct qinit dccp_acceptor_winit = { 130 (pfi_t)dccp_tpi_accept, NULL, NULL, NULL, NULL, &dccp_winfo 131 }; 132 133 /* AF_INET /dev/dccp */ 134 struct streamtab dccpinfov4 = { 135 &dccp_rinitv4, &dccp_winit 136 }; 137 138 /* AF_INET6 /dev/dccp6 */ 139 struct streamtab dccpinfov6 = { 140 &dccp_rinitv6, &dccp_winit 141 }; 142 143 /* Template for response to info request */ 144 struct T_info_ack dccp_g_t_info_ack = { 145 T_INFO_ACK, /* PRIM_type */ 146 0, /* TSDU_size */ 147 T_INFINITE, /* ETSDU_size */ 148 T_INVALID, /* CDATA_size */ 149 T_INVALID, /* DDATA_size */ 150 sizeof (sin_t), /* ADDR_size */ 151 0, /* OPT_size - not initialized here */ 152 TIDUSZ, /* TIDU_size */ 153 T_COTS_ORD, /* SERV_type */ 154 DCCPS_CLOSED, /* CURRENT_state */ 155 (XPG4_1|EXPINLINE) /* PROVIDER_flag */ 156 }; 157 158 struct T_info_ack dccp_g_t_info_ack_v6 = { 159 T_INFO_ACK, /* PRIM_type */ 160 0, /* TSDU_size */ 161 T_INFINITE, /* ETSDU_size */ 162 T_INVALID, /* CDATA_size */ 163 T_INVALID, /* DDATA_size */ 164 sizeof (sin6_t), /* ADDR_size */ 165 0, /* OPT_size - not initialized here */ 166 TIDUSZ, /* TIDU_size */ 167 T_COTS_ORD, /* SERV_type */ 168 DCCPS_CLOSED, /* 
CURRENT_state */ 169 (XPG4_1|EXPINLINE) /* PROVIDER_flag */ 170 }; 171 172 /* 173 * DCCP Tunables. 174 */ 175 extern mod_prop_info_t dccp_propinfo_tbl[]; 176 extern int dccp_propinfo_count; 177 178 /* 179 * Register DCCP in ip netstack. 180 */ 181 void 182 dccp_ddi_g_init(void) 183 { 184 /* Global timer cache */ 185 dccp_timercache = kmem_cache_create("dccp_timercache", 186 sizeof (dccp_timer_t) + sizeof (mblk_t), 0, 187 NULL, NULL, NULL, NULL, NULL, 0); 188 189 netstack_register(NS_DCCP, dccp_stack_init, NULL, dccp_stack_fini); 190 } 191 192 /* 193 * Unregister DCCP from ip netstack. 194 */ 195 void 196 dccp_ddi_g_destroy(void) 197 { 198 /* Global timer cache */ 199 kmem_cache_destroy(dccp_timercache); 200 201 netstack_unregister(NS_DCCP); 202 } 203 204 #define INET_NAME "ip" 205 206 /* 207 * Initialize this DCCP stack instance. 208 */ 209 static void * 210 dccp_stack_init(netstackid_t stackid, netstack_t *ns) 211 { 212 dccp_stack_t *dccps; 213 major_t major; 214 size_t arrsz; 215 int error; 216 int i; 217 218 dccps = kmem_zalloc(sizeof (*dccps), KM_SLEEP); 219 if (dccps == NULL) { 220 return (NULL); 221 } 222 dccps->dccps_netstack = ns; 223 224 /* Ports */ 225 mutex_init(&dccps->dccps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 226 dccps->dccps_num_epriv_ports = DCCP_NUM_EPRIV_PORTS; 227 dccps->dccps_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 228 dccps->dccps_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 229 dccps->dccps_min_anonpriv_port = 512; 230 231 dccps->dccps_bind_fanout_size = dccp_bind_fanout_size; 232 233 /* Bind fanout */ 234 dccps->dccps_bind_fanout = kmem_zalloc(dccps->dccps_bind_fanout_size * 235 sizeof (dccp_df_t), KM_SLEEP); 236 for (i = 0; i < dccps->dccps_bind_fanout_size; i++) { 237 mutex_init(&dccps->dccps_bind_fanout[i].df_lock, NULL, 238 MUTEX_DEFAULT, NULL); 239 } 240 241 /* Tunable properties */ 242 arrsz = dccp_propinfo_count * sizeof (mod_prop_info_t); 243 dccps->dccps_propinfo_tbl = kmem_alloc(arrsz, KM_SLEEP); 244 if (dccps->dccps_propinfo_tbl == 
NULL) { 245 kmem_free(dccps, sizeof (*dccps)); 246 return (NULL); 247 } 248 bcopy(dccp_propinfo_tbl, dccps->dccps_propinfo_tbl, arrsz); 249 250 /* Allocate per netstack cpu stats */ 251 mutex_enter(&cpu_lock); 252 dccps->dccps_sc_cnt = MAX(ncpus, boot_ncpus); 253 mutex_exit(&cpu_lock); 254 255 dccps->dccps_sc = kmem_zalloc(max_ncpus * sizeof (dccp_stats_cpu_t *), 256 KM_SLEEP); 257 for (i = 0; i < dccps->dccps_sc_cnt; i++) { 258 dccps->dccps_sc[i] = kmem_zalloc(sizeof (dccp_stats_cpu_t), 259 KM_SLEEP); 260 } 261 262 /* Kernel statistics */ 263 dccps->dccps_kstat = dccp_kstat2_init(stackid); 264 dccps->dccps_mibkp = dccp_kstat_init(stackid); 265 266 /* Driver major number */ 267 major = mod_name_to_major(INET_NAME); 268 error = ldi_ident_from_major(major, &dccps->dccps_ldi_ident); 269 ASSERT(error == 0); 270 271 return (dccps); 272 } 273 274 /* 275 * Destroy this DCCP netstack instance. 276 */ 277 static void 278 dccp_stack_fini(netstackid_t stackid, void *arg) 279 { 280 dccp_stack_t *dccps = (dccp_stack_t *)arg; 281 int i; 282 283 /* Free cpu stats */ 284 for (i = 0; i < dccps->dccps_sc_cnt; i++) { 285 kmem_free(dccps->dccps_sc[i], sizeof (dccp_stats_cpu_t)); 286 } 287 kmem_free(dccps->dccps_sc, max_ncpus * sizeof (dccp_stats_cpu_t *)); 288 289 /* Free tunable properties */ 290 kmem_free(dccps->dccps_propinfo_tbl, 291 dccp_propinfo_count * sizeof (mod_prop_info_t)); 292 dccps->dccps_propinfo_tbl = NULL; 293 294 /* Free bind fanout */ 295 for (i = 0; i < dccps->dccps_bind_fanout_size; i++) { 296 ASSERT(dccps->dccps_bind_fanout[i].df_dccp == NULL); 297 mutex_destroy(&dccps->dccps_bind_fanout[i].df_lock); 298 } 299 kmem_free(dccps->dccps_bind_fanout, dccps->dccps_bind_fanout_size * 300 sizeof (dccp_df_t)); 301 dccps->dccps_bind_fanout = NULL; 302 303 /* Kernel statistics */ 304 dccp_kstat_fini(stackid, dccps->dccps_mibkp); 305 dccps->dccps_mibkp = NULL; 306 307 ldi_ident_release(dccps->dccps_ldi_ident); 308 309 kmem_free(dccps, sizeof (*dccps)); 310 } 311 312 /* 
/dev/dccp */ 313 static int 314 dccp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 315 { 316 cmn_err(CE_NOTE, "dccp.c: dccp_openv4\n"); 317 318 return (dccp_open(q, devp, flag, sflag, credp, B_FALSE)); 319 } 320 321 /* /dev/dccp6 */ 322 static int 323 dccp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 324 { 325 cmn_err(CE_NOTE, "dccp.c: dccp_openv6\n"); 326 327 return (dccp_open(q, devp, flag, sflag, credp, B_TRUE)); 328 } 329 330 /* 331 * Common open function for v4 and v6 devices. 332 */ 333 static int 334 dccp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 335 boolean_t isv6) 336 { 337 conn_t *connp; 338 dccp_t *dccp; 339 vmem_t *minor_arena; 340 dev_t conn_dev; 341 boolean_t issocket; 342 int error; 343 344 cmn_err(CE_NOTE, "dccp.c: dccp_open"); 345 346 /* If the stream is already open, return immediately */ 347 if (q->q_ptr != NULL) { 348 return (0); 349 } 350 351 if (sflag == MODOPEN) { 352 return (EINVAL); 353 } 354 355 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 356 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 357 minor_arena = ip_minor_arena_la; 358 } else { 359 /* 360 * Either minor numbers in the large arena were exhausted 361 * or a non socket application is doing the open. 362 * Try to allocate from the small arena. 363 */ 364 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 365 return (EBUSY); 366 } 367 minor_arena = ip_minor_arena_sa; 368 } 369 370 ASSERT(minor_arena != NULL); 371 372 *devp = makedevice(getmajor(*devp), (minor_t)conn_dev); 373 374 if (flag & SO_FALLBACK) { 375 /* 376 * Non streams socket needs a stream to fallback to. 
377 */ 378 RD(q)->q_ptr = (void *)conn_dev; 379 WR(q)->q_qinfo = &dccp_fallback_sock_winit; 380 WR(q)->q_ptr = (void *)minor_arena; 381 qprocson(q); 382 return (0); 383 } else if (flag & SO_ACCEPTOR) { 384 q->q_qinfo = &dccp_acceptor_rinit; 385 /* 386 * The conn_dev and minor_arena will be subsequently used by 387 * dccp_tli_accept() and dccp_tpi_close_accept() to figure out 388 * the minor device number for this connection from the q_ptr. 389 */ 390 RD(q)->q_ptr = (void *)conn_dev; 391 WR(q)->q_qinfo = &dccp_acceptor_winit; 392 WR(q)->q_ptr = (void *)minor_arena; 393 qprocson(q); 394 return (0); 395 } 396 397 issocket = flag & SO_SOCKSTR; 398 connp = dccp_create_common(credp, isv6, issocket, &error); 399 if (connp == NULL) { 400 inet_minor_free(minor_arena, conn_dev); 401 q->q_ptr = WR(q)->q_ptr = NULL; 402 return (error); 403 } 404 405 connp->conn_rq = q; 406 connp->conn_wq = WR(q); 407 q->q_ptr = WR(q)->q_ptr = connp; 408 409 connp->conn_dev = conn_dev; 410 connp->conn_minor_arena = minor_arena; 411 412 ASSERT(q->q_qinfo == &dccp_rinitv4 || q->q_qinfo == &dccp_rinitv6); 413 ASSERT(WR(q)->q_qinfo == &dccp_winit); 414 415 dccp = connp->conn_dccp; 416 417 if (issocket) { 418 WR(q)->q_qinfo = &dccp_sock_winit; 419 } else { 420 #ifdef _ILP32 421 dccp->dccp_acceptor_id = (t_uscalar_t)RD(q); 422 #else 423 dccp->dccp_acceptor_id = conn_dev; 424 #endif /* _ILP32 */ 425 } 426 427 /* 428 * Put the ref for DCCP. Ref for IP was already put 429 * by ipcl_conn_create. Also Make the conn_t globally 430 * visible to walkers. 
431 */ 432 mutex_enter(&connp->conn_lock); 433 CONN_INC_REF_LOCKED(connp); 434 ASSERT(connp->conn_ref == 2); 435 connp->conn_state_flags &= ~CONN_INCIPIENT; 436 mutex_exit(&connp->conn_lock); 437 438 qprocson(q); 439 440 return (0); 441 } 442 443 /* 444 * IXA notify 445 */ 446 static void 447 dccp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 448 ixa_notify_arg_t narg) 449 { 450 cmn_err(CE_NOTE, "dccp.c: dccp_notify"); 451 } 452 453 /* 454 * Build the template headers. 455 */ 456 int 457 dccp_build_hdrs(dccp_t *dccp) 458 { 459 dccp_stack_t *dccps = dccp->dccp_dccps; 460 conn_t *connp = dccp->dccp_connp; 461 dccpha_t *dccpha; 462 uint32_t cksum; 463 char buf[DCCP_MAX_HDR_LENGTH]; 464 uint_t buflen; 465 uint_t ulplen = 12; 466 uint_t extralen = 0; 467 int error; 468 469 cmn_err(CE_NOTE, "dccp.c: dccp_build_hdrs"); 470 471 buflen = connp->conn_ht_ulp_len; 472 if (buflen != 0) { 473 cmn_err(CE_NOTE, "buflen != 0"); 474 bcopy(connp->conn_ht_ulp, buf, buflen); 475 extralen -= buflen - ulplen; 476 ulplen = buflen; 477 } 478 479 mutex_enter(&connp->conn_lock); 480 error = conn_build_hdr_template(connp, ulplen, extralen, 481 &connp->conn_laddr_v6, &connp->conn_faddr_v6, connp->conn_flowinfo); 482 mutex_exit(&connp->conn_lock); 483 if (error != 0) { 484 cmn_err(CE_NOTE, "conn_build_hdr_template failed"); 485 return (error); 486 } 487 488 dccpha = (dccpha_t *)connp->conn_ht_ulp; 489 dccp->dccp_dccpha = dccpha; 490 491 if (buflen != 0) { 492 bcopy(buf, connp->conn_ht_ulp, buflen); 493 } else { 494 dccpha->dha_sum = 0; 495 dccpha->dha_lport = connp->conn_lport; 496 dccpha->dha_fport = connp->conn_fport; 497 } 498 499 cksum = sizeof (dccpha_t) + connp->conn_sum; 500 cksum = (cksum >> 16) + (cksum & 0xFFFF); 501 dccpha->dha_sum = htons(cksum); 502 dccpha->dha_offset = 7; 503 dccpha->dha_x = 1; 504 505 if (connp->conn_ipversion == IPV4_VERSION) { 506 dccp->dccp_ipha = (ipha_t *)connp->conn_ht_iphc; 507 } else { 508 dccp->dccp_ip6h = (ip6_t *)connp->conn_ht_iphc; 
509 } 510 511 /* XXX */ 512 513 return (0); 514 } 515 516 /* 517 * DCCP write service routine. 518 */ 519 static void 520 dccp_wsrv(queue_t *q) 521 { 522 dccp_stack_t *dccps = Q_TO_DCCP(q)->dccp_dccps; 523 524 DCCP_STAT(dccps, dccp_wsrv_called); 525 } 526 527 /* 528 * Common create function for streams and sockets. 529 */ 530 conn_t * 531 dccp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket, 532 int *errorp) 533 { 534 conn_t *connp; 535 dccp_t *dccp; 536 dccp_stack_t *dccps; 537 netstack_t *ns; 538 squeue_t *sqp; 539 zoneid_t zoneid; 540 int error; 541 542 cmn_err(CE_NOTE, "dccp.c: dccp_create_common\n"); 543 544 ASSERT(errorp != NULL); 545 546 error = secpolicy_basic_net_access(credp); 547 if (error != 0) { 548 *errorp = error; 549 return (NULL); 550 } 551 552 /* 553 * Find the right netstack. 554 */ 555 ns = netstack_find_by_cred(credp); 556 ASSERT(ns != NULL); 557 dccps = ns->netstack_dccp; 558 ASSERT(dccps != NULL); 559 560 /* 561 * For exclusive stacks we set the zoneid to zero 562 * to make TCP operate as if in the global zone. 
563 */ 564 if (ns->netstack_stackid != GLOBAL_NETSTACKID) { 565 zoneid = GLOBAL_ZONEID; 566 } else { 567 zoneid = crgetzoneid(credp); 568 } 569 570 sqp = IP_SQUEUE_GET((uint_t)gethrtime()); 571 connp = (conn_t *)dccp_get_conn(sqp, dccps); 572 netstack_rele(dccps->dccps_netstack); 573 if (connp == NULL) { 574 *errorp = ENOSR; 575 return (NULL); 576 } 577 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 578 579 connp->conn_sqp = sqp; 580 connp->conn_initial_sqp = connp->conn_sqp; 581 connp->conn_ixa->ixa_sqp = connp->conn_sqp; 582 dccp = connp->conn_dccp; 583 584 /* Setting flags for ip output */ 585 connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 586 IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO; 587 588 ASSERT(connp->conn_proto == IPPROTO_DCCP); 589 ASSERT(connp->conn_dccp == dccp); 590 ASSERT(dccp->dccp_connp == connp); 591 592 if (isv6) { 593 connp->conn_ixa->ixa_src_preferences = IPV6_PREFER_SRC_DEFAULT; 594 connp->conn_ipversion = IPV6_VERSION; 595 connp->conn_family = AF_INET6; 596 /* XXX mms, ttl */ 597 } else { 598 connp->conn_ipversion = IPV4_VERSION; 599 connp->conn_family = AF_INET; 600 /* XXX mms, ttl */ 601 } 602 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 603 604 crhold(credp); 605 connp->conn_cred = credp; 606 connp->conn_cpid = curproc->p_pid; 607 connp->conn_open_time = ddi_get_lbolt64(); 608 609 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 610 connp->conn_ixa->ixa_cred = credp; 611 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 612 613 connp->conn_zoneid = zoneid; 614 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 615 connp->conn_ixa->ixa_zoneid = zoneid; 616 connp->conn_mlp_type = mlptSingle; 617 618 dccp->dccp_dccps = dccps; 619 dccp->dccp_state = DCCPS_CLOSED; 620 621 ASSERT(connp->conn_netstack == dccps->dccps_netstack); 622 ASSERT(dccp->dccp_dccps == dccps); 623 624 /* 625 * If the caller has the process-wide flag set, then default to MAC 626 * exempt mode. 
This allows read-down to unlabeled hosts. 627 */ 628 if (getpflags(NET_MAC_AWARE, credp) != 0) { 629 connp->conn_mac_mode = CONN_MAC_AWARE; 630 } 631 632 if (issocket) { 633 dccp->dccp_issocket = 1; 634 } 635 636 /* XXX rcvbuf, sndbuf etc */ 637 638 connp->conn_so_type = SOCK_STREAM; 639 640 SOCK_CONNID_INIT(dccp->dccp_connid); 641 dccp_init_values(dccp, NULL); 642 643 return (connp); 644 } 645 646 /* 647 * Common close function for streams and sockets. 648 */ 649 void 650 dccp_close_common(conn_t *connp, int flags) 651 { 652 dccp_t *dccp = connp->conn_dccp; 653 mblk_t *mp; 654 boolean_t conn_ioctl_cleanup_reqd = B_FALSE; 655 656 cmn_err(CE_NOTE, "dccp.c: dccp_close_common"); 657 658 ASSERT(connp->conn_ref >= 2); 659 660 /* 661 * Mark the conn as closing. ipsq_pending_mp_add will not 662 * add any mp to the pending mp list, after this conn has 663 * started closing. 664 */ 665 mutex_enter(&connp->conn_lock); 666 connp->conn_state_flags |= CONN_CLOSING; 667 668 if (connp->conn_oper_pending_ill != NULL) { 669 conn_ioctl_cleanup_reqd = B_TRUE; 670 } 671 672 CONN_INC_REF_LOCKED(connp); 673 mutex_exit(&connp->conn_lock); 674 675 ASSERT(connp->conn_ref >= 3); 676 677 /* 678 * Cleanup any queued ioctls here. This must be done before the wq/rq 679 * are re-written by dccp_close_output(). 680 */ 681 if (conn_ioctl_cleanup_reqd) { 682 conn_ioctl_cleanup(connp); 683 } 684 685 mutex_enter(&connp->conn_lock); 686 while (connp->conn_ioctlref > 0) { 687 cv_wait(&connp->conn_cv, &connp->conn_lock); 688 } 689 ASSERT(connp->conn_ioctlref == 0); 690 ASSERT(connp->conn_oper_pending_ill == NULL); 691 mutex_exit(&connp->conn_lock); 692 693 /* generate close */ 694 /* 695 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, dccp_close_output, connp, 696 NULL, dccp_squeue_flag, SQTAG_IP_DCCP_CLOSE); 697 698 */ 699 700 nowait: 701 connp->conn_cpid = NOPID; 702 } 703 704 /* 705 * Common bind function. 
706 */ 707 int 708 dccp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 709 boolean_t bind_to_req_port_only) 710 { 711 dccp_t *dccp = connp->conn_dccp; 712 int error; 713 714 cmn_err(CE_NOTE, "dccp.c: dccp_do_bind"); 715 716 if (dccp->dccp_state >= DCCPS_BOUND) { 717 if (connp->conn_debug) { 718 (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 719 "dccp_bind: bad state, %d", dccp->dccp_state); 720 } 721 return (-TOUTSTATE); 722 } 723 724 error = dccp_bind_check(connp, sa, len, cr, bind_to_req_port_only); 725 if (error != 0) { 726 return (error); 727 } 728 729 ASSERT(dccp->dccp_state == DCCPS_LISTEN); 730 /* XXX dccp_conn_req_max = 0 */ 731 732 return (0); 733 } 734 735 /* 736 * Common unbind function. 737 */ 738 int 739 dccp_do_unbind(conn_t *connp) 740 { 741 dccp_t *dccp = connp->conn_dccp; 742 int32_t oldstate; 743 744 cmn_err(CE_NOTE, "dccp.c: dccp_do_unbind"); 745 746 switch (dccp->dccp_state) { 747 case DCCPS_OPEN: 748 case DCCPS_LISTEN: 749 break; 750 default: 751 return (-TOUTSTATE); 752 } 753 754 connp->conn_laddr_v6 = ipv6_all_zeros; 755 connp->conn_saddr_v6 = ipv6_all_zeros; 756 757 dccp_bind_hash_remove(dccp); 758 759 oldstate = dccp->dccp_state; 760 dccp->dccp_state = DCCPS_CLOSED; 761 DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *, 762 connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL, 763 int32_t, oldstate); 764 765 ip_unbind(connp); 766 bzero(&connp->conn_ports, sizeof (connp->conn_ports)); 767 768 return (0); 769 } 770 771 /* 772 * Common listen function. 
773 */ 774 int 775 dccp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len, 776 int backlog, cred_t *cr, boolean_t bind_to_req_port_only) 777 { 778 dccp_t *dccp = connp->conn_dccp; 779 dccp_stack_t *dccps = dccp->dccp_dccps; 780 int32_t oldstate; 781 int error; 782 783 cmn_err(CE_NOTE, "dccp.c: dccp_do_listen"); 784 785 /* All Solaris components should pass a cred for this operation */ 786 ASSERT(cr != NULL); 787 788 if (dccp->dccp_state >= DCCPS_BOUND) { 789 790 if ((dccp->dccp_state == DCCPS_BOUND || 791 dccp->dccp_state == DCCPS_LISTEN) && backlog > 0) { 792 goto do_listen; 793 } 794 cmn_err(CE_NOTE, "DCCPS_BOUND, bad state"); 795 796 if (connp->conn_debug) { 797 (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 798 "dccp_listen: bad state, %d", dccp->dccp_state); 799 } 800 return (-TOUTSTATE); 801 } else { 802 if (sa == NULL) { 803 sin6_t addr; 804 sin6_t *sin6; 805 sin_t *sin; 806 807 ASSERT(IPCL_IS_NONSTR(connp)); 808 809 if (connp->conn_family == AF_INET) { 810 len = sizeof (sin_t); 811 sin = (sin_t *)&addr; 812 *sin = sin_null; 813 sin->sin_family = AF_INET; 814 } else { 815 ASSERT(connp->conn_family == AF_INET6); 816 817 len = sizeof (sin6_t); 818 sin6 = (sin6_t *)&addr; 819 *sin6 = sin6_null; 820 sin6->sin6_family = AF_INET6; 821 } 822 823 sa = (struct sockaddr *)&addr; 824 } 825 826 error = dccp_bind_check(connp, sa, len, cr, 827 bind_to_req_port_only); 828 if (error != 0) { 829 cmn_err(CE_NOTE, "dccp_bind_check failed"); 830 return (error); 831 } 832 /* Fall through and do the fanout insertion */ 833 } 834 835 do_listen: 836 ASSERT(dccp->dccp_state == DCCPS_BOUND || 837 dccp->dccp_state == DCCPS_LISTEN); 838 839 /* XXX backlog */ 840 841 connp->conn_recv = dccp_input_listener_unbound; 842 843 /* Insert into the classifier table */ 844 error = ip_laddr_fanout_insert(connp); 845 if (error != 0) { 846 /* Error - undo the bind */ 847 oldstate = dccp->dccp_state; 848 dccp->dccp_state = DCCPS_CLOSED; 849 850 connp->conn_bound_addr_v6 = 
ipv6_all_zeros; 851 852 connp->conn_laddr_v6 = ipv6_all_zeros; 853 connp->conn_saddr_v6 = ipv6_all_zeros; 854 connp->conn_ports = 0; 855 856 if (connp->conn_anon_port) { 857 zone_t *zone; 858 859 zone = crgetzone(cr); 860 connp->conn_anon_port = B_FALSE; 861 (void) tsol_mlp_anon(zone, connp->conn_mlp_type, 862 connp->conn_proto, connp->conn_lport, B_FALSE); 863 } 864 connp->conn_mlp_type = mlptSingle; 865 866 /* XXX dccp_bind_hash_remove */ 867 868 return (error); 869 } else { 870 /* XXX connection limits */ 871 } 872 873 return (error); 874 } 875 876 /* 877 * Common connect function. 878 */ 879 int 880 dccp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 881 cred_t *cr, pid_t pid) 882 { 883 dccp_t *dccp = connp->conn_dccp; 884 dccp_stack_t *dccps = dccp->dccp_dccps; 885 ip_xmit_attr_t *ixa = connp->conn_ixa; 886 mblk_t *req_mp; 887 sin_t *sin = (sin_t *)sa; 888 sin6_t *sin6 = (sin6_t *)sa; 889 ipaddr_t *dstaddrp; 890 in_port_t dstport; 891 uint_t srcid; 892 int32_t oldstate; 893 int error; 894 895 cmn_err(CE_NOTE, "dccp.c: dccp_do_connect"); 896 897 oldstate = dccp->dccp_state; 898 899 switch (len) { 900 case sizeof (sin_t): 901 sin = (sin_t *)sa; 902 if (sin->sin_port == 0) { 903 return (-TBADADDR); 904 } 905 if (connp->conn_ipv6_v6only) { 906 return (EAFNOSUPPORT); 907 } 908 break; 909 910 case sizeof (sin6_t): 911 sin6 = (sin6_t *)sa; 912 if (sin6->sin6_port == 0) { 913 return (-TBADADDR); 914 } 915 break; 916 917 default: 918 return (EINVAL); 919 } 920 921 if (connp->conn_family == AF_INET6 && 922 connp->conn_ipversion == IPV6_VERSION && 923 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 924 if (connp->conn_ipv6_v6only) { 925 return (EADDRNOTAVAIL); 926 } 927 928 connp->conn_ipversion = IPV4_VERSION; 929 } 930 931 switch (dccp->dccp_state) { 932 case DCCPS_LISTEN: 933 /* 934 * Listening sockets are not allowed to issue connect(). 
935 */ 936 if (IPCL_IS_NONSTR(connp)) { 937 return (EOPNOTSUPP); 938 } 939 940 case DCCPS_CLOSED: 941 /* 942 * We support quick connect. 943 */ 944 /* FALLTHRU */ 945 case DCCPS_OPEN: 946 break; 947 948 default: 949 return (-TOUTSTATE); 950 } 951 952 /* 953 * We update our cred/cpid based on the caller of connect. 954 */ 955 if (connp->conn_cred != cr) { 956 crhold(cr); 957 crfree(connp->conn_cred); 958 connp->conn_cred = cr; 959 } 960 connp->conn_cpid = pid; 961 962 /* Cache things in the ixa without any refhold */ 963 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 964 ixa->ixa_cred = cr; 965 ixa->ixa_cpid = pid; 966 967 if (is_system_labeled()) { 968 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 969 } 970 971 if (connp->conn_family == AF_INET6) { 972 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 973 error = dccp_connect_ipv6(dccp, &sin6->sin6_addr, 974 sin6->sin6_port, sin6->sin6_flowinfo, 975 sin6->__sin6_src_id, sin6->sin6_scope_id); 976 } else { 977 /* 978 * Destination adress is mapped IPv6 address. 979 * Source bound address should be unspecified or 980 * IPv6 mapped address as well. 
981 */ 982 if (!IN6_IS_ADDR_UNSPECIFIED( 983 &connp->conn_bound_addr_v6) && 984 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { 985 return (EADDRNOTAVAIL); 986 } 987 988 dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr)); 989 dstport = sin6->sin6_port; 990 srcid = sin6->__sin6_src_id; 991 error = dccp_connect_ipv4(dccp, dstaddrp, dstport, 992 srcid); 993 } 994 } else { 995 dstaddrp = &sin->sin_addr.s_addr; 996 dstport = sin->sin_port; 997 srcid = 0; 998 error = dccp_connect_ipv4(dccp, dstaddrp, dstport, srcid); 999 } 1000 1001 if (error != 0) { 1002 cmn_err(CE_NOTE, "dccp_connect_ip failed"); 1003 goto connect_failed; 1004 } 1005 1006 /* XXX cluster */ 1007 1008 /* Connect succeeded */ 1009 DCCPS_BUMP_MIB(dccps, dccpActiveOpens); 1010 dccp->dccp_active_open = 1; 1011 1012 DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *, 1013 connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL, 1014 int32_t, DCCPS_BOUND); 1015 1016 DCCP_TIMER_RESTART(dccp, 100); 1017 req_mp = dccp_generate_request(connp); 1018 if (req_mp != NULL) { 1019 /* 1020 * We must bump the generation before sending the request 1021 * to ensure that we use the right generation in case 1022 * this thread issues a "connected" up call. 1023 */ 1024 SOCK_CONNID_BUMP(dccp->dccp_connid); 1025 1026 DTRACE_DCCP5(connect__request, mblk_t *, NULL, 1027 ip_xmit_attr_t *, connp->conn_ixa, 1028 void_ip_t *, req_mp->b_rptr, dccp_t *, dccp, 1029 dccpha_t *, 1030 &req_mp->b_rptr[connp->conn_ixa->ixa_ip_hdr_length]); 1031 1032 dccp_send_data(dccp, req_mp); 1033 } 1034 1035 return (0); 1036 1037 connect_failed: 1038 cmn_err(CE_NOTE, "dccp_do_connect failed"); 1039 1040 connp->conn_faddr_v6 = ipv6_all_zeros; 1041 connp->conn_fport = 0; 1042 dccp->dccp_state = oldstate; 1043 1044 /* XXX */ 1045 return (error); 1046 } 1047 1048 /* 1049 * Init values of a connection. 
1050 */ 1051 void 1052 dccp_init_values(dccp_t *dccp, dccp_t *parent) 1053 { 1054 conn_t *connp = dccp->dccp_connp; 1055 dccp_stack_t *dccps = dccp->dccp_dccps; 1056 1057 connp->conn_mlp_type = mlptSingle; 1058 } 1059 1060 /* 1061 * Free dccp structure. 1062 */ 1063 void 1064 dccp_free(dccp_t *dccp) 1065 { 1066 conn_t *connp = dccp->dccp_connp; 1067 1068 cmn_err(CE_NOTE, "dccp.c: dccp_free"); 1069 1070 connp->conn_rq = NULL; 1071 connp->conn_wq = NULL; 1072 1073 if (connp->conn_upper_handle != NULL) { 1074 if (IPCL_IS_NONSTR(connp)) { 1075 (*connp->conn_upcalls->su_closed)( 1076 connp->conn_upper_handle); 1077 dccp->dccp_detached = B_TRUE; 1078 } 1079 1080 connp->conn_upper_handle = NULL; 1081 connp->conn_upcalls = NULL; 1082 } 1083 } 1084 1085 void * 1086 dccp_get_conn(void *arg, dccp_stack_t *dccps) 1087 { 1088 dccp_t *dccp = NULL; 1089 conn_t *connp; 1090 squeue_t *sqp = (squeue_t *)arg; 1091 netstack_t *ns; 1092 1093 /* XXX timewait */ 1094 1095 connp = ipcl_conn_create(IPCL_DCCPCONN, KM_NOSLEEP, 1096 dccps->dccps_netstack); 1097 if (connp == NULL) { 1098 return (NULL); 1099 } 1100 1101 dccp = connp->conn_dccp; 1102 dccp->dccp_dccps = dccps; 1103 1104 /* List of features being negotated */ 1105 list_create(&dccp->dccp_features, sizeof (dccp_feature_t), 1106 offsetof(dccp_feature_t, df_next)); 1107 1108 connp->conn_recv = dccp_input_data; 1109 connp->conn_recvicmp = dccp_icmp_input; 1110 connp->conn_verifyicmp = dccp_verifyicmp; 1111 1112 connp->conn_ixa->ixa_notify = dccp_notify; 1113 connp->conn_ixa->ixa_notify_cookie = dccp; 1114 1115 return ((void *)connp); 1116 } 1117 1118 /* 1119 * IPv4 connect. 
1120 */ 1121 static int 1122 dccp_connect_ipv4(dccp_t *dccp, ipaddr_t *dstaddrp, in_port_t dstport, 1123 uint_t srcid) 1124 { 1125 conn_t *connp = dccp->dccp_connp; 1126 dccp_stack_t *dccps = dccp->dccp_dccps; 1127 ipaddr_t dstaddr = *dstaddrp; 1128 uint16_t lport; 1129 int error; 1130 1131 cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv4"); 1132 1133 ASSERT(connp->conn_ipversion == IPV4_VERSION); 1134 1135 if (dstaddr == INADDR_ANY) { 1136 dstaddr = htonl(INADDR_LOOPBACK); 1137 *dstaddrp = dstaddr; 1138 } 1139 1140 /* Handle __sin6_src_id if socket not bound to an IP address */ 1141 if (srcid != 0 && connp->conn_laddr_v4 == INADDR_ANY) { 1142 ip_srcid_find_id(srcid, &connp->conn_laddr_v6, 1143 IPCL_ZONEID(connp), dccps->dccps_netstack); 1144 connp->conn_saddr_v6 = connp->conn_laddr_v6; 1145 } 1146 1147 IN6_IPADDR_TO_V4MAPPED(dstaddr, &connp->conn_faddr_v6); 1148 connp->conn_fport = dstport; 1149 1150 if (dccp->dccp_state == DCCPS_CLOSED) { 1151 lport = dccp_update_next_port(dccps->dccps_next_port_to_try, 1152 dccp, B_TRUE); 1153 lport = dccp_bindi(dccp, lport, &connp->conn_laddr_v6, 0, 1154 B_TRUE, B_FALSE, B_FALSE); 1155 if (lport == 0) { 1156 return (-TNOADDR); 1157 } 1158 } 1159 1160 error = dccp_set_destination(dccp); 1161 if (error != 0) { 1162 return (error); 1163 } 1164 1165 /* 1166 * Don't connect to oneself. 1167 */ 1168 if (connp->conn_faddr_v4 == connp->conn_laddr_v4 && 1169 connp->conn_fport == connp->conn_lport) { 1170 return (-TBADADDR); 1171 } 1172 1173 dccp->dccp_state = DCCPS_REQUEST; 1174 1175 return (ipcl_conn_insert_v4(connp)); 1176 } 1177 1178 /* 1179 * IPv6 connect. 1180 */ 1181 static int 1182 dccp_connect_ipv6(dccp_t *dccp, in6_addr_t *dstaddrp, in_port_t dstport, 1183 uint32_t flowinfo, uint_t srcid, uint32_t scope_id) 1184 { 1185 cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv6"); 1186 1187 return (0); 1188 } 1189 1190 /* 1191 * Set the ports via conn_connect and build the template 1192 * header. 
1193 */ 1194 int 1195 dccp_set_destination(dccp_t *dccp) 1196 { 1197 conn_t *connp = dccp->dccp_connp; 1198 dccp_stack_t *dccps = dccp->dccp_dccps; 1199 iulp_t uinfo; 1200 uint32_t flags; 1201 int error; 1202 1203 flags = IPDF_LSO | IPDF_ZCOPY; 1204 flags |= IPDF_UNIQUE_DCE; 1205 1206 mutex_enter(&connp->conn_lock); 1207 error = conn_connect(connp, &uinfo, flags); 1208 mutex_exit(&connp->conn_lock); 1209 if (error != 0) { 1210 cmn_err(CE_NOTE, "conn_connect failed"); 1211 return (error); 1212 } 1213 1214 error = dccp_build_hdrs(dccp); 1215 if (error != 0) { 1216 cmn_err(CE_NOTE, "dccp_build_hdrs failed"); 1217 return (error); 1218 } 1219 1220 /* XXX */ 1221 1222 /* Initialise the ISS */ 1223 dccp_iss_init(dccp); 1224 1225 mutex_enter(&connp->conn_lock); 1226 connp->conn_state_flags &= ~CONN_INCIPIENT; 1227 mutex_exit(&connp->conn_lock); 1228 1229 return (0); 1230 } 1231 1232 /* 1233 * Init the ISS. 1234 */ 1235 static void 1236 dccp_iss_init(dccp_t *dccp) 1237 { 1238 cmn_err(CE_NOTE, "dccp.c: dccp_iss_init"); 1239 1240 dccp->dccp_iss += gethrtime(); 1241 dccp->dccp_gss = dccp->dccp_iss; 1242 }