1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2012 David Hoeppner. All rights reserved.
29 */
30
31 /*
 * This file implements the Datagram Congestion Control Protocol (DCCP).
33 */
34
35 #include <sys/types.h>
36 #include <sys/stream.h>
37 #include <sys/stropts.h>
38 #include <sys/strlog.h>
39 #include <sys/strsun.h>
40 #define _SUN_TPI_VERSION 2
41 #include <sys/tihdr.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/sockio.h>
45 #include <sys/priv.h>
46 #include <sys/vtrace.h>
47 #include <sys/sdt.h>
48 #include <sys/debug.h>
49 #include <sys/ddi.h>
50 #include <sys/isa_defs.h>
51 #include <sys/policy.h>
52 #include <sys/tsol/label.h>
53 #include <sys/tsol/tnet.h>
54 #include <inet/dccp_impl.h>
55 #include <inet/dccp_stack.h>
56 #include <inet/kstatcom.h>
57 #include <inet/snmpcom.h>
58
59 #include <sys/cmn_err.h>
60
61 int dccp_squeue_flag;
62
63 /* Setable in /etc/system */
64 uint_t dccp_bind_fanout_size = DCCP_BIND_FANOUT_SIZE;
65
66 static void dccp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
67 ixa_notify_arg_t);
68
69 /* Functions to register netstack */
70 static void *dccp_stack_init(netstackid_t, netstack_t *);
71 static void dccp_stack_fini(netstackid_t, void *);
72
73 /* Stream device open functions */
74 static int dccp_openv4(queue_t *, dev_t *, int, int, cred_t *);
75 static int dccp_openv6(queue_t *, dev_t *, int, int, cred_t *);
76 static int dccp_open(queue_t *, dev_t *, int, int, cred_t *,
77 boolean_t);
78
79 /* Write service routine */
80 static void dccp_wsrv(queue_t *);
81
82 /* Connection related functions */
83 static int dccp_connect_ipv4(dccp_t *, ipaddr_t *, in_port_t, uint_t);
84 static int dccp_connect_ipv6(dccp_t *, in6_addr_t *, in_port_t, uint32_t,
85 uint_t, uint32_t);
86
87 /* Initialise ISS */
88 static void dccp_iss_init(dccp_t *);
89
90 struct module_info dccp_rinfo = {
91 DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, DCCP_RECV_HIWATER,
92 DCCP_RECV_LOWATER
93 };
94
95 static struct module_info dccp_winfo = {
96 DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, 127, 16
97 };
98
99 /*
100 * Queue information structure with DCCP entry points.
101 */
102 struct qinit dccp_rinitv4 = {
103 NULL, (pfi_t)dccp_rsrv, dccp_openv4, dccp_tpi_close, NULL, &dccp_rinfo
104 };
105
106 struct qinit dccp_rinitv6 = {
107 NULL, (pfi_t)dccp_rsrv, dccp_openv6, dccp_tpi_close, NULL, &dccp_rinfo
108 };
109
110 struct qinit dccp_winit = {
111 (pfi_t)dccp_wput, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo
112 };
113
114 /* Initial entry point for TCP in socket mode */
115 struct qinit dccp_sock_winit = {
116 (pfi_t)dccp_wput_sock, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo
117 };
118
119 struct qinit dccp_fallback_sock_winit = {
120 (pfi_t)dccp_wput_fallback, NULL, NULL, NULL, NULL, &dccp_winfo
121 };
122 /*
123 * DCCP as acceptor STREAM.
124 */
125 struct qinit dccp_acceptor_rinit = {
126 NULL, (pfi_t)dccp_rsrv, NULL, dccp_tpi_close_accept, NULL, &dccp_winfo
127 };
128
129 struct qinit dccp_acceptor_winit = {
130 (pfi_t)dccp_tpi_accept, NULL, NULL, NULL, NULL, &dccp_winfo
131 };
132
133 /* AF_INET /dev/dccp */
134 struct streamtab dccpinfov4 = {
135 &dccp_rinitv4, &dccp_winit
136 };
137
138 /* AF_INET6 /dev/dccp6 */
139 struct streamtab dccpinfov6 = {
140 &dccp_rinitv6, &dccp_winit
141 };
142
143 /* Template for response to info request */
144 struct T_info_ack dccp_g_t_info_ack = {
145 T_INFO_ACK, /* PRIM_type */
146 0, /* TSDU_size */
147 T_INFINITE, /* ETSDU_size */
148 T_INVALID, /* CDATA_size */
149 T_INVALID, /* DDATA_size */
150 sizeof (sin_t), /* ADDR_size */
151 0, /* OPT_size - not initialized here */
152 TIDUSZ, /* TIDU_size */
153 T_COTS_ORD, /* SERV_type */
154 DCCPS_CLOSED, /* CURRENT_state */
155 (XPG4_1|EXPINLINE) /* PROVIDER_flag */
156 };
157
158 struct T_info_ack dccp_g_t_info_ack_v6 = {
159 T_INFO_ACK, /* PRIM_type */
160 0, /* TSDU_size */
161 T_INFINITE, /* ETSDU_size */
162 T_INVALID, /* CDATA_size */
163 T_INVALID, /* DDATA_size */
164 sizeof (sin6_t), /* ADDR_size */
165 0, /* OPT_size - not initialized here */
166 TIDUSZ, /* TIDU_size */
167 T_COTS_ORD, /* SERV_type */
168 DCCPS_CLOSED, /* CURRENT_state */
169 (XPG4_1|EXPINLINE) /* PROVIDER_flag */
170 };
171
172 /*
173 * DCCP Tunables.
174 */
175 extern mod_prop_info_t dccp_propinfo_tbl[];
176 extern int dccp_propinfo_count;
177
178 /*
179 * Register DCCP in ip netstack.
180 */
181 void
182 dccp_ddi_g_init(void)
183 {
184 /* Global timer cache */
185 dccp_timercache = kmem_cache_create("dccp_timercache",
186 sizeof (dccp_timer_t) + sizeof (mblk_t), 0,
187 NULL, NULL, NULL, NULL, NULL, 0);
188
189 netstack_register(NS_DCCP, dccp_stack_init, NULL, dccp_stack_fini);
190 }
191
192 /*
193 * Unregister DCCP from ip netstack.
194 */
195 void
196 dccp_ddi_g_destroy(void)
197 {
198 /* Global timer cache */
199 kmem_cache_destroy(dccp_timercache);
200
201 netstack_unregister(NS_DCCP);
202 }
203
/* Driver name resolved to a major number for the per-stack LDI identity. */
#define	INET_NAME	"ip"
205
206 /*
207 * Initialize this DCCP stack instance.
208 */
209 static void *
210 dccp_stack_init(netstackid_t stackid, netstack_t *ns)
211 {
212 dccp_stack_t *dccps;
213 major_t major;
214 size_t arrsz;
215 int error;
216 int i;
217
218 dccps = kmem_zalloc(sizeof (*dccps), KM_SLEEP);
219 if (dccps == NULL) {
220 return (NULL);
221 }
222 dccps->dccps_netstack = ns;
223
224 /* Ports */
225 mutex_init(&dccps->dccps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
226 dccps->dccps_num_epriv_ports = DCCP_NUM_EPRIV_PORTS;
227 dccps->dccps_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
228 dccps->dccps_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
229 dccps->dccps_min_anonpriv_port = 512;
230
231 dccps->dccps_bind_fanout_size = dccp_bind_fanout_size;
232
233 /* Bind fanout */
234 dccps->dccps_bind_fanout = kmem_zalloc(dccps->dccps_bind_fanout_size *
235 sizeof (dccp_df_t), KM_SLEEP);
236 for (i = 0; i < dccps->dccps_bind_fanout_size; i++) {
237 mutex_init(&dccps->dccps_bind_fanout[i].df_lock, NULL,
238 MUTEX_DEFAULT, NULL);
239 }
240
241 /* Tunable properties */
242 arrsz = dccp_propinfo_count * sizeof (mod_prop_info_t);
243 dccps->dccps_propinfo_tbl = kmem_alloc(arrsz, KM_SLEEP);
244 if (dccps->dccps_propinfo_tbl == NULL) {
245 kmem_free(dccps, sizeof (*dccps));
246 return (NULL);
247 }
248 bcopy(dccp_propinfo_tbl, dccps->dccps_propinfo_tbl, arrsz);
249
250 /* Allocate per netstack cpu stats */
251 mutex_enter(&cpu_lock);
252 dccps->dccps_sc_cnt = MAX(ncpus, boot_ncpus);
253 mutex_exit(&cpu_lock);
254
255 dccps->dccps_sc = kmem_zalloc(max_ncpus * sizeof (dccp_stats_cpu_t *),
256 KM_SLEEP);
257 for (i = 0; i < dccps->dccps_sc_cnt; i++) {
258 dccps->dccps_sc[i] = kmem_zalloc(sizeof (dccp_stats_cpu_t),
259 KM_SLEEP);
260 }
261
262 /* Kernel statistics */
263 dccps->dccps_kstat = dccp_kstat2_init(stackid);
264 dccps->dccps_mibkp = dccp_kstat_init(stackid);
265
266 /* Driver major number */
267 major = mod_name_to_major(INET_NAME);
268 error = ldi_ident_from_major(major, &dccps->dccps_ldi_ident);
269 ASSERT(error == 0);
270
271 return (dccps);
272 }
273
274 /*
275 * Destroy this DCCP netstack instance.
276 */
277 static void
278 dccp_stack_fini(netstackid_t stackid, void *arg)
279 {
280 dccp_stack_t *dccps = (dccp_stack_t *)arg;
281 int i;
282
283 /* Free cpu stats */
284 for (i = 0; i < dccps->dccps_sc_cnt; i++) {
285 kmem_free(dccps->dccps_sc[i], sizeof (dccp_stats_cpu_t));
286 }
287 kmem_free(dccps->dccps_sc, max_ncpus * sizeof (dccp_stats_cpu_t *));
288
289 /* Free tunable properties */
290 kmem_free(dccps->dccps_propinfo_tbl,
291 dccp_propinfo_count * sizeof (mod_prop_info_t));
292 dccps->dccps_propinfo_tbl = NULL;
293
294 /* Free bind fanout */
295 for (i = 0; i < dccps->dccps_bind_fanout_size; i++) {
296 ASSERT(dccps->dccps_bind_fanout[i].df_dccp == NULL);
297 mutex_destroy(&dccps->dccps_bind_fanout[i].df_lock);
298 }
299 kmem_free(dccps->dccps_bind_fanout, dccps->dccps_bind_fanout_size *
300 sizeof (dccp_df_t));
301 dccps->dccps_bind_fanout = NULL;
302
303 /* Kernel statistics */
304 dccp_kstat_fini(stackid, dccps->dccps_mibkp);
305 dccps->dccps_mibkp = NULL;
306
307 ldi_ident_release(dccps->dccps_ldi_ident);
308
309 kmem_free(dccps, sizeof (*dccps));
310 }
311
312 /* /dev/dccp */
313 static int
314 dccp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
315 {
316 cmn_err(CE_NOTE, "dccp.c: dccp_openv4\n");
317
318 return (dccp_open(q, devp, flag, sflag, credp, B_FALSE));
319 }
320
321 /* /dev/dccp6 */
322 static int
323 dccp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
324 {
325 cmn_err(CE_NOTE, "dccp.c: dccp_openv6\n");
326
327 return (dccp_open(q, devp, flag, sflag, credp, B_TRUE));
328 }
329
330 /*
331 * Common open function for v4 and v6 devices.
332 */
333 static int
334 dccp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
335 boolean_t isv6)
336 {
337 conn_t *connp;
338 dccp_t *dccp;
339 vmem_t *minor_arena;
340 dev_t conn_dev;
341 boolean_t issocket;
342 int error;
343
344 cmn_err(CE_NOTE, "dccp.c: dccp_open");
345
346 /* If the stream is already open, return immediately */
347 if (q->q_ptr != NULL) {
348 return (0);
349 }
350
351 if (sflag == MODOPEN) {
352 return (EINVAL);
353 }
354
355 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
356 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
357 minor_arena = ip_minor_arena_la;
358 } else {
359 /*
360 * Either minor numbers in the large arena were exhausted
361 * or a non socket application is doing the open.
362 * Try to allocate from the small arena.
363 */
364 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
365 return (EBUSY);
366 }
367 minor_arena = ip_minor_arena_sa;
368 }
369
370 ASSERT(minor_arena != NULL);
371
372 *devp = makedevice(getmajor(*devp), (minor_t)conn_dev);
373
374 if (flag & SO_FALLBACK) {
375 /*
376 * Non streams socket needs a stream to fallback to.
377 */
378 RD(q)->q_ptr = (void *)conn_dev;
379 WR(q)->q_qinfo = &dccp_fallback_sock_winit;
380 WR(q)->q_ptr = (void *)minor_arena;
381 qprocson(q);
382 return (0);
383 } else if (flag & SO_ACCEPTOR) {
384 q->q_qinfo = &dccp_acceptor_rinit;
385 /*
386 * The conn_dev and minor_arena will be subsequently used by
387 * dccp_tli_accept() and dccp_tpi_close_accept() to figure out
388 * the minor device number for this connection from the q_ptr.
389 */
390 RD(q)->q_ptr = (void *)conn_dev;
391 WR(q)->q_qinfo = &dccp_acceptor_winit;
392 WR(q)->q_ptr = (void *)minor_arena;
393 qprocson(q);
394 return (0);
395 }
396
397 issocket = flag & SO_SOCKSTR;
398 connp = dccp_create_common(credp, isv6, issocket, &error);
399 if (connp == NULL) {
400 inet_minor_free(minor_arena, conn_dev);
401 q->q_ptr = WR(q)->q_ptr = NULL;
402 return (error);
403 }
404
405 connp->conn_rq = q;
406 connp->conn_wq = WR(q);
407 q->q_ptr = WR(q)->q_ptr = connp;
408
409 connp->conn_dev = conn_dev;
410 connp->conn_minor_arena = minor_arena;
411
412 ASSERT(q->q_qinfo == &dccp_rinitv4 || q->q_qinfo == &dccp_rinitv6);
413 ASSERT(WR(q)->q_qinfo == &dccp_winit);
414
415 dccp = connp->conn_dccp;
416
417 if (issocket) {
418 WR(q)->q_qinfo = &dccp_sock_winit;
419 } else {
420 #ifdef _ILP32
421 dccp->dccp_acceptor_id = (t_uscalar_t)RD(q);
422 #else
423 dccp->dccp_acceptor_id = conn_dev;
424 #endif /* _ILP32 */
425 }
426
427 /*
428 * Put the ref for DCCP. Ref for IP was already put
429 * by ipcl_conn_create. Also Make the conn_t globally
430 * visible to walkers.
431 */
432 mutex_enter(&connp->conn_lock);
433 CONN_INC_REF_LOCKED(connp);
434 ASSERT(connp->conn_ref == 2);
435 connp->conn_state_flags &= ~CONN_INCIPIENT;
436 mutex_exit(&connp->conn_lock);
437
438 qprocson(q);
439
440 return (0);
441 }
442
443 /*
444 * IXA notify
445 */
446 static void
447 dccp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
448 ixa_notify_arg_t narg)
449 {
450 cmn_err(CE_NOTE, "dccp.c: dccp_notify");
451 }
452
453 /*
454 * Build the template headers.
455 */
456 int
457 dccp_build_hdrs(dccp_t *dccp)
458 {
459 dccp_stack_t *dccps = dccp->dccp_dccps;
460 conn_t *connp = dccp->dccp_connp;
461 dccpha_t *dccpha;
462 uint32_t cksum;
463 char buf[DCCP_MAX_HDR_LENGTH];
464 uint_t buflen;
465 uint_t ulplen = 12;
466 uint_t extralen = 0;
467 int error;
468
469 cmn_err(CE_NOTE, "dccp.c: dccp_build_hdrs");
470
471 buflen = connp->conn_ht_ulp_len;
472 if (buflen != 0) {
473 cmn_err(CE_NOTE, "buflen != 0");
474 bcopy(connp->conn_ht_ulp, buf, buflen);
475 extralen -= buflen - ulplen;
476 ulplen = buflen;
477 }
478
479 mutex_enter(&connp->conn_lock);
480 error = conn_build_hdr_template(connp, ulplen, extralen,
481 &connp->conn_laddr_v6, &connp->conn_faddr_v6, connp->conn_flowinfo);
482 mutex_exit(&connp->conn_lock);
483 if (error != 0) {
484 cmn_err(CE_NOTE, "conn_build_hdr_template failed");
485 return (error);
486 }
487
488 dccpha = (dccpha_t *)connp->conn_ht_ulp;
489 dccp->dccp_dccpha = dccpha;
490
491 if (buflen != 0) {
492 bcopy(buf, connp->conn_ht_ulp, buflen);
493 } else {
494 dccpha->dha_sum = 0;
495 dccpha->dha_lport = connp->conn_lport;
496 dccpha->dha_fport = connp->conn_fport;
497 }
498
499 cksum = sizeof (dccpha_t) + connp->conn_sum;
500 cksum = (cksum >> 16) + (cksum & 0xFFFF);
501 dccpha->dha_sum = htons(cksum);
502 dccpha->dha_offset = 7;
503 dccpha->dha_x = 1;
504
505 if (connp->conn_ipversion == IPV4_VERSION) {
506 dccp->dccp_ipha = (ipha_t *)connp->conn_ht_iphc;
507 } else {
508 dccp->dccp_ip6h = (ip6_t *)connp->conn_ht_iphc;
509 }
510
511 /* XXX */
512
513 return (0);
514 }
515
516 /*
517 * DCCP write service routine.
518 */
519 static void
520 dccp_wsrv(queue_t *q)
521 {
522 dccp_stack_t *dccps = Q_TO_DCCP(q)->dccp_dccps;
523
524 DCCP_STAT(dccps, dccp_wsrv_called);
525 }
526
527 /*
528 * Common create function for streams and sockets.
529 */
530 conn_t *
531 dccp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket,
532 int *errorp)
533 {
534 conn_t *connp;
535 dccp_t *dccp;
536 dccp_stack_t *dccps;
537 netstack_t *ns;
538 squeue_t *sqp;
539 zoneid_t zoneid;
540 int error;
541
542 cmn_err(CE_NOTE, "dccp.c: dccp_create_common\n");
543
544 ASSERT(errorp != NULL);
545
546 error = secpolicy_basic_net_access(credp);
547 if (error != 0) {
548 *errorp = error;
549 return (NULL);
550 }
551
552 /*
553 * Find the right netstack.
554 */
555 ns = netstack_find_by_cred(credp);
556 ASSERT(ns != NULL);
557 dccps = ns->netstack_dccp;
558 ASSERT(dccps != NULL);
559
560 /*
561 * For exclusive stacks we set the zoneid to zero
562 * to make TCP operate as if in the global zone.
563 */
564 if (ns->netstack_stackid != GLOBAL_NETSTACKID) {
565 zoneid = GLOBAL_ZONEID;
566 } else {
567 zoneid = crgetzoneid(credp);
568 }
569
570 sqp = IP_SQUEUE_GET((uint_t)gethrtime());
571 connp = (conn_t *)dccp_get_conn(sqp, dccps);
572 netstack_rele(dccps->dccps_netstack);
573 if (connp == NULL) {
574 *errorp = ENOSR;
575 return (NULL);
576 }
577 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
578
579 connp->conn_sqp = sqp;
580 connp->conn_initial_sqp = connp->conn_sqp;
581 connp->conn_ixa->ixa_sqp = connp->conn_sqp;
582 dccp = connp->conn_dccp;
583
584 /* Setting flags for ip output */
585 connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE |
586 IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO;
587
588 ASSERT(connp->conn_proto == IPPROTO_DCCP);
589 ASSERT(connp->conn_dccp == dccp);
590 ASSERT(dccp->dccp_connp == connp);
591
592 if (isv6) {
593 connp->conn_ixa->ixa_src_preferences = IPV6_PREFER_SRC_DEFAULT;
594 connp->conn_ipversion = IPV6_VERSION;
595 connp->conn_family = AF_INET6;
596 /* XXX mms, ttl */
597 } else {
598 connp->conn_ipversion = IPV4_VERSION;
599 connp->conn_family = AF_INET;
600 /* XXX mms, ttl */
601 }
602 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
603
604 crhold(credp);
605 connp->conn_cred = credp;
606 connp->conn_cpid = curproc->p_pid;
607 connp->conn_open_time = ddi_get_lbolt64();
608
609 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
610 connp->conn_ixa->ixa_cred = credp;
611 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
612
613 connp->conn_zoneid = zoneid;
614 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
615 connp->conn_ixa->ixa_zoneid = zoneid;
616 connp->conn_mlp_type = mlptSingle;
617
618 dccp->dccp_dccps = dccps;
619 dccp->dccp_state = DCCPS_CLOSED;
620
621 ASSERT(connp->conn_netstack == dccps->dccps_netstack);
622 ASSERT(dccp->dccp_dccps == dccps);
623
624 /*
625 * If the caller has the process-wide flag set, then default to MAC
626 * exempt mode. This allows read-down to unlabeled hosts.
627 */
628 if (getpflags(NET_MAC_AWARE, credp) != 0) {
629 connp->conn_mac_mode = CONN_MAC_AWARE;
630 }
631
632 if (issocket) {
633 dccp->dccp_issocket = 1;
634 }
635
636 /* XXX rcvbuf, sndbuf etc */
637
638 connp->conn_so_type = SOCK_STREAM;
639
640 SOCK_CONNID_INIT(dccp->dccp_connid);
641 dccp_init_values(dccp, NULL);
642
643 return (connp);
644 }
645
646 /*
647 * Common close function for streams and sockets.
648 */
649 void
650 dccp_close_common(conn_t *connp, int flags)
651 {
652 dccp_t *dccp = connp->conn_dccp;
653 mblk_t *mp;
654 boolean_t conn_ioctl_cleanup_reqd = B_FALSE;
655
656 cmn_err(CE_NOTE, "dccp.c: dccp_close_common");
657
658 ASSERT(connp->conn_ref >= 2);
659
660 /*
661 * Mark the conn as closing. ipsq_pending_mp_add will not
662 * add any mp to the pending mp list, after this conn has
663 * started closing.
664 */
665 mutex_enter(&connp->conn_lock);
666 connp->conn_state_flags |= CONN_CLOSING;
667
668 if (connp->conn_oper_pending_ill != NULL) {
669 conn_ioctl_cleanup_reqd = B_TRUE;
670 }
671
672 CONN_INC_REF_LOCKED(connp);
673 mutex_exit(&connp->conn_lock);
674
675 ASSERT(connp->conn_ref >= 3);
676
677 /*
678 * Cleanup any queued ioctls here. This must be done before the wq/rq
679 * are re-written by dccp_close_output().
680 */
681 if (conn_ioctl_cleanup_reqd) {
682 conn_ioctl_cleanup(connp);
683 }
684
685 mutex_enter(&connp->conn_lock);
686 while (connp->conn_ioctlref > 0) {
687 cv_wait(&connp->conn_cv, &connp->conn_lock);
688 }
689 ASSERT(connp->conn_ioctlref == 0);
690 ASSERT(connp->conn_oper_pending_ill == NULL);
691 mutex_exit(&connp->conn_lock);
692
693 /* generate close */
694 /*
695 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, dccp_close_output, connp,
696 NULL, dccp_squeue_flag, SQTAG_IP_DCCP_CLOSE);
697
698 */
699
700 nowait:
701 connp->conn_cpid = NOPID;
702 }
703
704 /*
705 * Common bind function.
706 */
707 int
708 dccp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
709 boolean_t bind_to_req_port_only)
710 {
711 dccp_t *dccp = connp->conn_dccp;
712 int error;
713
714 cmn_err(CE_NOTE, "dccp.c: dccp_do_bind");
715
716 if (dccp->dccp_state >= DCCPS_BOUND) {
717 if (connp->conn_debug) {
718 (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
719 "dccp_bind: bad state, %d", dccp->dccp_state);
720 }
721 return (-TOUTSTATE);
722 }
723
724 error = dccp_bind_check(connp, sa, len, cr, bind_to_req_port_only);
725 if (error != 0) {
726 return (error);
727 }
728
729 ASSERT(dccp->dccp_state == DCCPS_LISTEN);
730 /* XXX dccp_conn_req_max = 0 */
731
732 return (0);
733 }
734
735 /*
736 * Common unbind function.
737 */
738 int
739 dccp_do_unbind(conn_t *connp)
740 {
741 dccp_t *dccp = connp->conn_dccp;
742 int32_t oldstate;
743
744 cmn_err(CE_NOTE, "dccp.c: dccp_do_unbind");
745
746 switch (dccp->dccp_state) {
747 case DCCPS_OPEN:
748 case DCCPS_LISTEN:
749 break;
750 default:
751 return (-TOUTSTATE);
752 }
753
754 connp->conn_laddr_v6 = ipv6_all_zeros;
755 connp->conn_saddr_v6 = ipv6_all_zeros;
756
757 dccp_bind_hash_remove(dccp);
758
759 oldstate = dccp->dccp_state;
760 dccp->dccp_state = DCCPS_CLOSED;
761 DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
762 connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL,
763 int32_t, oldstate);
764
765 ip_unbind(connp);
766 bzero(&connp->conn_ports, sizeof (connp->conn_ports));
767
768 return (0);
769 }
770
771 /*
772 * Common listen function.
773 */
774 int
775 dccp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len,
776 int backlog, cred_t *cr, boolean_t bind_to_req_port_only)
777 {
778 dccp_t *dccp = connp->conn_dccp;
779 dccp_stack_t *dccps = dccp->dccp_dccps;
780 int32_t oldstate;
781 int error;
782
783 cmn_err(CE_NOTE, "dccp.c: dccp_do_listen");
784
785 /* All Solaris components should pass a cred for this operation */
786 ASSERT(cr != NULL);
787
788 if (dccp->dccp_state >= DCCPS_BOUND) {
789
790 if ((dccp->dccp_state == DCCPS_BOUND ||
791 dccp->dccp_state == DCCPS_LISTEN) && backlog > 0) {
792 goto do_listen;
793 }
794 cmn_err(CE_NOTE, "DCCPS_BOUND, bad state");
795
796 if (connp->conn_debug) {
797 (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
798 "dccp_listen: bad state, %d", dccp->dccp_state);
799 }
800 return (-TOUTSTATE);
801 } else {
802 if (sa == NULL) {
803 sin6_t addr;
804 sin6_t *sin6;
805 sin_t *sin;
806
807 ASSERT(IPCL_IS_NONSTR(connp));
808
809 if (connp->conn_family == AF_INET) {
810 len = sizeof (sin_t);
811 sin = (sin_t *)&addr;
812 *sin = sin_null;
813 sin->sin_family = AF_INET;
814 } else {
815 ASSERT(connp->conn_family == AF_INET6);
816
817 len = sizeof (sin6_t);
818 sin6 = (sin6_t *)&addr;
819 *sin6 = sin6_null;
820 sin6->sin6_family = AF_INET6;
821 }
822
823 sa = (struct sockaddr *)&addr;
824 }
825
826 error = dccp_bind_check(connp, sa, len, cr,
827 bind_to_req_port_only);
828 if (error != 0) {
829 cmn_err(CE_NOTE, "dccp_bind_check failed");
830 return (error);
831 }
832 /* Fall through and do the fanout insertion */
833 }
834
835 do_listen:
836 ASSERT(dccp->dccp_state == DCCPS_BOUND ||
837 dccp->dccp_state == DCCPS_LISTEN);
838
839 /* XXX backlog */
840
841 connp->conn_recv = dccp_input_listener_unbound;
842
843 /* Insert into the classifier table */
844 error = ip_laddr_fanout_insert(connp);
845 if (error != 0) {
846 /* Error - undo the bind */
847 oldstate = dccp->dccp_state;
848 dccp->dccp_state = DCCPS_CLOSED;
849
850 connp->conn_bound_addr_v6 = ipv6_all_zeros;
851
852 connp->conn_laddr_v6 = ipv6_all_zeros;
853 connp->conn_saddr_v6 = ipv6_all_zeros;
854 connp->conn_ports = 0;
855
856 if (connp->conn_anon_port) {
857 zone_t *zone;
858
859 zone = crgetzone(cr);
860 connp->conn_anon_port = B_FALSE;
861 (void) tsol_mlp_anon(zone, connp->conn_mlp_type,
862 connp->conn_proto, connp->conn_lport, B_FALSE);
863 }
864 connp->conn_mlp_type = mlptSingle;
865
866 /* XXX dccp_bind_hash_remove */
867
868 return (error);
869 } else {
870 /* XXX connection limits */
871 }
872
873 return (error);
874 }
875
876 /*
877 * Common connect function.
878 */
879 int
880 dccp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
881 cred_t *cr, pid_t pid)
882 {
883 dccp_t *dccp = connp->conn_dccp;
884 dccp_stack_t *dccps = dccp->dccp_dccps;
885 ip_xmit_attr_t *ixa = connp->conn_ixa;
886 mblk_t *req_mp;
887 sin_t *sin = (sin_t *)sa;
888 sin6_t *sin6 = (sin6_t *)sa;
889 ipaddr_t *dstaddrp;
890 in_port_t dstport;
891 uint_t srcid;
892 int32_t oldstate;
893 int error;
894
895 cmn_err(CE_NOTE, "dccp.c: dccp_do_connect");
896
897 oldstate = dccp->dccp_state;
898
899 switch (len) {
900 case sizeof (sin_t):
901 sin = (sin_t *)sa;
902 if (sin->sin_port == 0) {
903 return (-TBADADDR);
904 }
905 if (connp->conn_ipv6_v6only) {
906 return (EAFNOSUPPORT);
907 }
908 break;
909
910 case sizeof (sin6_t):
911 sin6 = (sin6_t *)sa;
912 if (sin6->sin6_port == 0) {
913 return (-TBADADDR);
914 }
915 break;
916
917 default:
918 return (EINVAL);
919 }
920
921 if (connp->conn_family == AF_INET6 &&
922 connp->conn_ipversion == IPV6_VERSION &&
923 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
924 if (connp->conn_ipv6_v6only) {
925 return (EADDRNOTAVAIL);
926 }
927
928 connp->conn_ipversion = IPV4_VERSION;
929 }
930
931 switch (dccp->dccp_state) {
932 case DCCPS_LISTEN:
933 /*
934 * Listening sockets are not allowed to issue connect().
935 */
936 if (IPCL_IS_NONSTR(connp)) {
937 return (EOPNOTSUPP);
938 }
939
940 case DCCPS_CLOSED:
941 /*
942 * We support quick connect.
943 */
944 /* FALLTHRU */
945 case DCCPS_OPEN:
946 break;
947
948 default:
949 return (-TOUTSTATE);
950 }
951
952 /*
953 * We update our cred/cpid based on the caller of connect.
954 */
955 if (connp->conn_cred != cr) {
956 crhold(cr);
957 crfree(connp->conn_cred);
958 connp->conn_cred = cr;
959 }
960 connp->conn_cpid = pid;
961
962 /* Cache things in the ixa without any refhold */
963 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
964 ixa->ixa_cred = cr;
965 ixa->ixa_cpid = pid;
966
967 if (is_system_labeled()) {
968 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
969 }
970
971 if (connp->conn_family == AF_INET6) {
972 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
973 error = dccp_connect_ipv6(dccp, &sin6->sin6_addr,
974 sin6->sin6_port, sin6->sin6_flowinfo,
975 sin6->__sin6_src_id, sin6->sin6_scope_id);
976 } else {
977 /*
978 * Destination adress is mapped IPv6 address.
979 * Source bound address should be unspecified or
980 * IPv6 mapped address as well.
981 */
982 if (!IN6_IS_ADDR_UNSPECIFIED(
983 &connp->conn_bound_addr_v6) &&
984 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
985 return (EADDRNOTAVAIL);
986 }
987
988 dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr));
989 dstport = sin6->sin6_port;
990 srcid = sin6->__sin6_src_id;
991 error = dccp_connect_ipv4(dccp, dstaddrp, dstport,
992 srcid);
993 }
994 } else {
995 dstaddrp = &sin->sin_addr.s_addr;
996 dstport = sin->sin_port;
997 srcid = 0;
998 error = dccp_connect_ipv4(dccp, dstaddrp, dstport, srcid);
999 }
1000
1001 if (error != 0) {
1002 cmn_err(CE_NOTE, "dccp_connect_ip failed");
1003 goto connect_failed;
1004 }
1005
1006 /* XXX cluster */
1007
1008 /* Connect succeeded */
1009 DCCPS_BUMP_MIB(dccps, dccpActiveOpens);
1010 dccp->dccp_active_open = 1;
1011
1012 DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
1013 connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL,
1014 int32_t, DCCPS_BOUND);
1015
1016 DCCP_TIMER_RESTART(dccp, 100);
1017 req_mp = dccp_generate_request(connp);
1018 if (req_mp != NULL) {
1019 /*
1020 * We must bump the generation before sending the request
1021 * to ensure that we use the right generation in case
1022 * this thread issues a "connected" up call.
1023 */
1024 SOCK_CONNID_BUMP(dccp->dccp_connid);
1025
1026 DTRACE_DCCP5(connect__request, mblk_t *, NULL,
1027 ip_xmit_attr_t *, connp->conn_ixa,
1028 void_ip_t *, req_mp->b_rptr, dccp_t *, dccp,
1029 dccpha_t *,
1030 &req_mp->b_rptr[connp->conn_ixa->ixa_ip_hdr_length]);
1031
1032 dccp_send_data(dccp, req_mp);
1033 }
1034
1035 return (0);
1036
1037 connect_failed:
1038 cmn_err(CE_NOTE, "dccp_do_connect failed");
1039
1040 connp->conn_faddr_v6 = ipv6_all_zeros;
1041 connp->conn_fport = 0;
1042 dccp->dccp_state = oldstate;
1043
1044 /* XXX */
1045 return (error);
1046 }
1047
1048 /*
1049 * Init values of a connection.
1050 */
1051 void
1052 dccp_init_values(dccp_t *dccp, dccp_t *parent)
1053 {
1054 conn_t *connp = dccp->dccp_connp;
1055 dccp_stack_t *dccps = dccp->dccp_dccps;
1056
1057 connp->conn_mlp_type = mlptSingle;
1058 }
1059
1060 /*
1061 * Free dccp structure.
1062 */
1063 void
1064 dccp_free(dccp_t *dccp)
1065 {
1066 conn_t *connp = dccp->dccp_connp;
1067
1068 cmn_err(CE_NOTE, "dccp.c: dccp_free");
1069
1070 connp->conn_rq = NULL;
1071 connp->conn_wq = NULL;
1072
1073 if (connp->conn_upper_handle != NULL) {
1074 if (IPCL_IS_NONSTR(connp)) {
1075 (*connp->conn_upcalls->su_closed)(
1076 connp->conn_upper_handle);
1077 dccp->dccp_detached = B_TRUE;
1078 }
1079
1080 connp->conn_upper_handle = NULL;
1081 connp->conn_upcalls = NULL;
1082 }
1083 }
1084
1085 void *
1086 dccp_get_conn(void *arg, dccp_stack_t *dccps)
1087 {
1088 dccp_t *dccp = NULL;
1089 conn_t *connp;
1090 squeue_t *sqp = (squeue_t *)arg;
1091 netstack_t *ns;
1092
1093 /* XXX timewait */
1094
1095 connp = ipcl_conn_create(IPCL_DCCPCONN, KM_NOSLEEP,
1096 dccps->dccps_netstack);
1097 if (connp == NULL) {
1098 return (NULL);
1099 }
1100
1101 dccp = connp->conn_dccp;
1102 dccp->dccp_dccps = dccps;
1103
1104 /* List of features being negotated */
1105 list_create(&dccp->dccp_features, sizeof (dccp_feature_t),
1106 offsetof(dccp_feature_t, df_next));
1107
1108 connp->conn_recv = dccp_input_data;
1109 connp->conn_recvicmp = dccp_icmp_input;
1110 connp->conn_verifyicmp = dccp_verifyicmp;
1111
1112 connp->conn_ixa->ixa_notify = dccp_notify;
1113 connp->conn_ixa->ixa_notify_cookie = dccp;
1114
1115 return ((void *)connp);
1116 }
1117
1118 /*
1119 * IPv4 connect.
1120 */
1121 static int
1122 dccp_connect_ipv4(dccp_t *dccp, ipaddr_t *dstaddrp, in_port_t dstport,
1123 uint_t srcid)
1124 {
1125 conn_t *connp = dccp->dccp_connp;
1126 dccp_stack_t *dccps = dccp->dccp_dccps;
1127 ipaddr_t dstaddr = *dstaddrp;
1128 uint16_t lport;
1129 int error;
1130
1131 cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv4");
1132
1133 ASSERT(connp->conn_ipversion == IPV4_VERSION);
1134
1135 if (dstaddr == INADDR_ANY) {
1136 dstaddr = htonl(INADDR_LOOPBACK);
1137 *dstaddrp = dstaddr;
1138 }
1139
1140 /* Handle __sin6_src_id if socket not bound to an IP address */
1141 if (srcid != 0 && connp->conn_laddr_v4 == INADDR_ANY) {
1142 ip_srcid_find_id(srcid, &connp->conn_laddr_v6,
1143 IPCL_ZONEID(connp), dccps->dccps_netstack);
1144 connp->conn_saddr_v6 = connp->conn_laddr_v6;
1145 }
1146
1147 IN6_IPADDR_TO_V4MAPPED(dstaddr, &connp->conn_faddr_v6);
1148 connp->conn_fport = dstport;
1149
1150 if (dccp->dccp_state == DCCPS_CLOSED) {
1151 lport = dccp_update_next_port(dccps->dccps_next_port_to_try,
1152 dccp, B_TRUE);
1153 lport = dccp_bindi(dccp, lport, &connp->conn_laddr_v6, 0,
1154 B_TRUE, B_FALSE, B_FALSE);
1155 if (lport == 0) {
1156 return (-TNOADDR);
1157 }
1158 }
1159
1160 error = dccp_set_destination(dccp);
1161 if (error != 0) {
1162 return (error);
1163 }
1164
1165 /*
1166 * Don't connect to oneself.
1167 */
1168 if (connp->conn_faddr_v4 == connp->conn_laddr_v4 &&
1169 connp->conn_fport == connp->conn_lport) {
1170 return (-TBADADDR);
1171 }
1172
1173 dccp->dccp_state = DCCPS_REQUEST;
1174
1175 return (ipcl_conn_insert_v4(connp));
1176 }
1177
1178 /*
1179 * IPv6 connect.
1180 */
1181 static int
1182 dccp_connect_ipv6(dccp_t *dccp, in6_addr_t *dstaddrp, in_port_t dstport,
1183 uint32_t flowinfo, uint_t srcid, uint32_t scope_id)
1184 {
1185 cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv6");
1186
1187 return (0);
1188 }
1189
1190 /*
1191 * Set the ports via conn_connect and build the template
1192 * header.
1193 */
1194 int
1195 dccp_set_destination(dccp_t *dccp)
1196 {
1197 conn_t *connp = dccp->dccp_connp;
1198 dccp_stack_t *dccps = dccp->dccp_dccps;
1199 iulp_t uinfo;
1200 uint32_t flags;
1201 int error;
1202
1203 flags = IPDF_LSO | IPDF_ZCOPY;
1204 flags |= IPDF_UNIQUE_DCE;
1205
1206 mutex_enter(&connp->conn_lock);
1207 error = conn_connect(connp, &uinfo, flags);
1208 mutex_exit(&connp->conn_lock);
1209 if (error != 0) {
1210 cmn_err(CE_NOTE, "conn_connect failed");
1211 return (error);
1212 }
1213
1214 error = dccp_build_hdrs(dccp);
1215 if (error != 0) {
1216 cmn_err(CE_NOTE, "dccp_build_hdrs failed");
1217 return (error);
1218 }
1219
1220 /* XXX */
1221
1222 /* Initialise the ISS */
1223 dccp_iss_init(dccp);
1224
1225 mutex_enter(&connp->conn_lock);
1226 connp->conn_state_flags &= ~CONN_INCIPIENT;
1227 mutex_exit(&connp->conn_lock);
1228
1229 return (0);
1230 }
1231
1232 /*
1233 * Init the ISS.
1234 */
1235 static void
1236 dccp_iss_init(dccp_t *dccp)
1237 {
1238 cmn_err(CE_NOTE, "dccp.c: dccp_iss_init");
1239
1240 dccp->dccp_iss += gethrtime();
1241 dccp->dccp_gss = dccp->dccp_iss;
1242 }