1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2012 David Hoeppner. All rights reserved.
29 */
30
31 /*
 * This file implements the Datagram Congestion Control Protocol (DCCP).
33 */
34
35 #include <sys/types.h>
36 #include <sys/stream.h>
37 #include <sys/stropts.h>
38 #include <sys/strlog.h>
39 #include <sys/strsun.h>
40 #define _SUN_TPI_VERSION 2
41 #include <sys/tihdr.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/sockio.h>
45 #include <sys/priv.h>
46 #include <sys/vtrace.h>
47 #include <sys/sdt.h>
48 #include <sys/debug.h>
49 #include <sys/ddi.h>
50 #include <sys/isa_defs.h>
51 #include <sys/policy.h>
52 #include <sys/tsol/label.h>
53 #include <sys/tsol/tnet.h>
54 #include <inet/dccp_impl.h>
55 #include <inet/dccp_stack.h>
56 #include <inet/kstatcom.h>
57 #include <inet/snmpcom.h>
58
59 #include <sys/cmn_err.h>
60
/*
 * Flag argument intended for squeue entry.  In the code visible here it
 * is never assigned and is referenced only by the currently
 * commented-out SQUEUE_ENTER_ONE() call in dccp_close_common().
 */
int dccp_squeue_flag;

/*
 * Settable in /etc/system.  dccp_stack_init() copies this value into
 * each stack instance's dccps_bind_fanout_size, where it determines the
 * number of entries in the bind fanout table.
 */
uint_t dccp_bind_fanout_size = DCCP_BIND_FANOUT_SIZE;

/* IXA notification callback; installed on new conns by dccp_get_conn() */
static void dccp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
    ixa_notify_arg_t);

/* Per-netstack create/destroy callbacks, registered in dccp_ddi_g_init() */
static void *dccp_stack_init(netstackid_t, netstack_t *);
static void dccp_stack_fini(netstackid_t, void *);

/*
 * Stream device open functions.  dccp_openv4() and dccp_openv6() are
 * the qinit open entry points; both forward to dccp_open() with the
 * trailing boolean_t selecting IPv6.
 */
static int dccp_openv4(queue_t *, dev_t *, int, int, cred_t *);
static int dccp_openv6(queue_t *, dev_t *, int, int, cred_t *);
static int dccp_open(queue_t *, dev_t *, int, int, cred_t *,
    boolean_t);

/* Write service routine */
static void dccp_wsrv(queue_t *);

/* Connection related functions, called from dccp_do_connect() */
static int dccp_connect_ipv4(dccp_t *, ipaddr_t *, in_port_t, uint_t);
static int dccp_connect_ipv6(dccp_t *, in6_addr_t *, in_port_t, uint32_t,
    uint_t, uint32_t);

/* Initialise ISS; called from dccp_set_destination() */
static void dccp_iss_init(dccp_t *);
89
/*
 * Read-side module information: module id, module name, minimum and
 * maximum packet size, then the high and low water marks.
 */
struct module_info dccp_rinfo = {
	DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, DCCP_RECV_HIWATER,
	DCCP_RECV_LOWATER
};

/*
 * Write-side module information; same layout as dccp_rinfo but with
 * fixed water marks of 127 and 16.
 */
static struct module_info dccp_winfo = {
	DCCP_MOD_ID, DCCP_MOD_NAME, 0, INFPSZ, 127, 16
};

/*
 * Queue information structure with DCCP entry points.
 *
 * Each qinit lists, in order: put procedure, service procedure, open,
 * close, admin (unused here) and the module_info to use.
 */

/* Read side of /dev/dccp: opens through dccp_openv4() */
struct qinit dccp_rinitv4 = {
	NULL, (pfi_t)dccp_rsrv, dccp_openv4, dccp_tpi_close, NULL, &dccp_rinfo
};

/* Read side of /dev/dccp6: opens through dccp_openv6() */
struct qinit dccp_rinitv6 = {
	NULL, (pfi_t)dccp_rsrv, dccp_openv6, dccp_tpi_close, NULL, &dccp_rinfo
};

/* Default write side, shared by the v4 and v6 devices */
struct qinit dccp_winit = {
	(pfi_t)dccp_wput, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo
};

/*
 * Initial entry point for DCCP in socket mode; dccp_open() switches the
 * write queue to this table when SO_SOCKSTR is set.
 */
struct qinit dccp_sock_winit = {
	(pfi_t)dccp_wput_sock, (pfi_t)dccp_wsrv, NULL, NULL, NULL, &dccp_winfo
};

/*
 * Write side installed by dccp_open() for SO_FALLBACK opens, i.e. the
 * stream a non-STREAMS socket can fall back to.
 */
struct qinit dccp_fallback_sock_winit = {
	(pfi_t)dccp_wput_fallback, NULL, NULL, NULL, NULL, &dccp_winfo
};
/*
 * DCCP as acceptor STREAM.
 *
 * Both tables are installed by dccp_open() for SO_ACCEPTOR opens.  Note
 * that the read side also uses dccp_winfo rather than dccp_rinfo.
 */
struct qinit dccp_acceptor_rinit = {
	NULL, (pfi_t)dccp_rsrv, NULL, dccp_tpi_close_accept, NULL, &dccp_winfo
};

struct qinit dccp_acceptor_winit = {
	(pfi_t)dccp_tpi_accept, NULL, NULL, NULL, NULL, &dccp_winfo
};

/* AF_INET /dev/dccp */
struct streamtab dccpinfov4 = {
	&dccp_rinitv4, &dccp_winit
};

/* AF_INET6 /dev/dccp6 */
struct streamtab dccpinfov6 = {
	&dccp_rinitv6, &dccp_winit
};
142
/*
 * Template for response to info request (IPv4 endpoints).  The address
 * size is that of a sin_t; OPT_size is left 0 here and is expected to
 * be filled in by whoever uses the template.
 */
struct T_info_ack dccp_g_t_info_ack = {
	T_INFO_ACK,		/* PRIM_type */
	0,			/* TSDU_size */
	T_INFINITE,		/* ETSDU_size */
	T_INVALID,		/* CDATA_size */
	T_INVALID,		/* DDATA_size */
	sizeof (sin_t),		/* ADDR_size */
	0,			/* OPT_size - not initialized here */
	TIDUSZ,			/* TIDU_size */
	T_COTS_ORD,		/* SERV_type */
	DCCPS_CLOSED,		/* CURRENT_state */
	(XPG4_1|EXPINLINE)	/* PROVIDER_flag */
};

/*
 * IPv6 variant of the template above; identical except that the
 * address size is that of a sin6_t.
 */
struct T_info_ack dccp_g_t_info_ack_v6 = {
	T_INFO_ACK,		/* PRIM_type */
	0,			/* TSDU_size */
	T_INFINITE,		/* ETSDU_size */
	T_INVALID,		/* CDATA_size */
	T_INVALID,		/* DDATA_size */
	sizeof (sin6_t),	/* ADDR_size */
	0,			/* OPT_size - not initialized here */
	TIDUSZ,			/* TIDU_size */
	T_COTS_ORD,		/* SERV_type */
	DCCPS_CLOSED,		/* CURRENT_state */
	(XPG4_1|EXPINLINE)	/* PROVIDER_flag */
};

/*
 * DCCP Tunables.
 *
 * The master property table and its entry count are defined elsewhere;
 * dccp_stack_init() copies the table into every stack instance.
 */
extern mod_prop_info_t dccp_propinfo_tbl[];
extern int dccp_propinfo_count;
177
178 /*
179 * Register DCCP in ip netstack.
180 */
181 void
182 dccp_ddi_g_init(void)
183 {
184 /* Global timer cache */
185 dccp_timercache = kmem_cache_create("dccp_timercache",
186 sizeof (dccp_timer_t) + sizeof (mblk_t), 0,
187 NULL, NULL, NULL, NULL, NULL, 0);
188
189 netstack_register(NS_DCCP, dccp_stack_init, NULL, dccp_stack_fini);
190 }
191
/*
 * Unregister DCCP from ip netstack.
 *
 * Counterpart of dccp_ddi_g_init(): destroys the global timer cache and
 * removes the NS_DCCP registration.
 */
void
dccp_ddi_g_destroy(void)
{
	/* Global timer cache */
	kmem_cache_destroy(dccp_timercache);

	netstack_unregister(NS_DCCP);
}
203
204 #define INET_NAME "ip"
205
206 /*
207 * Initialize this DCCP stack instance.
208 */
209 static void *
210 dccp_stack_init(netstackid_t stackid, netstack_t *ns)
211 {
212 dccp_stack_t *dccps;
213 major_t major;
214 size_t arrsz;
215 int error;
216 int i;
217
218 dccps = kmem_zalloc(sizeof (*dccps), KM_SLEEP);
219 if (dccps == NULL) {
220 return (NULL);
221 }
222 dccps->dccps_netstack = ns;
223
224 /* Ports */
225 mutex_init(&dccps->dccps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
226 dccps->dccps_num_epriv_ports = DCCP_NUM_EPRIV_PORTS;
227 dccps->dccps_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
228 dccps->dccps_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
229 dccps->dccps_min_anonpriv_port = 512;
230
231 dccps->dccps_bind_fanout_size = dccp_bind_fanout_size;
232
233 /* Bind fanout */
234 dccps->dccps_bind_fanout = kmem_zalloc(dccps->dccps_bind_fanout_size *
235 sizeof (dccp_df_t), KM_SLEEP);
236 for (i = 0; i < dccps->dccps_bind_fanout_size; i++) {
237 mutex_init(&dccps->dccps_bind_fanout[i].df_lock, NULL,
238 MUTEX_DEFAULT, NULL);
239 }
240
241 /* Tunable properties */
242 arrsz = dccp_propinfo_count * sizeof (mod_prop_info_t);
243 dccps->dccps_propinfo_tbl = kmem_alloc(arrsz, KM_SLEEP);
244 if (dccps->dccps_propinfo_tbl == NULL) {
245 kmem_free(dccps, sizeof (*dccps));
246 return (NULL);
247 }
248 bcopy(dccp_propinfo_tbl, dccps->dccps_propinfo_tbl, arrsz);
249
250 /* Allocate per netstack cpu stats */
251 mutex_enter(&cpu_lock);
252 dccps->dccps_sc_cnt = MAX(ncpus, boot_ncpus);
253 mutex_exit(&cpu_lock);
254
255 dccps->dccps_sc = kmem_zalloc(max_ncpus * sizeof (dccp_stats_cpu_t *),
256 KM_SLEEP);
257 for (i = 0; i < dccps->dccps_sc_cnt; i++) {
258 dccps->dccps_sc[i] = kmem_zalloc(sizeof (dccp_stats_cpu_t),
259 KM_SLEEP);
260 }
261
262 /* Kernel statistics */
263 //dccps->dccps_kstat = dccp_kstat2_init(stackid);
264 //dccps->dccps_mibkp = dccp_kstat_init(stackid);
265
266 /* Driver major number */
267 major = mod_name_to_major(INET_NAME);
268 error = ldi_ident_from_major(major, &dccps->dccps_ldi_ident);
269 ASSERT(error == 0);
270
271 return (dccps);
272 }
273
274 /*
275 * Destroy this DCCP netstack instance.
276 */
277 static void
278 dccp_stack_fini(netstackid_t stackid, void *arg)
279 {
280 dccp_stack_t *dccps = (dccp_stack_t *)arg;
281 int i;
282
283 /* Free cpu stats */
284 for (i = 0; i < dccps->dccps_sc_cnt; i++) {
285 kmem_free(dccps->dccps_sc[i], sizeof (dccp_stats_cpu_t));
286 }
287 kmem_free(dccps->dccps_sc, max_ncpus * sizeof (dccp_stats_cpu_t *));
288
289 /* Free tunable properties */
290 kmem_free(dccps->dccps_propinfo_tbl,
291 dccp_propinfo_count * sizeof (mod_prop_info_t));
292 dccps->dccps_propinfo_tbl = NULL;
293
294 /* Free bind fanout */
295 for (i = 0; i < dccps->dccps_bind_fanout_size; i++) {
296 ASSERT(dccps->dccps_bind_fanout[i].df_dccp == NULL);
297 mutex_destroy(&dccps->dccps_bind_fanout[i].df_lock);
298 }
299 kmem_free(dccps->dccps_bind_fanout, dccps->dccps_bind_fanout_size *
300 sizeof (dccp_df_t));
301 dccps->dccps_bind_fanout = NULL;
302
303 /* Kernel statistics */
304 dccp_kstat_fini(stackid, dccps->dccps_mibkp);
305 dccps->dccps_mibkp = NULL;
306 dccp_kstat2_fini(stackid, dccps->dccps_kstat);
307 dccps->dccps_kstat = NULL;
308
309 ldi_ident_release(dccps->dccps_ldi_ident);
310
311 kmem_free(dccps, sizeof (*dccps));
312 }
313
314 /* /dev/dccp */
315 static int
316 dccp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
317 {
318 cmn_err(CE_NOTE, "dccp.c: dccp_openv4\n");
319
320 return (dccp_open(q, devp, flag, sflag, credp, B_FALSE));
321 }
322
323 /* /dev/dccp6 */
324 static int
325 dccp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
326 {
327 cmn_err(CE_NOTE, "dccp.c: dccp_openv6\n");
328
329 return (dccp_open(q, devp, flag, sflag, credp, B_TRUE));
330 }
331
/*
 * Common open function for v4 and v6 devices.
 *
 * q     - read queue of the stream being opened
 * devp  - in: device being opened; out: rewritten with the minor number
 *         allocated for this open
 * flag  - open flags; SO_SOCKSTR, SO_FALLBACK and SO_ACCEPTOR are
 *         examined here
 * sflag - STREAMS open flag; MODOPEN is rejected
 * credp - credentials of the opener, handed to dccp_create_common()
 * isv6  - B_TRUE when called from dccp_openv6()
 *
 * Returns 0 on success (including a re-open of an already open stream),
 * EINVAL for a module open, EBUSY when no minor number is available, or
 * the error reported by dccp_create_common().
 */
static int
dccp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
    boolean_t isv6)
{
	conn_t		*connp;
	dccp_t		*dccp;
	vmem_t		*minor_arena;
	dev_t		conn_dev;
	boolean_t	issocket;
	int		error;

	cmn_err(CE_NOTE, "dccp.c: dccp_open");

	/* If the stream is already open, return immediately */
	if (q->q_ptr != NULL) {
		return (0);
	}

	/* DCCP can only be opened as a device, not pushed as a module */
	if (sflag == MODOPEN) {
		return (EINVAL);
	}

	/* Socket opens prefer the large minor arena when it exists */
	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
		minor_arena = ip_minor_arena_la;
	} else {
		/*
		 * Either minor numbers in the large arena were exhausted
		 * or a non socket application is doing the open.
		 * Try to allocate from the small arena.
		 */
		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
			return (EBUSY);
		}
		minor_arena = ip_minor_arena_sa;
	}

	ASSERT(minor_arena != NULL);

	/* Report the freshly allocated minor back to the caller */
	*devp = makedevice(getmajor(*devp), (minor_t)conn_dev);

	if (flag & SO_FALLBACK) {
		/*
		 * Non streams socket needs a stream to fallback to.
		 * No conn_t is created here; the queue pointers carry
		 * the minor number (read side) and its arena (write side).
		 */
		RD(q)->q_ptr = (void *)conn_dev;
		WR(q)->q_qinfo = &dccp_fallback_sock_winit;
		WR(q)->q_ptr = (void *)minor_arena;
		qprocson(q);
		return (0);
	} else if (flag & SO_ACCEPTOR) {
		q->q_qinfo = &dccp_acceptor_rinit;
		/*
		 * The conn_dev and minor_arena will be subsequently used by
		 * dccp_tpi_accept() and dccp_tpi_close_accept() to figure out
		 * the minor device number for this connection from the q_ptr.
		 */
		RD(q)->q_ptr = (void *)conn_dev;
		WR(q)->q_qinfo = &dccp_acceptor_winit;
		WR(q)->q_ptr = (void *)minor_arena;
		qprocson(q);
		return (0);
	}

	/*
	 * issocket receives the raw flag bit rather than B_TRUE; it is
	 * only ever tested for being non-zero.
	 */
	issocket = flag & SO_SOCKSTR;
	connp = dccp_create_common(credp, isv6, issocket, &error);
	if (connp == NULL) {
		/* Give the minor number back and leave the stream unopened */
		inet_minor_free(minor_arena, conn_dev);
		q->q_ptr = WR(q)->q_ptr = NULL;
		return (error);
	}

	/* Tie the conn and both queues together */
	connp->conn_rq = q;
	connp->conn_wq = WR(q);
	q->q_ptr = WR(q)->q_ptr = connp;

	connp->conn_dev = conn_dev;
	connp->conn_minor_arena = minor_arena;

	ASSERT(q->q_qinfo == &dccp_rinitv4 || q->q_qinfo == &dccp_rinitv6);
	ASSERT(WR(q)->q_qinfo == &dccp_winit);

	dccp = connp->conn_dccp;

	if (issocket) {
		WR(q)->q_qinfo = &dccp_sock_winit;
	} else {
		/*
		 * The acceptor id is the read queue pointer on 32-bit
		 * kernels and the minor number otherwise.
		 */
#ifdef _ILP32
		dccp->dccp_acceptor_id = (t_uscalar_t)RD(q);
#else
		dccp->dccp_acceptor_id = conn_dev;
#endif /* _ILP32 */
	}

	/*
	 * Put the ref for DCCP. Ref for IP was already put
	 * by ipcl_conn_create. Also Make the conn_t globally
	 * visible to walkers.
	 */
	mutex_enter(&connp->conn_lock);
	CONN_INC_REF_LOCKED(connp);
	ASSERT(connp->conn_ref == 2);
	connp->conn_state_flags &= ~CONN_INCIPIENT;
	mutex_exit(&connp->conn_lock);

	/* Enable the put and service procedures now that setup is done */
	qprocson(q);

	return (0);
}
444
/*
 * IXA notify
 *
 * Callback installed as ixa_notify on every conn created by
 * dccp_get_conn(); arg is the cookie registered there (the dccp_t).
 * Currently a stub: the notification type and argument are ignored and
 * only a console note is logged.
 */
static void
dccp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
    ixa_notify_arg_t narg)
{
	cmn_err(CE_NOTE, "dccp.c: dccp_notify");
}
454
/*
 * Build the template headers.
 *
 * (Re)builds the conn's header template via conn_build_hdr_template()
 * and points dccp_dccpha and dccp_ipha / dccp_ip6h at the result.  If a
 * ULP header already exists (conn_ht_ulp_len != 0) its bytes are saved
 * across the rebuild and copied back; otherwise the checksum field is
 * cleared and the ports are taken from the conn.
 *
 * Returns 0 on success or the error from conn_build_hdr_template().
 */
int
dccp_build_hdrs(dccp_t *dccp)
{
	dccp_stack_t	*dccps = dccp->dccp_dccps;
	conn_t		*connp = dccp->dccp_connp;
	dccpha_t	*dccpha;
	uint32_t	cksum;
	char		buf[DCCP_MAX_HDR_LENGTH];
	uint_t		buflen;
	/* presumably the minimum DCCP header length in bytes - TODO confirm */
	uint_t		ulplen = 12;
	uint_t		extralen = 0;
	int		error;

	cmn_err(CE_NOTE, "dccp.c: dccp_build_hdrs");

	buflen = connp->conn_ht_ulp_len;
	if (buflen != 0) {
		cmn_err(CE_NOTE, "buflen != 0");
		/* Preserve the existing ULP header across the rebuild */
		bcopy(connp->conn_ht_ulp, buf, buflen);
		/*
		 * NOTE(review): extralen is unsigned and starts at 0, so
		 * this wraps whenever buflen > ulplen; it only stays 0
		 * while the saved header is exactly ulplen bytes.  Also
		 * assumes buflen <= sizeof (buf) - confirm.
		 */
		extralen -= buflen - ulplen;
		ulplen = buflen;
	}

	mutex_enter(&connp->conn_lock);
	error = conn_build_hdr_template(connp, ulplen, extralen,
	    &connp->conn_laddr_v6, &connp->conn_faddr_v6, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	if (error != 0) {
		cmn_err(CE_NOTE, "conn_build_hdr_template failed");
		return (error);
	}

	/* The ULP part of the new template is the DCCP header */
	dccpha = (dccpha_t *)connp->conn_ht_ulp;
	dccp->dccp_dccpha = dccpha;

	if (buflen != 0) {
		/* Restore the header saved above */
		bcopy(buf, connp->conn_ht_ulp, buflen);
	} else {
		dccpha->dha_sum = 0;
		dccpha->dha_lport = connp->conn_lport;
		dccpha->dha_fport = connp->conn_fport;
	}

	/*
	 * Seed the checksum field with the header size plus conn_sum,
	 * folded once into 16 bits.
	 */
	cksum = sizeof (dccpha_t) + connp->conn_sum;
	cksum = (cksum >> 16) + (cksum & 0xFFFF);
	dccpha->dha_sum = htons(cksum);
	/*
	 * presumably the data offset in 32-bit words and the extended
	 * sequence number bit - TODO confirm against dccpha_t
	 */
	dccpha->dha_offset = 7;
	dccpha->dha_x = 1;

	/* Cache a typed pointer to the IP header of the template */
	if (connp->conn_ipversion == IPV4_VERSION) {
		dccp->dccp_ipha = (ipha_t *)connp->conn_ht_iphc;
	} else {
		dccp->dccp_ip6h = (ip6_t *)connp->conn_ht_iphc;
	}

	/* XXX */

	return (0);
}
517
518 /*
519 * DCCP write service routine.
520 */
521 static void
522 dccp_wsrv(queue_t *q)
523 {
524 dccp_stack_t *dccps = Q_TO_DCCP(q)->dccp_dccps;
525
526 DCCP_STAT(dccps, dccp_wsrv_called);
527 }
528
/*
 * Common create function for streams and sockets.
 *
 * credp    - credentials of the creator; a hold is taken and stored in
 *            the conn
 * isv6     - B_TRUE to create an AF_INET6 endpoint, else AF_INET
 * issocket - non-zero when created on behalf of a socket
 * errorp   - out: error code when NULL is returned (must not be NULL)
 *
 * Returns the new conn_t in state DCCPS_CLOSED, or NULL with *errorp
 * set to the secpolicy_basic_net_access() error or to ENOSR when no
 * conn could be allocated.
 */
conn_t *
dccp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket,
    int *errorp)
{
	conn_t		*connp;
	dccp_t		*dccp;
	dccp_stack_t	*dccps;
	netstack_t	*ns;
	squeue_t	*sqp;
	zoneid_t	zoneid;
	int		error;

	cmn_err(CE_NOTE, "dccp.c: dccp_create_common\n");

	ASSERT(errorp != NULL);

	/* The caller must be allowed basic network access */
	error = secpolicy_basic_net_access(credp);
	if (error != 0) {
		*errorp = error;
		return (NULL);
	}

	/*
	 * Find the right netstack.
	 */
	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	dccps = ns->netstack_dccp;
	ASSERT(dccps != NULL);

	/*
	 * For exclusive stacks we set the zoneid to zero
	 * to make DCCP operate as if in the global zone.
	 */
	if (ns->netstack_stackid != GLOBAL_NETSTACKID) {
		zoneid = GLOBAL_ZONEID;
	} else {
		zoneid = crgetzoneid(credp);
	}

	/* Pick a squeue and allocate the conn */
	sqp = IP_SQUEUE_GET((uint_t)gethrtime());
	connp = (conn_t *)dccp_get_conn(sqp, dccps);
	/*
	 * Drop the netstack reference before the NULL check so it is
	 * released on both the success and the failure path.
	 */
	netstack_rele(dccps->dccps_netstack);
	if (connp == NULL) {
		*errorp = ENOSR;
		return (NULL);
	}
	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);

	connp->conn_sqp = sqp;
	connp->conn_initial_sqp = connp->conn_sqp;
	connp->conn_ixa->ixa_sqp = connp->conn_sqp;
	dccp = connp->conn_dccp;

	/* Setting flags for ip output */
	connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE |
	    IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO;

	ASSERT(connp->conn_proto == IPPROTO_DCCP);
	ASSERT(connp->conn_dccp == dccp);
	ASSERT(dccp->dccp_connp == connp);

	/* Address family and IP version */
	if (isv6) {
		connp->conn_ixa->ixa_src_preferences = IPV6_PREFER_SRC_DEFAULT;
		connp->conn_ipversion = IPV6_VERSION;
		connp->conn_family = AF_INET6;
		/* XXX mms, ttl */
	} else {
		connp->conn_ipversion = IPV4_VERSION;
		connp->conn_family = AF_INET;
		/* XXX mms, ttl */
	}
	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;

	/* The conn owns one hold on the credentials */
	crhold(credp);
	connp->conn_cred = credp;
	connp->conn_cpid = curproc->p_pid;
	connp->conn_open_time = ddi_get_lbolt64();

	/* The ixa caches the cred without a hold of its own */
	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
	connp->conn_ixa->ixa_cred = credp;
	connp->conn_ixa->ixa_cpid = connp->conn_cpid;

	/*
	 * conn_zone_is_global reflects the creator's real zone, while
	 * conn_zoneid holds the value computed above.
	 */
	connp->conn_zoneid = zoneid;
	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
	connp->conn_ixa->ixa_zoneid = zoneid;
	connp->conn_mlp_type = mlptSingle;

	dccp->dccp_dccps = dccps;
	dccp->dccp_state = DCCPS_CLOSED;

	ASSERT(connp->conn_netstack == dccps->dccps_netstack);
	ASSERT(dccp->dccp_dccps == dccps);

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode. This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0) {
		connp->conn_mac_mode = CONN_MAC_AWARE;
	}

	if (issocket) {
		dccp->dccp_issocket = 1;
	}

	/* XXX rcvbuf, sndbuf etc */

	/* NOTE(review): SOCK_STREAM is used for DCCP here - confirm */
	connp->conn_so_type = SOCK_STREAM;

	SOCK_CONNID_INIT(dccp->dccp_connid);
	dccp_init_values(dccp, NULL);

	return (connp);
}
647
/*
 * Common close function for streams and sockets.
 *
 * Marks the conn as closing, cleans up any queued ioctls and waits
 * until no ioctl references remain.  The actual close processing on
 * the squeue is not implemented yet (see the XXX below).
 *
 * connp - conn being closed; must hold at least two references
 * flags - close flags; currently unused
 *
 * NOTE(review): the reference taken under conn_lock below is not
 * released anywhere in this function; it is presumably meant for the
 * disabled dccp_close_output() dispatch - confirm before enabling.
 * The locals dccp and mp and the label nowait are currently unused.
 */
void
dccp_close_common(conn_t *connp, int flags)
{
	dccp_t		*dccp = connp->conn_dccp;
	mblk_t		*mp;
	boolean_t	conn_ioctl_cleanup_reqd = B_FALSE;

	cmn_err(CE_NOTE, "dccp.c: dccp_close_common");

	ASSERT(connp->conn_ref >= 2);

	/*
	 * Mark the conn as closing. ipsq_pending_mp_add will not
	 * add any mp to the pending mp list, after this conn has
	 * started closing.
	 */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags |= CONN_CLOSING;

	/* Remember whether an ioctl is pending; cleaned up after unlock */
	if (connp->conn_oper_pending_ill != NULL) {
		conn_ioctl_cleanup_reqd = B_TRUE;
	}

	CONN_INC_REF_LOCKED(connp);
	mutex_exit(&connp->conn_lock);

	ASSERT(connp->conn_ref >= 3);

	/*
	 * Cleanup any queued ioctls here. This must be done before the wq/rq
	 * are re-written by dccp_close_output().
	 */
	if (conn_ioctl_cleanup_reqd) {
		conn_ioctl_cleanup(connp);
	}

	/* Wait for outstanding ioctl references to drain */
	mutex_enter(&connp->conn_lock);
	while (connp->conn_ioctlref > 0) {
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	}
	ASSERT(connp->conn_ioctlref == 0);
	ASSERT(connp->conn_oper_pending_ill == NULL);
	mutex_exit(&connp->conn_lock);

	/* generate close */
	/*
	 * XXX not enabled yet:
	 *
	 *	SQUEUE_ENTER_ONE(connp->conn_sqp, mp, dccp_close_output, connp,
	 *	    NULL, dccp_squeue_flag, SQTAG_IP_DCCP_CLOSE);
	 */

nowait:
	/* The conn no longer belongs to any process */
	connp->conn_cpid = NOPID;
}
705
706 /*
707 * Common bind function.
708 */
709 int
710 dccp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
711 boolean_t bind_to_req_port_only)
712 {
713 dccp_t *dccp = connp->conn_dccp;
714 int error;
715
716 cmn_err(CE_NOTE, "dccp.c: dccp_do_bind");
717
718 if (dccp->dccp_state >= DCCPS_BOUND) {
719 if (connp->conn_debug) {
720 (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
721 "dccp_bind: bad state, %d", dccp->dccp_state);
722 }
723 return (-TOUTSTATE);
724 }
725
726 error = dccp_bind_check(connp, sa, len, cr, bind_to_req_port_only);
727 if (error != 0) {
728 return (error);
729 }
730
731 ASSERT(dccp->dccp_state == DCCPS_LISTEN);
732 /* XXX dccp_conn_req_max = 0 */
733
734 return (0);
735 }
736
737 /*
738 * Common unbind function.
739 */
740 int
741 dccp_do_unbind(conn_t *connp)
742 {
743 dccp_t *dccp = connp->conn_dccp;
744 int32_t oldstate;
745
746 cmn_err(CE_NOTE, "dccp.c: dccp_do_unbind");
747
748 switch (dccp->dccp_state) {
749 case DCCPS_OPEN:
750 case DCCPS_LISTEN:
751 break;
752 default:
753 return (-TOUTSTATE);
754 }
755
756 connp->conn_laddr_v6 = ipv6_all_zeros;
757 connp->conn_saddr_v6 = ipv6_all_zeros;
758
759 dccp_bind_hash_remove(dccp);
760
761 oldstate = dccp->dccp_state;
762 dccp->dccp_state = DCCPS_CLOSED;
763 DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
764 connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL,
765 int32_t, oldstate);
766
767 ip_unbind(connp);
768 bzero(&connp->conn_ports, sizeof (connp->conn_ports));
769
770 return (0);
771 }
772
773 /*
774 * Common listen function.
775 */
776 int
777 dccp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len,
778 int backlog, cred_t *cr, boolean_t bind_to_req_port_only)
779 {
780 dccp_t *dccp = connp->conn_dccp;
781 dccp_stack_t *dccps = dccp->dccp_dccps;
782 int32_t oldstate;
783 int error;
784
785 cmn_err(CE_NOTE, "dccp.c: dccp_do_listen");
786
787 /* All Solaris components should pass a cred for this operation */
788 ASSERT(cr != NULL);
789
790 if (dccp->dccp_state >= DCCPS_BOUND) {
791
792 if ((dccp->dccp_state == DCCPS_BOUND ||
793 dccp->dccp_state == DCCPS_LISTEN) && backlog > 0) {
794 goto do_listen;
795 }
796 cmn_err(CE_NOTE, "DCCPS_BOUND, bad state");
797
798 if (connp->conn_debug) {
799 (void) strlog(DCCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
800 "dccp_listen: bad state, %d", dccp->dccp_state);
801 }
802 return (-TOUTSTATE);
803 } else {
804 if (sa == NULL) {
805 sin6_t addr;
806 sin6_t *sin6;
807 sin_t *sin;
808
809 ASSERT(IPCL_IS_NONSTR(connp));
810
811 if (connp->conn_family == AF_INET) {
812 len = sizeof (sin_t);
813 sin = (sin_t *)&addr;
814 *sin = sin_null;
815 sin->sin_family = AF_INET;
816 } else {
817 ASSERT(connp->conn_family == AF_INET6);
818
819 len = sizeof (sin6_t);
820 sin6 = (sin6_t *)&addr;
821 *sin6 = sin6_null;
822 sin6->sin6_family = AF_INET6;
823 }
824
825 sa = (struct sockaddr *)&addr;
826 }
827
828 error = dccp_bind_check(connp, sa, len, cr,
829 bind_to_req_port_only);
830 if (error != 0) {
831 cmn_err(CE_NOTE, "dccp_bind_check failed");
832 return (error);
833 }
834 /* Fall through and do the fanout insertion */
835 }
836
837 do_listen:
838 ASSERT(dccp->dccp_state == DCCPS_BOUND ||
839 dccp->dccp_state == DCCPS_LISTEN);
840
841 /* XXX backlog */
842
843 connp->conn_recv = dccp_input_listener_unbound;
844
845 /* Insert into the classifier table */
846 error = ip_laddr_fanout_insert(connp);
847 if (error != 0) {
848 /* Error - undo the bind */
849 oldstate = dccp->dccp_state;
850 dccp->dccp_state = DCCPS_CLOSED;
851
852 connp->conn_bound_addr_v6 = ipv6_all_zeros;
853
854 connp->conn_laddr_v6 = ipv6_all_zeros;
855 connp->conn_saddr_v6 = ipv6_all_zeros;
856 connp->conn_ports = 0;
857
858 if (connp->conn_anon_port) {
859 zone_t *zone;
860
861 zone = crgetzone(cr);
862 connp->conn_anon_port = B_FALSE;
863 (void) tsol_mlp_anon(zone, connp->conn_mlp_type,
864 connp->conn_proto, connp->conn_lport, B_FALSE);
865 }
866 connp->conn_mlp_type = mlptSingle;
867
868 /* XXX dccp_bind_hash_remove */
869
870 return (error);
871 } else {
872 /* XXX connection limits */
873 }
874
875 return (error);
876 }
877
/*
 * Common connect function.
 *
 * connp   - conn to connect
 * sa, len - destination; len selects the interpretation as sin_t or
 *           sin6_t, any other length is rejected
 * cr, pid - credentials and pid of the caller, stored in the conn
 *
 * Returns 0 once the connection attempt has been started.  Errors are
 * -TBADADDR (zero port), EAFNOSUPPORT / EADDRNOTAVAIL (IPv4 or
 * v4-mapped destination on a v6-only or v6-bound conn), EINVAL (bad
 * address length), EOPNOTSUPP (connect on a listening socket),
 * -TOUTSTATE (state does not allow connect) or whatever the per-family
 * connect routine returns.
 *
 * NOTE(review): only len is checked against the address sizes; the
 * address family is presumably validated by the callers - confirm.
 */
int
dccp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
    cred_t *cr, pid_t pid)
{
	dccp_t		*dccp = connp->conn_dccp;
	dccp_stack_t	*dccps = dccp->dccp_dccps;
	ip_xmit_attr_t	*ixa = connp->conn_ixa;
	mblk_t		*req_mp;
	sin_t		*sin = (sin_t *)sa;
	sin6_t		*sin6 = (sin6_t *)sa;
	ipaddr_t	*dstaddrp;
	in_port_t	dstport;
	uint_t		srcid;
	int32_t		oldstate;
	int		error;

	cmn_err(CE_NOTE, "dccp.c: dccp_do_connect");

	/* Saved so that a failed attempt can restore the state */
	oldstate = dccp->dccp_state;

	/* Basic sanity checks on the destination address */
	switch (len) {
	case sizeof (sin_t):
		sin = (sin_t *)sa;
		if (sin->sin_port == 0) {
			return (-TBADADDR);
		}
		if (connp->conn_ipv6_v6only) {
			return (EAFNOSUPPORT);
		}
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)sa;
		if (sin6->sin6_port == 0) {
			return (-TBADADDR);
		}
		break;

	default:
		return (EINVAL);
	}

	/*
	 * An IPv6 conn connecting to a v4-mapped address switches to
	 * IPv4, unless it is restricted to IPv6 only.
	 */
	if (connp->conn_family == AF_INET6 &&
	    connp->conn_ipversion == IPV6_VERSION &&
	    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
		if (connp->conn_ipv6_v6only) {
			return (EADDRNOTAVAIL);
		}

		connp->conn_ipversion = IPV4_VERSION;
	}

	switch (dccp->dccp_state) {
	case DCCPS_LISTEN:
		/*
		 * Listening sockets are not allowed to issue connect().
		 */
		if (IPCL_IS_NONSTR(connp)) {
			return (EOPNOTSUPP);
		}
		/* FALLTHRU */

	case DCCPS_CLOSED:
		/*
		 * We support quick connect.
		 */
		/* FALLTHRU */
	case DCCPS_OPEN:
		break;

	default:
		return (-TOUTSTATE);
	}

	/*
	 * We update our cred/cpid based on the caller of connect.
	 * The new cred is held before the old one is released.
	 */
	if (connp->conn_cred != cr) {
		crhold(cr);
		crfree(connp->conn_cred);
		connp->conn_cred = cr;
	}
	connp->conn_cpid = pid;

	/* Cache things in the ixa without any refhold */
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;

	if (is_system_labeled()) {
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	/* Hand over to the connect routine matching the destination */
	if (connp->conn_family == AF_INET6) {
		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			error = dccp_connect_ipv6(dccp, &sin6->sin6_addr,
			    sin6->sin6_port, sin6->sin6_flowinfo,
			    sin6->__sin6_src_id, sin6->sin6_scope_id);
		} else {
			/*
			 * Destination address is mapped IPv6 address.
			 * Source bound address should be unspecified or
			 * IPv6 mapped address as well.
			 */
			if (!IN6_IS_ADDR_UNSPECIFIED(
			    &connp->conn_bound_addr_v6) &&
			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
				return (EADDRNOTAVAIL);
			}

			dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr));
			dstport = sin6->sin6_port;
			srcid = sin6->__sin6_src_id;
			error = dccp_connect_ipv4(dccp, dstaddrp, dstport,
			    srcid);
		}
	} else {
		dstaddrp = &sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		srcid = 0;
		error = dccp_connect_ipv4(dccp, dstaddrp, dstport, srcid);
	}

	if (error != 0) {
		cmn_err(CE_NOTE, "dccp_connect_ip failed");
		goto connect_failed;
	}

	/* XXX cluster */

	/* Connect succeeded */
	DCCPS_BUMP_MIB(dccps, dccpActiveOpens);
	dccp->dccp_active_open = 1;

	/* The probe reports DCCPS_BOUND as previous state, not oldstate */
	DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
	    connp->conn_ixa, void, NULL, dccp_t *, dccp, void, NULL,
	    int32_t, DCCPS_BOUND);

	/* Arm the timer, then build and send the request packet */
	DCCP_TIMER_RESTART(dccp, 100);
	req_mp = dccp_generate_request(connp);
	if (req_mp != NULL) {
		/*
		 * We must bump the generation before sending the request
		 * to ensure that we use the right generation in case
		 * this thread issues a "connected" up call.
		 */
		SOCK_CONNID_BUMP(dccp->dccp_connid);

		DTRACE_DCCP5(connect__request, mblk_t *, NULL,
		    ip_xmit_attr_t *, connp->conn_ixa,
		    void_ip_t *, req_mp->b_rptr, dccp_t *, dccp,
		    dccpha_t *,
		    &req_mp->b_rptr[connp->conn_ixa->ixa_ip_hdr_length]);

		dccp_send_data(dccp, req_mp);
	}

	/* Success is reported even if no request could be generated */
	return (0);

connect_failed:
	cmn_err(CE_NOTE, "dccp_do_connect failed");

	/* Forget the destination and return to the state we started in */
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	dccp->dccp_state = oldstate;

	/* XXX */
	return (error);
}
1049
1050 /*
1051 * Init values of a connection.
1052 */
1053 void
1054 dccp_init_values(dccp_t *dccp, dccp_t *parent)
1055 {
1056 conn_t *connp = dccp->dccp_connp;
1057 dccp_stack_t *dccps = dccp->dccp_dccps;
1058
1059 connp->conn_mlp_type = mlptSingle;
1060 }
1061
1062 /*
1063 * Free dccp structure.
1064 */
1065 void
1066 dccp_free(dccp_t *dccp)
1067 {
1068 conn_t *connp = dccp->dccp_connp;
1069
1070 cmn_err(CE_NOTE, "dccp.c: dccp_free");
1071
1072 connp->conn_rq = NULL;
1073 connp->conn_wq = NULL;
1074
1075 if (connp->conn_upper_handle != NULL) {
1076 if (IPCL_IS_NONSTR(connp)) {
1077 (*connp->conn_upcalls->su_closed)(
1078 connp->conn_upper_handle);
1079 dccp->dccp_detached = B_TRUE;
1080 }
1081
1082 connp->conn_upper_handle = NULL;
1083 connp->conn_upcalls = NULL;
1084 }
1085 }
1086
1087 void *
1088 dccp_get_conn(void *arg, dccp_stack_t *dccps)
1089 {
1090 dccp_t *dccp = NULL;
1091 conn_t *connp;
1092 squeue_t *sqp = (squeue_t *)arg;
1093 netstack_t *ns;
1094
1095 /* XXX timewait */
1096
1097 connp = ipcl_conn_create(IPCL_DCCPCONN, KM_NOSLEEP,
1098 dccps->dccps_netstack);
1099 if (connp == NULL) {
1100 return (NULL);
1101 }
1102
1103 dccp = connp->conn_dccp;
1104 dccp->dccp_dccps = dccps;
1105
1106 /* List of features being negotated */
1107 list_create(&dccp->dccp_features, sizeof (dccp_feature_t),
1108 offsetof(dccp_feature_t, df_next));
1109
1110 connp->conn_recv = dccp_input_data;
1111 connp->conn_recvicmp = dccp_icmp_input;
1112 connp->conn_verifyicmp = dccp_verifyicmp;
1113
1114 connp->conn_ixa->ixa_notify = dccp_notify;
1115 connp->conn_ixa->ixa_notify_cookie = dccp;
1116
1117 return ((void *)connp);
1118 }
1119
1120 /*
1121 * IPv4 connect.
1122 */
1123 static int
1124 dccp_connect_ipv4(dccp_t *dccp, ipaddr_t *dstaddrp, in_port_t dstport,
1125 uint_t srcid)
1126 {
1127 conn_t *connp = dccp->dccp_connp;
1128 dccp_stack_t *dccps = dccp->dccp_dccps;
1129 ipaddr_t dstaddr = *dstaddrp;
1130 uint16_t lport;
1131 int error;
1132
1133 cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv4");
1134
1135 ASSERT(connp->conn_ipversion == IPV4_VERSION);
1136
1137 if (dstaddr == INADDR_ANY) {
1138 dstaddr = htonl(INADDR_LOOPBACK);
1139 *dstaddrp = dstaddr;
1140 }
1141
1142 /* Handle __sin6_src_id if socket not bound to an IP address */
1143 if (srcid != 0 && connp->conn_laddr_v4 == INADDR_ANY) {
1144 ip_srcid_find_id(srcid, &connp->conn_laddr_v6,
1145 IPCL_ZONEID(connp), dccps->dccps_netstack);
1146 connp->conn_saddr_v6 = connp->conn_laddr_v6;
1147 }
1148
1149 IN6_IPADDR_TO_V4MAPPED(dstaddr, &connp->conn_faddr_v6);
1150 connp->conn_fport = dstport;
1151
1152 if (dccp->dccp_state == DCCPS_CLOSED) {
1153 lport = dccp_update_next_port(dccps->dccps_next_port_to_try,
1154 dccp, B_TRUE);
1155 lport = dccp_bindi(dccp, lport, &connp->conn_laddr_v6, 0,
1156 B_TRUE, B_FALSE, B_FALSE);
1157 if (lport == 0) {
1158 return (-TNOADDR);
1159 }
1160 }
1161
1162 error = dccp_set_destination(dccp);
1163 if (error != 0) {
1164 return (error);
1165 }
1166
1167 /*
1168 * Don't connect to oneself.
1169 */
1170 if (connp->conn_faddr_v4 == connp->conn_laddr_v4 &&
1171 connp->conn_fport == connp->conn_lport) {
1172 return (-TBADADDR);
1173 }
1174
1175 dccp->dccp_state = DCCPS_REQUEST;
1176
1177 return (ipcl_conn_insert_v4(connp));
1178 }
1179
/*
 * IPv6 connect.
 *
 * Not implemented yet: all arguments are ignored, nothing is stored in
 * the conn and 0 is returned.
 *
 * NOTE(review): because this reports success, dccp_do_connect() goes on
 * to generate and send a request although no destination, template
 * header or state change has been set up for the conn - confirm whether
 * an error should be returned until this is implemented.
 */
static int
dccp_connect_ipv6(dccp_t *dccp, in6_addr_t *dstaddrp, in_port_t dstport,
    uint32_t flowinfo, uint_t srcid, uint32_t scope_id)
{
	cmn_err(CE_NOTE, "dccp.c: dccp_connect_ipv6");

	return (0);
}
1191
1192 /*
1193 * Set the ports via conn_connect and build the template
1194 * header.
1195 */
1196 int
1197 dccp_set_destination(dccp_t *dccp)
1198 {
1199 conn_t *connp = dccp->dccp_connp;
1200 dccp_stack_t *dccps = dccp->dccp_dccps;
1201 iulp_t uinfo;
1202 uint32_t flags;
1203 int error;
1204
1205 flags = IPDF_LSO | IPDF_ZCOPY;
1206 flags |= IPDF_UNIQUE_DCE;
1207
1208 mutex_enter(&connp->conn_lock);
1209 error = conn_connect(connp, &uinfo, flags);
1210 mutex_exit(&connp->conn_lock);
1211 if (error != 0) {
1212 cmn_err(CE_NOTE, "conn_connect failed");
1213 return (error);
1214 }
1215
1216 error = dccp_build_hdrs(dccp);
1217 if (error != 0) {
1218 cmn_err(CE_NOTE, "dccp_build_hdrs failed");
1219 return (error);
1220 }
1221
1222 /* XXX */
1223
1224 /* Initialise the ISS */
1225 dccp_iss_init(dccp);
1226
1227 mutex_enter(&connp->conn_lock);
1228 connp->conn_state_flags &= ~CONN_INCIPIENT;
1229 mutex_exit(&connp->conn_lock);
1230
1231 return (0);
1232 }
1233
1234 /*
1235 * Init the ISS.
1236 */
1237 static void
1238 dccp_iss_init(dccp_t *dccp)
1239 {
1240 cmn_err(CE_NOTE, "dccp.c: dccp_iss_init");
1241
1242 dccp->dccp_iss += gethrtime();
1243 dccp->dccp_gss = dccp->dccp_iss;
1244 }