1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2012 David Hoeppner. All rights reserved.
29 */
30
31 #include <sys/types.h>
32 #include <sys/stream.h>
33 #include <sys/strsun.h>
34 #include <sys/strsubr.h>
35 #include <sys/stropts.h>
36 #include <sys/strlog.h>
37 #define _SUN_TPI_VERSION 2
38 #include <sys/tihdr.h>
39 #include <sys/suntpi.h>
40 #include <sys/xti_inet.h>
41 #include <sys/squeue_impl.h>
42 #include <sys/squeue.h>
43 #include <sys/tsol/tnet.h>
44
45 #include <inet/common.h>
46 #include <inet/ip.h>
47
48 #include <sys/cmn_err.h>
49
50 #include "dccp_impl.h"
51
52 static mblk_t *dccp_conn_create_v4(conn_t *, conn_t *, mblk_t *,
53 ip_recv_attr_t *);
54 static mblk_t *dccp_conn_create_v6(conn_t *, conn_t *, mblk_t *,
55 ip_recv_attr_t *);
56 static void dccp_input_listener(void *, mblk_t *, void *, ip_recv_attr_t *);
57 static void dccp_icmp_error_ipv6(dccp_t *, mblk_t *, ip_recv_attr_t *);
58
59 void
60 dccp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
61 {
62 conn_t *connp = (conn_t *)arg1;
63 dccp_t *dccp = connp->conn_dccp;
64
65 cmn_err(CE_NOTE, "dccp_input.c: dccp_icmp_input");
66
67 /* Assume IP provides aligned packets */
68 ASSERT(OK_32PTR(mp->b_rptr));
69 ASSERT((MBLKL(mp) >= sizeof (ipha_t)));
70
71 /* Verify IP version. */
72 if (!(ira->ira_flags & IRAF_IS_IPV4)) {
73 dccp_icmp_error_ipv6(dccp, mp, ira);
74 return;
75 }
76
77 }
78
79 static void
80 dccp_icmp_error_ipv6(dccp_t *dccp, mblk_t *mp, ip_recv_attr_t *ira)
81 {
82 cmn_err(CE_NOTE, "dccp_input.c: dccp_icmp_error_ipv6");
83 }
84
85 void
86 dccp_rsrv(queue_t *q)
87 {
88 cmn_err(CE_NOTE, "dccp_input.c: dccp_rsrv");
89 }
90
91 /*
92 * Handle a REQUEST on an AF_INET6 socket; can be either IPv4 or IPv6.
93 */
94 static mblk_t *
95 dccp_conn_create_v6(conn_t *lconnp, conn_t *connp, mblk_t *mp,
96 ip_recv_attr_t *ira)
97 {
98 dccp_t *ldccp = lconnp->conn_dccp;
99 dccp_t *dccp = connp->conn_dccp;
100 dccp_stack_t *dccps = dccp->dccp_dccps;
101 ipha_t *ipha;
102 ip6_t *ip6h;
103 mblk_t *tpi_mp;
104 sin6_t sin6;
105 uint_t ifindex = ira->ira_ruifindex;
106
107 if (ira->ira_flags & IRAF_IS_IPV4) {
108 ipha = (ipha_t *)mp->b_rptr;
109
110 connp->conn_ipversion = IPV4_VERSION;
111 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_laddr_v6);
112 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_faddr_v6);
113 connp->conn_saddr_v6 = connp->conn_laddr_v6;
114
115 sin6 = sin6_null;
116 sin6.sin6_addr = connp->conn_faddr_v6;
117 sin6.sin6_port = connp->conn_fport;
118 sin6.sin6_family = AF_INET6;
119 sin6.__sin6_src_id = ip_srcid_find_addr(&connp->conn_laddr_v6,
120 IPCL_ZONEID(lconnp), dccps->dccps_netstack);
121
122 if (connp->conn_recv_ancillary.crb_recvdstaddr) {
123 sin6_t sin6d;
124
125 sin6d = sin6_null;
126 sin6d.sin6_addr = connp->conn_laddr_v6;
127 sin6d.sin6_port = connp->conn_lport;
128 sin6d.sin6_family = AF_INET;
129 tpi_mp = mi_tpi_extconn_ind(NULL,
130 (char *)&sin6d, sizeof (sin6_t),
131 (char *)&dccp,
132 (t_scalar_t)sizeof (intptr_t),
133 (char *)&sin6d, sizeof (sin6_t),
134 (t_scalar_t)ldccp->dccp_conn_req_seqnum);
135 } else {
136 tpi_mp = mi_tpi_conn_ind(NULL,
137 (char *)&sin6, sizeof (sin6_t),
138 (char *)&dccp, (t_scalar_t)sizeof (intptr_t),
139 (t_scalar_t)ldccp->dccp_conn_req_seqnum);
140 }
141 } else {
142 ip6h = (ip6_t *)mp->b_rptr;
143
144 connp->conn_ipversion = IPV6_VERSION;
145 connp->conn_laddr_v6 = ip6h->ip6_dst;
146 connp->conn_faddr_v6 = ip6h->ip6_src;
147 connp->conn_saddr_v6 = connp->conn_laddr_v6;
148
149 sin6 = sin6_null;
150 sin6.sin6_addr = connp->conn_faddr_v6;
151 sin6.sin6_port = connp->conn_fport;
152 sin6.sin6_family = AF_INET6;
153 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
154 sin6.__sin6_src_id = ip_srcid_find_addr(&connp->conn_laddr_v6,
155 IPCL_ZONEID(lconnp), dccps->dccps_netstack);
156
157 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
158 /* Pass up the scope_id of remote addr */
159 sin6.sin6_scope_id = ifindex;
160 } else {
161 sin6.sin6_scope_id = 0;
162 }
163 if (connp->conn_recv_ancillary.crb_recvdstaddr) {
164 sin6_t sin6d;
165
166 sin6d = sin6_null;
167 sin6.sin6_addr = connp->conn_laddr_v6;
168 sin6d.sin6_port = connp->conn_lport;
169 sin6d.sin6_family = AF_INET6;
170 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_laddr_v6))
171 sin6d.sin6_scope_id = ifindex;
172
173 tpi_mp = mi_tpi_extconn_ind(NULL,
174 (char *)&sin6d, sizeof (sin6_t),
175 (char *)&dccp, (t_scalar_t)sizeof (intptr_t),
176 (char *)&sin6d, sizeof (sin6_t),
177 (t_scalar_t)ldccp->dccp_conn_req_seqnum);
178 } else {
179 tpi_mp = mi_tpi_conn_ind(NULL,
180 (char *)&sin6, sizeof (sin6_t),
181 (char *)&dccp, (t_scalar_t)sizeof (intptr_t),
182 (t_scalar_t)ldccp->dccp_conn_req_seqnum);
183 }
184 }
185
186 /* XXX mss */
187 return (tpi_mp);
188 }
189
190 /*
191 * Handle a REQUEST on an AF_INET socket.
192 */
193 static mblk_t *
194 dccp_conn_create_v4(conn_t *lconnp, conn_t *connp, mblk_t *mp,
195 ip_recv_attr_t *ira)
196 {
197 dccp_t *ldccp = lconnp->conn_dccp;
198 dccp_t *dccp = connp->conn_dccp;
199 dccp_stack_t *dccps = dccp->dccp_dccps;
200 ipha_t *ipha;
201 mblk_t *tpi_mp;
202 sin_t sin;
203
204 ASSERT(ira->ira_flags & IRAF_IS_IPV4);
205 ipha = (ipha_t *)mp->b_rptr;
206
207 connp->conn_ipversion = IPV4_VERSION;
208 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &connp->conn_laddr_v6);
209 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &connp->conn_faddr_v6);
210 connp->conn_saddr_v6 = connp->conn_laddr_v6;
211
212 sin = sin_null;
213 sin.sin_addr.s_addr = connp->conn_faddr_v4;
214 sin.sin_port = connp->conn_fport;
215 sin.sin_family = AF_INET;
216
217 if (lconnp->conn_recv_ancillary.crb_recvdstaddr) {
218 cmn_err(CE_NOTE, "ancillary");
219
220 sin_t sind;
221
222 sind = sin_null;
223 sind.sin_addr.s_addr = connp->conn_laddr_v4;
224 sind.sin_port = connp->conn_lport;
225 sind.sin_family = AF_INET;
226
227 tpi_mp = mi_tpi_extconn_ind(NULL,
228 (char *)&sind, sizeof (sin_t), (char *)&dccp,
229 (t_scalar_t)sizeof (intptr_t), (char *)&sind,
230 sizeof (sin_t), (t_scalar_t)ldccp->dccp_conn_req_seqnum);
231
232 } else {
233 tpi_mp = mi_tpi_conn_ind(NULL,
234 (char *)&sin, sizeof (sin_t),
235 (char *)&dccp, (t_scalar_t)sizeof (intptr_t),
236 (t_scalar_t)ldccp->dccp_conn_req_seqnum);
237 }
238
239 /* XXX mss */
240
241 return (tpi_mp);
242 }
243
244 static void
245 dccp_input_listener(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
246 {
247 conn_t *lconnp = (conn_t *)arg;
248 conn_t *econnp;
249 dccp_t *listener = lconnp->conn_dccp;
250 dccp_t *eager;
251 dccp_stack_t *dccps = listener->dccp_dccps;
252 ip_stack_t *ipst = dccps->dccps_netstack->netstack_ip;
253 dccpha_t *dccpha;
254 squeue_t *new_sqp;
255 mblk_t *tpi_mp;
256 mblk_t *mp1;
257 uint_t ifindex = ira->ira_ruifindex;
258 uint_t ip_hdr_len;
259 uint_t type;
260 int error;
261
262 cmn_err(CE_NOTE, "dccp_input.c: dccp_input_listener");
263
264 ip_hdr_len = ira->ira_ip_hdr_length;
265 dccpha = (dccpha_t *)&mp->b_rptr[ip_hdr_len];
266 type = (uint_t)dccpha->dha_type;
267
268 DTRACE_DCCP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, lconnp->conn_ixa,
269 __dtrace_dccp_void_ip_t *, mp->b_rptr, dccp_t *, listener,
270 __dtrace_dccp_dccph_t *, dccpha);
271
272 if (type != DCCP_PKT_REQUEST) {
273 cmn_err(CE_NOTE, "not request pkt");
274
275 /* XXX do something with a reset packet sent? */
276 freemsg(mp);
277 return;
278 }
279
280 /* XXX memory pressure */
281
282 /* XXX request defense */
283
284 /* XXX number of connections per listener */
285
286 ASSERT(ira->ira_sqp != NULL);
287 new_sqp = ira->ira_sqp;
288
289 econnp = (conn_t *)dccp_get_conn(arg2, dccps);
290 if (econnp == NULL) {
291 cmn_err(CE_NOTE, "econnp not found (eager)");
292 goto error2;
293 }
294
295 ASSERT(econnp->conn_netstack == lconnp->conn_netstack);
296 econnp->conn_sqp = new_sqp;
297 econnp->conn_initial_sqp = new_sqp;
298 econnp->conn_ixa->ixa_sqp = new_sqp;
299
300 econnp->conn_fport = dccpha->dha_lport;
301 econnp->conn_lport = dccpha->dha_fport;
302
303 error = conn_inherit_parent(lconnp, econnp);
304 if (error != 0) {
305 cmn_err(CE_NOTE, "conn_inherit_parent failed");
306 goto error3;
307 }
308
309 /* We already know the laddr of the new connection is ours */
310 econnp->conn_ixa->ixa_src_generation = ipst->ips_src_generation;
311
312 ASSERT(OK_32PTR(mp->b_rptr));
313 ASSERT(IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ||
314 IPH_HDR_VERSION(mp->b_rptr) == IPV6_VERSION);
315
316 if (lconnp->conn_family == AF_INET) {
317 ASSERT(IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION);
318 tpi_mp = dccp_conn_create_v4(lconnp, econnp, mp, ira);
319 } else {
320 tpi_mp = dccp_conn_create_v6(lconnp, econnp, mp, ira);
321 }
322
323 if (tpi_mp == NULL) {
324 cmn_err(CE_NOTE, "tpi_mo == NULL");
325 goto error3;
326 }
327
328 eager = econnp->conn_dccp;
329 SOCK_CONNID_INIT(eager->dccp_connid);
330
331 dccp_init_values(eager, listener);
332
333 ASSERT((econnp->conn_ixa->ixa_flags &
334 (IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE |
335 IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO)) ==
336 (IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE |
337 IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO));
338
339 if (!(ira->ira_flags & IRAF_IS_IPV4) && econnp->conn_bound_if == 0) {
340 if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_faddr_v6) ||
341 IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6)) {
342 econnp->conn_incoming_ifindex = ifindex;
343 econnp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
344 econnp->conn_ixa->ixa_scopeid = ifindex;
345 }
346 }
347
348 if (ira->ira_cred != NULL) {
349 mblk_setcred(tpi_mp, ira->ira_cred, ira->ira_cpid);
350 }
351
352 if (IPCL_IS_NONSTR(lconnp)) {
353 econnp->conn_flags |= IPCL_NONSTR;
354 }
355
356 /* XXX dccps is right? */
357 dccp_bind_hash_insert(&dccps->dccps_bind_fanout[
358 DCCP_BIND_HASH(econnp->conn_lport, dccps->dccps_bind_fanout_size)], eager, 0);
359
360 /* XXX CLUSTER */
361
362 SOCK_CONNID_BUMP(eager->dccp_connid);
363
364 error = dccp_set_destination(eager);
365 if (error != 0) {
366 cmn_err(CE_NOTE, "dccp_set_destination failed.");
367 dccp_bind_hash_remove(eager);
368 goto error3;
369 }
370
371 /* Process all DCCP options */
372 dccp_process_options(eager, dccpha);
373
374 CONN_INC_REF(lconnp);
375 eager->dccp_conn_req_seqnum = listener->dccp_conn_req_seqnum;
376 if (++listener->dccp_conn_req_seqnum == -1) {
377 /*
378 * -1 is "special" and defined in TPI as something
379 * that should never be used in T_CONN_IND
380 */
381 ++listener->dccp_conn_req_seqnum;
382 }
383
384 /* XXX SYN DEFENSE */
385
386 eager->dccp_state = DCCPS_REQUEST_RCVD;
387 DTRACE_DCCP6(state__change, void, NULL, ip_xmit_attr_t *,
388 econnp->conn_ixa, void, NULL, dccp_t *, eager, void, NULL,
389 int32_t, DCCPS_LISTEN);
390
391 /*
392 mp1 = dccp_xmit_mp(eager, eager->dccp_xmit_head, 0,
393 NULL, NULL, 0, B_FALSE, NULL, B_FALSE);
394 */
395 mp1 = dccp_generate_packet(econnp, mp);
396 if (mp1 == NULL) {
397 cmn_err(CE_NOTE, "dccp_generate_packet failed");
398 /*
399 * Increment the ref count as we are going to
400 * enqueueing an mp in squeue
401 */
402 CONN_INC_REF(econnp);
403 goto error;
404 }
405
406 CONN_INC_REF(econnp);
407
408 error = ipcl_conn_insert(econnp);
409 if (error != 0) {
410 cmn_err(CE_NOTE, "ipcl_conn_insert(econnp) failed");
411 goto error;
412 }
413
414 ASSERT(econnp->conn_ixa->ixa_notify_cookie == econnp->conn_dccp);
415 freemsg(mp);
416
417 /*
418 * Send the SYN-ACK. Use the right squeue so that conn_ixa is
419 * only used by one thread at a time.
420 */
421 if (econnp->conn_sqp == lconnp->conn_sqp) {
422 DTRACE_TCP5(send, mblk_t *, NULL, ip_xmit_attr_t *,
423 econnp->conn_ixa, __dtrace_dccp_void_ip_t *, mp1->b_rptr,
424 dccp_t *, eager, __dtrace_dccp_dccph_t *,
425 &mp1->b_rptr[econnp->conn_ixa->ixa_ip_hdr_length]);
426 (void) conn_ip_output(mp1, econnp->conn_ixa);
427 CONN_DEC_REF(econnp);
428 } else {
429 SQUEUE_ENTER_ONE(econnp->conn_sqp, mp1, dccp_send_synack,
430 econnp, NULL, SQ_PROCESS, SQTAG_TCP_SEND_SYNACK); /* XXX */
431 }
432
433 return;
434 error:
435 freemsg(mp1);
436 error2:
437 CONN_DEC_REF(econnp);
438 error3:
439 freemsg(mp);
440 }
441
442 void
443 dccp_input_listener_unbound(void *arg, mblk_t *mp, void *arg2,
444 ip_recv_attr_t *ira)
445 {
446 conn_t *connp = (conn_t *)arg;
447 squeue_t *sqp = (squeue_t *)arg2;
448 squeue_t *new_sqp;
449 uint32_t conn_flags;
450
451 cmn_err(CE_NOTE, "dccp_input.c: dccp_input_listener_unbound");
452
453 ASSERT(ira->ira_sqp != NULL);
454 new_sqp = ira->ira_sqp;
455
456 if (connp->conn_fanout == NULL) {
457 goto done;
458 }
459
460 /*
461 * Bind to correct squeue.
462 */
463 if (!(connp->conn_flags & IPCL_FULLY_BOUND)) {
464 cmn_err(CE_NOTE, "not fully bound");
465
466 mutex_enter(&connp->conn_fanout->connf_lock);
467 mutex_enter(&connp->conn_lock);
468
469 if (connp->conn_ref != 4 ||
470 connp->conn_dccp->dccp_state != DCCPS_LISTEN) {
471 mutex_exit(&connp->conn_lock);
472 mutex_exit(&connp->conn_fanout->connf_lock);
473 goto done;
474 }
475
476 if (connp->conn_sqp != new_sqp) {
477 while (connp->conn_sqp != new_sqp) {
478 (void) casptr(&connp->conn_sqp, sqp, new_sqp);
479 }
480 connp->conn_ixa->ixa_sqp = new_sqp;
481 }
482
483 do {
484 conn_flags = connp->conn_flags;
485 conn_flags |= IPCL_FULLY_BOUND;
486 (void) cas32(&connp->conn_flags, connp->conn_flags,
487 conn_flags);
488 } while (!(connp->conn_flags & IPCL_FULLY_BOUND));
489
490 mutex_exit(&connp->conn_lock);
491 mutex_exit(&connp->conn_fanout->connf_lock);
492
493 connp->conn_recv = dccp_input_listener;
494 }
495
496 done:
497 if (connp->conn_sqp != sqp) {
498 CONN_INC_REF(connp);
499 SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, connp,
500 ira, SQ_FILL, SQTAG_DCCP_CONN_REQ_UNBOUND);
501 } else {
502 dccp_input_listener(connp, mp, sqp, ira);
503 }
504 }
505
506 boolean_t
507 dccp_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6,
508 ip_recv_attr_t *ira)
509 {
510 cmn_err(CE_NOTE, "dccp_input.c: dccp_verifyicmp");
511
512 return (B_TRUE);
513 }
514
515 /*
516 * After a request-response-ack all packets end up here.
517 */
518 void
519 dccp_input_data(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
520 {
521 conn_t *connp = (conn_t *)arg;
522 squeue_t *sqp = (squeue_t *)arg2;
523 dccp_t *dccp = connp->conn_dccp;
524 dccp_stack_t *dccps = dccp->dccp_dccps;
525 dccpha_t *dccpha;
526 uchar_t *iphdr;
527 uchar_t *rptr;
528 sock_upcalls_t *sockupcalls;
529 uint_t ip_hdr_len;
530
531 cmn_err(CE_NOTE, "dccp_input.c: dccp_input_data");
532
533 iphdr = mp->b_rptr;
534 rptr = mp->b_rptr;
535 ASSERT(OK_32PTR(rptr));
536
537 ip_hdr_len = ira->ira_ip_hdr_length;
538
539 ASSERT(DB_TYPE(mp) == M_DATA);
540 ASSERT(mp->b_next == NULL);
541
542 dccpha = (dccpha_t *)&rptr[ip_hdr_len];
543 }
544