37 #include <sys/tihdr.h>
38 #include <sys/timod.h>
39 #include <sys/tpicommon.h>
40 #include <sys/socketvar.h>
41
42 #include <inet/common.h>
43 #include <inet/proto_set.h>
44 #include <inet/ip.h>
45 #include <inet/tcp.h>
46 #include <inet/tcp_impl.h>
47
48 static void tcp_activate(sock_lower_handle_t, sock_upper_handle_t,
49 sock_upcalls_t *, int, cred_t *);
50 static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t,
51 sock_upper_handle_t, cred_t *);
52 static int tcp_bind(sock_lower_handle_t, struct sockaddr *,
53 socklen_t, cred_t *);
54 static int tcp_listen(sock_lower_handle_t, int, cred_t *);
55 static int tcp_connect(sock_lower_handle_t, const struct sockaddr *,
56 socklen_t, sock_connid_t *, cred_t *);
57 static int tcp_getsockopt(sock_lower_handle_t, int, int, void *,
58 socklen_t *, cred_t *);
59 static int tcp_setsockopt(sock_lower_handle_t, int, int, const void *,
60 socklen_t, cred_t *);
61 static int tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *,
62 cred_t *cr);
63 static int tcp_shutdown(sock_lower_handle_t, int, cred_t *);
64 static void tcp_clr_flowctrl(sock_lower_handle_t);
65 static int tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
66 cred_t *);
67 static int tcp_close(sock_lower_handle_t, int, cred_t *);
68
69 sock_downcalls_t sock_tcp_downcalls = {
70 tcp_activate,
71 tcp_accept,
72 tcp_bind,
73 tcp_listen,
74 tcp_connect,
75 tcp_getpeername,
76 tcp_getsockname,
77 tcp_getsockopt,
78 tcp_setsockopt,
79 tcp_sendmsg,
80 NULL,
81 NULL,
82 NULL,
114 sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 :
115 tcp_rinfo.mi_minpsz;
116
117 connp->conn_upcalls = sock_upcalls;
118 connp->conn_upper_handle = sock_handle;
119
120 ASSERT(connp->conn_rcvbuf != 0 &&
121 connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd);
122 (*sock_upcalls->su_set_proto_props)(sock_handle, &sopp);
123 }
124
125 /*ARGSUSED*/
126 static int
127 tcp_accept(sock_lower_handle_t lproto_handle,
128 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
129 cred_t *cr)
130 {
131 conn_t *lconnp, *econnp;
132 tcp_t *listener, *eager;
133
134 /*
135 * KSSL can move a socket from one listener to another, in which
136 * case `lproto_handle' points to the new listener. To ensure that
137 * the original listener is used the information is obtained from
138 * the eager.
139 */
140 econnp = (conn_t *)eproto_handle;
141 eager = econnp->conn_tcp;
142 ASSERT(IPCL_IS_NONSTR(econnp));
143 ASSERT(eager->tcp_listener != NULL);
144 listener = eager->tcp_listener;
145 lconnp = (conn_t *)listener->tcp_connp;
146 ASSERT(listener->tcp_state == TCPS_LISTEN);
147 ASSERT(lconnp->conn_upper_handle != NULL);
148
149 /*
150 * It is possible for the accept thread to race with the thread that
151 * made the su_newconn upcall in tcp_newconn_notify. Both
152 * tcp_newconn_notify and tcp_accept require that conn_upper_handle
153 * and conn_upcalls be set before returning, so they both write to
317 error = proto_tlitosyserr(-error);
318 }
319 }
320
321 if (connp->conn_tcp->tcp_loopback) {
322 struct sock_proto_props sopp;
323
324 sopp.sopp_flags = SOCKOPT_LOOPBACK;
325 sopp.sopp_loopback = B_TRUE;
326
327 (*connp->conn_upcalls->su_set_proto_props)(
328 connp->conn_upper_handle, &sopp);
329 }
330 done:
331 squeue_synch_exit(connp);
332
333 return ((error == 0) ? EINPROGRESS : error);
334 }
335
336 /* ARGSUSED3 */
337 int
338 tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
339 socklen_t *addrlenp, cred_t *cr)
340 {
341 conn_t *connp = (conn_t *)proto_handle;
342 tcp_t *tcp = connp->conn_tcp;
343
344 /* All Solaris components should pass a cred for this operation. */
345 ASSERT(cr != NULL);
346
347 ASSERT(tcp != NULL);
348 if (tcp->tcp_state < TCPS_SYN_RCVD)
349 return (ENOTCONN);
350
351 return (conn_getpeername(connp, addr, addrlenp));
352 }
353
354 /* ARGSUSED3 */
355 int
356 tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
357 socklen_t *addrlenp, cred_t *cr)
358 {
359 conn_t *connp = (conn_t *)proto_handle;
360
361 /* All Solaris components should pass a cred for this operation. */
362 ASSERT(cr != NULL);
363
364 return (conn_getsockname(connp, addr, addrlenp));
365 }
366
367 /* returns UNIX error, the optlen is a value-result arg */
368 static int
369 tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
370 void *optvalp, socklen_t *optlen, cred_t *cr)
371 {
372 conn_t *connp = (conn_t *)proto_handle;
373 int error;
374 t_uscalar_t max_optbuf_len;
375 void *optvalp_buf;
376 int len;
377
378 ASSERT(connp->conn_upper_handle != NULL);
379
380 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
381 tcp_opt_obj.odb_opt_des_arr,
382 tcp_opt_obj.odb_opt_arr_cnt,
383 B_FALSE, B_TRUE, cr);
384 if (error != 0) {
385 if (error < 0) {
386 error = proto_tlitosyserr(-error);
387 }
388 return (error);
389 }
390
391 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
392
393 error = squeue_synch_enter(connp, NULL);
394 if (error == ENOMEM) {
395 kmem_free(optvalp_buf, max_optbuf_len);
396 return (ENOMEM);
397 }
398
399 len = tcp_opt_get(connp, level, option_name, optvalp_buf);
407 /*
408 * update optlen and copy option value
409 */
410 t_uscalar_t size = MIN(len, *optlen);
411
412 bcopy(optvalp_buf, optvalp, size);
413 bcopy(&size, optlen, sizeof (size));
414
415 kmem_free(optvalp_buf, max_optbuf_len);
416 return (0);
417 }
418
419 static int
420 tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
421 const void *optvalp, socklen_t optlen, cred_t *cr)
422 {
423 conn_t *connp = (conn_t *)proto_handle;
424 int error;
425
426 ASSERT(connp->conn_upper_handle != NULL);
427 /*
428 * Entering the squeue synchronously can result in a context switch,
429 * which can cause a rather severe performance degradation. So we try to
430 * handle whatever options we can without entering the squeue.
431 */
432 if (level == IPPROTO_TCP) {
433 switch (option_name) {
434 case TCP_NODELAY:
435 if (optlen != sizeof (int32_t))
436 return (EINVAL);
437 mutex_enter(&connp->conn_tcp->tcp_non_sq_lock);
438 connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 :
439 connp->conn_tcp->tcp_mss;
440 mutex_exit(&connp->conn_tcp->tcp_non_sq_lock);
441 return (0);
442 default:
443 break;
444 }
445 }
446
735 * We can't assert the references because there might be other
736 * transient reference places because of some walkers or queued
737 * packets in squeue for the timewait state.
738 */
739 CONN_DEC_REF(connp);
740
741 /*
742 * EINPROGRESS tells sockfs to wait for a 'closed' upcall before
743 * freeing the socket.
744 */
745 return (EINPROGRESS);
746 }
747
748 /* ARGSUSED */
749 sock_lower_handle_t
750 tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
751 uint_t *smodep, int *errorp, int flags, cred_t *credp)
752 {
753 conn_t *connp;
754 boolean_t isv6 = family == AF_INET6;
755 if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) ||
756 (proto != 0 && proto != IPPROTO_TCP)) {
757 *errorp = EPROTONOSUPPORT;
758 return (NULL);
759 }
760
761 connp = tcp_create_common(credp, isv6, B_TRUE, errorp);
762 if (connp == NULL) {
763 return (NULL);
764 }
765
766 /*
767 * Put the ref for TCP. Ref for IP was already put
768 * by ipcl_conn_create. Also Make the conn_t globally
769 * visible to walkers
770 */
771 mutex_enter(&connp->conn_lock);
772 CONN_INC_REF_LOCKED(connp);
773 ASSERT(connp->conn_ref == 2);
774 connp->conn_state_flags &= ~CONN_INCIPIENT;
775
776 connp->conn_flags |= IPCL_NONSTR;
777 mutex_exit(&connp->conn_lock);
778
779 ASSERT(errorp != NULL);
780 *errorp = 0;
781 *sock_downcalls = &sock_tcp_downcalls;
782 *smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP |
783 SM_SENDFILESUPP;
784
785 return ((sock_lower_handle_t)connp);
786 }
787
788 /*
|
37 #include <sys/tihdr.h>
38 #include <sys/timod.h>
39 #include <sys/tpicommon.h>
40 #include <sys/socketvar.h>
41
42 #include <inet/common.h>
43 #include <inet/proto_set.h>
44 #include <inet/ip.h>
45 #include <inet/tcp.h>
46 #include <inet/tcp_impl.h>
47
48 static void tcp_activate(sock_lower_handle_t, sock_upper_handle_t,
49 sock_upcalls_t *, int, cred_t *);
50 static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t,
51 sock_upper_handle_t, cred_t *);
52 static int tcp_bind(sock_lower_handle_t, struct sockaddr *,
53 socklen_t, cred_t *);
54 static int tcp_listen(sock_lower_handle_t, int, cred_t *);
55 static int tcp_connect(sock_lower_handle_t, const struct sockaddr *,
56 socklen_t, sock_connid_t *, cred_t *);
57 static int tcp_getpeername(sock_lower_handle_t, struct sockaddr *,
58 socklen_t *, cred_t *);
59 static int tcp_getsockname(sock_lower_handle_t, struct sockaddr *,
60 socklen_t *, cred_t *);
61 static int tcp_getsockopt(sock_lower_handle_t, int, int, void *,
62 socklen_t *, cred_t *);
63 static int tcp_setsockopt(sock_lower_handle_t, int, int, const void *,
64 socklen_t, cred_t *);
65 static int tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *,
66 cred_t *);
67 static int tcp_shutdown(sock_lower_handle_t, int, cred_t *);
68 static void tcp_clr_flowctrl(sock_lower_handle_t);
69 static int tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
70 cred_t *);
71 static int tcp_close(sock_lower_handle_t, int, cred_t *);
72
73 sock_downcalls_t sock_tcp_downcalls = {
74 tcp_activate,
75 tcp_accept,
76 tcp_bind,
77 tcp_listen,
78 tcp_connect,
79 tcp_getpeername,
80 tcp_getsockname,
81 tcp_getsockopt,
82 tcp_setsockopt,
83 tcp_sendmsg,
84 NULL,
85 NULL,
86 NULL,
118 sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 :
119 tcp_rinfo.mi_minpsz;
120
121 connp->conn_upcalls = sock_upcalls;
122 connp->conn_upper_handle = sock_handle;
123
124 ASSERT(connp->conn_rcvbuf != 0 &&
125 connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd);
126 (*sock_upcalls->su_set_proto_props)(sock_handle, &sopp);
127 }
128
129 /*ARGSUSED*/
130 static int
131 tcp_accept(sock_lower_handle_t lproto_handle,
132 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
133 cred_t *cr)
134 {
135 conn_t *lconnp, *econnp;
136 tcp_t *listener, *eager;
137
138 /* All Solaris components should pass a cred for this operation. */
139 ASSERT(cr != NULL);
140
141 /*
142 * KSSL can move a socket from one listener to another, in which
143 * case `lproto_handle' points to the new listener. To ensure that
144 * the original listener is used the information is obtained from
145 * the eager.
146 */
147 econnp = (conn_t *)eproto_handle;
148 eager = econnp->conn_tcp;
149 ASSERT(IPCL_IS_NONSTR(econnp));
150 ASSERT(eager->tcp_listener != NULL);
151 listener = eager->tcp_listener;
152 lconnp = (conn_t *)listener->tcp_connp;
153 ASSERT(listener->tcp_state == TCPS_LISTEN);
154 ASSERT(lconnp->conn_upper_handle != NULL);
155
156 /*
157 * It is possible for the accept thread to race with the thread that
158 * made the su_newconn upcall in tcp_newconn_notify. Both
159 * tcp_newconn_notify and tcp_accept require that conn_upper_handle
160 * and conn_upcalls be set before returning, so they both write to
324 error = proto_tlitosyserr(-error);
325 }
326 }
327
328 if (connp->conn_tcp->tcp_loopback) {
329 struct sock_proto_props sopp;
330
331 sopp.sopp_flags = SOCKOPT_LOOPBACK;
332 sopp.sopp_loopback = B_TRUE;
333
334 (*connp->conn_upcalls->su_set_proto_props)(
335 connp->conn_upper_handle, &sopp);
336 }
337 done:
338 squeue_synch_exit(connp);
339
340 return ((error == 0) ? EINPROGRESS : error);
341 }
342
343 /* ARGSUSED3 */
344 static int
345 tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
346 socklen_t *addrlenp, cred_t *cr)
347 {
348 conn_t *connp = (conn_t *)proto_handle;
349 tcp_t *tcp = connp->conn_tcp;
350
351 /* All Solaris components should pass a cred for this operation. */
352 ASSERT(cr != NULL);
353
354 ASSERT(tcp != NULL);
355 if (tcp->tcp_state < TCPS_SYN_RCVD)
356 return (ENOTCONN);
357
358 return (conn_getpeername(connp, addr, addrlenp));
359 }
360
361 /* ARGSUSED3 */
362 static int
363 tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
364 socklen_t *addrlenp, cred_t *cr)
365 {
366 conn_t *connp = (conn_t *)proto_handle;
367
368 /* All Solaris components should pass a cred for this operation. */
369 ASSERT(cr != NULL);
370
371 return (conn_getsockname(connp, addr, addrlenp));
372 }
373
374 /* returns UNIX error, the optlen is a value-result arg */
375 static int
376 tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
377 void *optvalp, socklen_t *optlen, cred_t *cr)
378 {
379 conn_t *connp = (conn_t *)proto_handle;
380 int error;
381 t_uscalar_t max_optbuf_len;
382 void *optvalp_buf;
383 int len;
384
385 ASSERT(connp->conn_upper_handle != NULL);
386
387 /* All Solaris components should pass a cred for this operation. */
388 ASSERT(cr != NULL);
389
390 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
391 tcp_opt_obj.odb_opt_des_arr,
392 tcp_opt_obj.odb_opt_arr_cnt,
393 B_FALSE, B_TRUE, cr);
394 if (error != 0) {
395 if (error < 0) {
396 error = proto_tlitosyserr(-error);
397 }
398 return (error);
399 }
400
401 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
402
403 error = squeue_synch_enter(connp, NULL);
404 if (error == ENOMEM) {
405 kmem_free(optvalp_buf, max_optbuf_len);
406 return (ENOMEM);
407 }
408
409 len = tcp_opt_get(connp, level, option_name, optvalp_buf);
417 /*
418 * update optlen and copy option value
419 */
420 t_uscalar_t size = MIN(len, *optlen);
421
422 bcopy(optvalp_buf, optvalp, size);
423 bcopy(&size, optlen, sizeof (size));
424
425 kmem_free(optvalp_buf, max_optbuf_len);
426 return (0);
427 }
428
429 static int
430 tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
431 const void *optvalp, socklen_t optlen, cred_t *cr)
432 {
433 conn_t *connp = (conn_t *)proto_handle;
434 int error;
435
436 ASSERT(connp->conn_upper_handle != NULL);
437
438 /* All Solaris components should pass a cred for this operation. */
439 ASSERT(cr != NULL);
440
441 /*
442 * Entering the squeue synchronously can result in a context switch,
443 * which can cause a rather severe performance degradation. So we try to
444 * handle whatever options we can without entering the squeue.
445 */
446 if (level == IPPROTO_TCP) {
447 switch (option_name) {
448 case TCP_NODELAY:
449 if (optlen != sizeof (int32_t))
450 return (EINVAL);
451 mutex_enter(&connp->conn_tcp->tcp_non_sq_lock);
452 connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 :
453 connp->conn_tcp->tcp_mss;
454 mutex_exit(&connp->conn_tcp->tcp_non_sq_lock);
455 return (0);
456 default:
457 break;
458 }
459 }
460
749 * We can't assert the references because there might be other
750 * transient reference places because of some walkers or queued
751 * packets in squeue for the timewait state.
752 */
753 CONN_DEC_REF(connp);
754
755 /*
756 * EINPROGRESS tells sockfs to wait for a 'closed' upcall before
757 * freeing the socket.
758 */
759 return (EINPROGRESS);
760 }
761
762 /* ARGSUSED */
763 sock_lower_handle_t
764 tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
765 uint_t *smodep, int *errorp, int flags, cred_t *credp)
766 {
767 conn_t *connp;
768 boolean_t isv6 = family == AF_INET6;
769
770 if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) ||
771 (proto != 0 && proto != IPPROTO_TCP)) {
772 *errorp = EPROTONOSUPPORT;
773 return (NULL);
774 }
775
776 connp = tcp_create_common(credp, isv6, B_TRUE, errorp);
777 if (connp == NULL) {
778 return (NULL);
779 }
780
781 /*
782 * Put the ref for TCP. Ref for IP was already put
783 * by ipcl_conn_create. Also make the conn_t globally
784 * visible to walkers
785 */
786 mutex_enter(&connp->conn_lock);
787 CONN_INC_REF_LOCKED(connp);
788 ASSERT(connp->conn_ref == 2);
789 connp->conn_state_flags &= ~CONN_INCIPIENT;
790
791 connp->conn_flags |= IPCL_NONSTR;
792 mutex_exit(&connp->conn_lock);
793
794 ASSERT(errorp != NULL);
795 *errorp = 0;
796 *sock_downcalls = &sock_tcp_downcalls;
797 *smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP |
798 SM_SENDFILESUPP;
799
800 return ((sock_lower_handle_t)connp);
801 }
802
803 /*
|