/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>

#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/fcntl.h>

#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/sockio.h>
#include <sys/kmem_impl.h>

#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/ddi.h>
#include <netinet/in.h>
#include <inet/ip.h>

#include <fs/sockfs/sockcommon.h>
#include <fs/sockfs/sockfilter_impl.h>

#include <sys/socket_proto.h>

#include <fs/sockfs/socktpi_impl.h>
#include <fs/sockfs/sodirect.h>
#include <fs/sockfs/nl7c.h>

extern int xnet_skip_checks;
extern int xnet_check_print;

static void so_queue_oob(struct sonode *, mblk_t *, size_t);


/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
    socklen_t *len, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	return (EOPNOTSUPP);
}

/*
 * Generic Socket Ops
 */

/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	return (socket_init_common(so, pso, flags, cr));
}

int
so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));

	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);

	/* X/Open requires this check */
	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		if (xnet_check_print) {
			printf("sockfs: X/Open bind state check "
			    "caused EINVAL\n");
		}
		error = EINVAL;
		goto done;
	}

	/*
	 * A bind to a NULL address is interpreted as an unbind, so just
	 * do the downcall.
	 */
	if (name == NULL)
		goto dobind;

	switch (so->so_family) {
	case AF_INET:
		if ((size_t)namelen != sizeof (sin_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if ((flags & _SOBIND_XPG4_2) &&
		    (name->sa_family != so->so_family)) {
			/*
			 * This check has to be made for X/Open
			 * sockets; however, application failures have
			 * been observed when it is applied to
			 * all sockets.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Force a zero sa_family to match so_family.
		 *
		 * Some programs like inetd(1M) don't set the
		 * family field. Other programs leave
		 * sin_family set to garbage - SunOS 4.X does
		 * not check the family field on a bind.
		 * We use the family field that
		 * was passed in to the socket() call.
		 */
		name->sa_family = so->so_family;
		break;

	case AF_INET6: {
#ifdef DEBUG
		sin6_t *sin6 = (sin6_t *)name;
#endif
		if ((size_t)namelen != sizeof (sin6_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if (name->sa_family != so->so_family) {
			/*
			 * With IPv6 we require the family to match
			 * unlike in IPv4.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
#ifdef DEBUG
		/*
		 * Verify that apps don't forget to clear
		 * sin6_scope_id etc
		 */
		if (sin6->sin6_scope_id != 0 &&
		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized sin6_scope_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->sin6_scope_id,
			    (int)curproc->p_pid);
		}
		if (sin6->__sin6_src_id != 0) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized __sin6_src_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->__sin6_src_id,
			    (int)curproc->p_pid);
		}
#endif /* DEBUG */

		break;
	}
	default:
		/* Just pass the request to the protocol */
		goto dobind;
	}

	/*
	 * First we check if either NCA or KSSL has been enabled for
	 * the requested address, and if so, we fall back to TPI.
	 * If neither of those two services are enabled, then we just
	 * pass the request to the protocol.
	 *
	 * Note that KSSL can only be enabled on a socket if NCA is NOT
	 * enabled for that socket.
	 */
	if (nl7c_enabled && ((so->so_family == AF_INET ||
	    so->so_family == AF_INET6) &&
	    nl7c_lookup_addr(name, namelen) != NULL)) {
		/*
		 * NL7C is not supported in non-global zones;
		 * we enforce this restriction here.
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* NCA should be used, so fall back to TPI */
			error = so_tpi_fallback(so, cr);
			SO_UNBLOCK_FALLBACK(so);
			if (error)
				return (error);
			else
				return (SOP_BIND(so, name, namelen, flags, cr));
		}
	}

dobind:
	if (so->so_filter_active == 0 ||
	    (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_bind)
		    (so->so_proto_handle, name, namelen, cr);
	}
done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
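
/*
 * Illustrative sketch (editorial, not part of the original file): a
 * kernel caller binding an AF_INET socket reaches so_bind() via
 * socket_bind(); the port value below is hypothetical.
 *
 *	struct sockaddr_in sin;
 *
 *	bzero(&sin, sizeof (sin));
 *	sin.sin_port = htons(8080);
 *	error = socket_bind(so, (struct sockaddr *)&sin, sizeof (sin),
 *	    _SOBIND_SOCKBSD, CRED());
 *
 * Note that so_bind() forces sa_family to so_family for AF_INET, so a
 * zeroed sin_family still binds correctly (see the comment above).
 */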

int
so_listen(struct sonode *so, int backlog, struct cred *cr)
{
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));

	if ((so)->so_filter_active == 0 ||
	    (error = sof_filter_listen(so, &backlog, cr)) < 0)
		error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
		    backlog, cr);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}


int
so_connect(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int fflag, int flags, struct cred *cr)
{
	int error = 0;
	sock_connid_t id;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));

	/*
	 * If there is a pending error, return it.
	 * This can happen if a non-blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_connect(so, (struct sockaddr *)name,
	    &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
		    name, namelen, &id, cr);

		if (error == EINPROGRESS)
			error = so_wait_connected(so,
			    fflag & (FNONBLOCK|FNDELAY), id);
	}
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
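
/*
 * Note (editorial sketch): when the protocol returns EINPROGRESS,
 * so_wait_connected() either sleeps until the given connect id
 * completes (blocking callers) or returns without waiting when
 * FNONBLOCK/FNDELAY is set, so a non-blocking connect() surfaces
 * EINPROGRESS to the application in the usual BSD fashion.
 */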

/*ARGSUSED*/
int
so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
{
	int error = 0;
	struct sonode *nso;

	*nsop = NULL;

	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
	if ((so->so_state & SS_ACCEPTCONN) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
		    EOPNOTSUPP : EINVAL);
	}

	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
	    &nso)) == 0) {
		ASSERT(nso != NULL);

		/* finish the accept */
		if ((so->so_filter_active > 0 &&
		    (error = sof_filter_accept(nso, cr)) > 0) ||
		    (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
			(void) socket_close(nso, 0, cr);
			socket_destroy(nso);
		} else {
			*nsop = nso;
			sonode_insert_pid(nso, curproc);
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	int error, flags;
	boolean_t dontblock;
	ssize_t orig_resid;
	mblk_t *mp;

	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

	flags = msg->msg_flags;
	error = 0;
	dontblock = (flags & MSG_DONTWAIT) ||
	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
		/*
		 * Old way of passing fd's is not supported
		 */
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	if ((so->so_mode & SM_ATOMIC) &&
	    uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	/*
	 * For atomic sends we will only do one iteration.
	 */
	do {
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}

		/*
		 * Send down OOB messages even if the send path is being
		 * flow controlled (assuming the protocol supports OOB data).
		 */
		if (flags & MSG_OOB) {
			if ((so->so_mode & SM_EXDATA) == 0) {
				error = EOPNOTSUPP;
				break;
			}
		} else if (SO_SND_FLOWCTRLD(so)) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * Time to send data to the protocol. We either copy the
		 * data into mblks or pass the uio directly to the protocol.
		 * We decide what to do based on the available down calls.
		 */
		if (so->so_downcalls->sd_send_uio != NULL) {
			error = (*so->so_downcalls->sd_send_uio)
			    (so->so_proto_handle, uiop, msg, cr);
			if (error != 0)
				break;
		} else {
			/* save the resid in case of failure */
			orig_resid = uiop->uio_resid;

			if ((mp = socopyinuio(uiop,
			    so->so_proto_props.sopp_maxpsz,
			    so->so_proto_props.sopp_wroff,
			    so->so_proto_props.sopp_maxblk,
			    so->so_proto_props.sopp_tail, &error)) == NULL) {
				break;
			}
			ASSERT(uiop->uio_resid >= 0);

			if (so->so_filter_active > 0 &&
			    ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
			    &error)) == NULL)) {
				if (error != 0)
					break;
				continue;
			}
			error = (*so->so_downcalls->sd_send)
			    (so->so_proto_handle, mp, msg, cr);
			if (error != 0) {
				/*
				 * The send failed. We do not have to free the
				 * mblks, because that is the protocol's
				 * responsibility. However, uio_resid must
				 * remain accurate, so adjust that here.
				 */
				uiop->uio_resid = orig_resid;
				break;
			}
		}
	} while (uiop->uio_resid > 0);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
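
/*
 * Worked example (editorial, values hypothetical): on an atomic
 * (datagram) socket whose protocol advertises sopp_maxpsz = 65507, a
 * 70000-byte sendmsg() fails up front with EMSGSIZE rather than being
 * split across iterations, per the SM_ATOMIC check at the top of
 * so_sendmsg() above.
 */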

int
so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
    boolean_t fil_inject)
{
	int error;
	boolean_t dontblock;
	size_t size;
	mblk_t *mp = *mpp;

	if (so->so_downcalls->sd_send == NULL)
		return (EOPNOTSUPP);

	error = 0;
	dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
	    (fflag & (FNONBLOCK|FNDELAY));
	size = msgdsize(mp);

	if ((so->so_mode & SM_ATOMIC) &&
	    size > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	while (mp != NULL) {
		mblk_t *nmp, *last_mblk;
		size_t mlen;

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}
		/* Socket filters are not flow controlled */
		if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * We only allow so_maxpsz of data to be sent down to
		 * the protocol at a time.
		 */
		mlen = MBLKL(mp);
		nmp = mp->b_cont;
		last_mblk = mp;
		while (nmp != NULL) {
			mlen += MBLKL(nmp);
			if (mlen > so->so_proto_props.sopp_maxpsz) {
				last_mblk->b_cont = NULL;
				break;
			}
			last_mblk = nmp;
			nmp = nmp->b_cont;
		}
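
		/*
		 * Example (editorial): with sopp_maxpsz = 1460 and a
		 * chain of three 1000-byte mblks, the walk above stops
		 * at the second mblk (1000 + 1000 > 1460), detaches it
		 * from the first, and sends only the first mblk this
		 * iteration; nmp points at the remainder of the chain.
		 */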

		if (so->so_filter_active > 0 &&
		    (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
		    cr, &error)) == NULL) {
			*mpp = mp = nmp;
			if (error != 0)
				break;
			continue;
		}
		error = (*so->so_downcalls->sd_send)
		    (so->so_proto_handle, mp, msg, cr);
		if (error != 0) {
			/*
			 * The send failed. The protocol will free the mblks
			 * that were sent down. Let the caller deal with the
			 * rest.
			 */
			*mpp = nmp;
			break;
		}

		*mpp = mp = nmp;
	}
	/* Let the filter know whether the protocol is flow controlled */
	if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
		error = ENOSPC;

	return (error);
}

#pragma inline(so_sendmblk_impl)

int
so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));

	if ((so->so_mode & SM_SENDFILESUPP) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
	    B_FALSE);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_shutdown(struct sonode *so, int how, struct cred *cr)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));

	/*
	 * SunOS 4.X has no check for datagram sockets.
	 * 5.X checks that it is connected (ENOTCONN).
	 * X/Open requires that we check the connected state.
	 */
	if (!(so->so_state & SS_ISCONNECTED)) {
		if (!xnet_skip_checks) {
			error = ENOTCONN;
			if (xnet_check_print) {
				printf("sockfs: X/Open shutdown check "
				    "caused ENOTCONN\n");
			}
		}
		goto done;
	}

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_shutdown(so, &how, cr)) < 0)
		error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
		    how, cr));

	/*
	 * Protocol agreed to shutdown. We need to flush the
	 * receive buffer if the receive side is being shut down.
	 */
	if (error == 0 && how != SHUT_WR) {
		mutex_enter(&so->so_lock);
		/* wait for active reader to finish */
		(void) so_lock_read(so, 0);

		so_rcv_flush(so);

		so_unlock_read(so);
		mutex_exit(&so->so_lock);
	}

done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getsockname(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));

	if (so->so_filter_active == 0 ||
	    (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
		error = (*so->so_downcalls->sd_getsockname)
		    (so->so_proto_handle, addr, addrlen, cr);

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getpeername(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));

	if (accept) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	} else if (!(so->so_state & SS_ISCONNECTED)) {
		error = ENOTCONN;
	} else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		/* Added this check for X/Open */
		error = EINVAL;
		if (xnet_check_print) {
			printf("sockfs: X/Open getpeername check => EINVAL\n");
		}
	} else if (so->so_filter_active == 0 ||
	    (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_getsockopt(struct sonode *so, int level, int option_name,
    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
	int error = 0;

	if (level == SOL_FILTER)
		return (sof_getsockopt(so, option_name, optval, optlenp, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));

	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_getsockopt(so, level, option_name, optval,
	    optlenp, cr)) < 0) &&
	    (error = socket_getopt_common(so, level, option_name, optval,
	    optlenp, flags)) < 0) {
		error = (*so->so_downcalls->sd_getsockopt)
		    (so->so_proto_handle, level, option_name, optval, optlenp,
		    cr);
		if (error == ENOPROTOOPT) {
			if (level == SOL_SOCKET) {
				/*
				 * If a protocol does not support a particular
				 * socket option, set can fail (not allowed)
				 * but get cannot fail. This is the previous
				 * sockfs behavior.
				 */
				switch (option_name) {
				case SO_LINGER:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct linger)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct linger));
					*optlenp = sizeof (struct linger);
					break;
				case SO_RCVTIMEO:
				case SO_SNDTIMEO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct timeval)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct timeval));
					*optlenp = sizeof (struct timeval);
					break;
				case SO_SND_BUFINFO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct so_snd_bufinfo)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval,
					    sizeof (struct so_snd_bufinfo));
					*optlenp =
					    sizeof (struct so_snd_bufinfo);
					break;
				case SO_DEBUG:
				case SO_REUSEADDR:
				case SO_KEEPALIVE:
				case SO_DONTROUTE:
				case SO_BROADCAST:
				case SO_USELOOPBACK:
				case SO_OOBINLINE:
				case SO_DGRAM_ERRIND:
				case SO_SNDBUF:
				case SO_RCVBUF:
					error = 0;
					*((int32_t *)optval) = 0;
					*optlenp = sizeof (int32_t);
					break;
				default:
					break;
				}
			}
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
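
/*
 * Example (editorial): a getsockopt(3SOCKET) of SO_LINGER on a protocol
 * that answers ENOPROTOOPT still succeeds at the SOL_SOCKET level; the
 * fallback above returns a zeroed struct linger and adjusts *optlenp,
 * preserving the historical sockfs behavior described in the comment.
 */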

int
so_setsockopt(struct sonode *so, int level, int option_name,
    const void *optval, socklen_t optlen, struct cred *cr)
{
	int error = 0;
	struct timeval tl;
	const void *opt = optval;

	if (level == SOL_FILTER)
		return (sof_setsockopt(so, option_name, optval, optlen, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));

	/* X/Open requires this check */
	if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
		SO_UNBLOCK_FALLBACK(so);
		if (xnet_check_print)
			printf("sockfs: X/Open setsockopt check => EINVAL\n");
		return (EINVAL);
	}

	if (so->so_filter_active > 0 &&
	    (error = sof_filter_setsockopt(so, level, option_name,
	    (void *)optval, &optlen, cr)) >= 0)
		goto done;

	if (level == SOL_SOCKET) {
		switch (option_name) {
		case SO_RCVTIMEO:
		case SO_SNDTIMEO: {
			/*
			 * We pass these two options down to the protocol
			 * in order to support third-party protocols that
			 * need to know them. Protocols that don't care
			 * about these two options simply return 0.
			 */
			clock_t t_usec;

			if (get_udatamodel() == DATAMODEL_NONE ||
			    get_udatamodel() == DATAMODEL_NATIVE) {
				if (optlen != sizeof (struct timeval)) {
					error = EINVAL;
					goto done;
				}
				bcopy((struct timeval *)optval, &tl,
				    sizeof (struct timeval));
			} else {
				if (optlen != sizeof (struct timeval32)) {
					error = EINVAL;
					goto done;
				}
				TIMEVAL32_TO_TIMEVAL(&tl,
				    (struct timeval32 *)optval);
			}
			opt = &tl;
			optlen = sizeof (tl);
			t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
			mutex_enter(&so->so_lock);
			if (option_name == SO_RCVTIMEO)
				so->so_rcvtimeo = drv_usectohz(t_usec);
			else
				so->so_sndtimeo = drv_usectohz(t_usec);
			mutex_exit(&so->so_lock);
			break;
		}
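		/*
		 * Worked example (editorial): a timeout of
		 * { tv_sec = 2, tv_usec = 500000 } gives
		 * t_usec = 2,500,000, so so_rcvtimeo becomes
		 * drv_usectohz(2500000), i.e. 250 ticks with the
		 * common 100Hz clock.
		 */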
		case SO_RCVBUF:
			/*
			 * XXX XPG 4.2 applications retrieve SO_RCVBUF from
			 * sockfs since the transport might adjust the value
			 * and not return exactly what was set by the
			 * application.
			 */
			so->so_xpg_rcvbuf = *(int32_t *)optval;
			break;
		}
	}
	error = (*so->so_downcalls->sd_setsockopt)
	    (so->so_proto_handle, level, option_name, opt, optlen, cr);
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

int
so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));

	/*
	 * If there is a pending error, return it.
	 * This can happen if a non-blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	/*
	 * Calling strioc can result in the socket falling back to TPI,
	 * if that is supported.
	 */
	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_ioctl(so, cmd, arg, mode,
	    rvalp, cr)) < 0) &&
	    (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
	    (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
		error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
		    cmd, arg, mode, rvalp, cr);
	}

done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

int
so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int state = so->so_state, mask;

	*reventsp = 0;

	/*
	 * In sockets, errors are represented as input/output events.
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
		return (0);
	}

	/*
	 * If the socket is in a state where it can send data,
	 * turn on POLLWRBAND and POLLOUT events.
	 */
	if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
		/*
		 * Out-of-band data is allowed even if the connection
		 * is flow controlled.
		 */
		*reventsp |= POLLWRBAND & events;
		if (!SO_SND_FLOWCTRLD(so)) {
			/*
			 * As long as there is buffer space to send data,
			 * turn on POLLOUT events.
			 */
			*reventsp |= POLLOUT & events;
		}
	}

	/*
	 * Turn on POLLIN whenever there is data on the receive queue,
	 * or the socket is in a state where no more data will be received.
	 * Also, if the socket is accepting connections, flip the bit if
	 * there is something on the queue.
	 *
	 * We do an initial check for events without holding locks. However,
	 * if no events are available, then we redo the check for POLLIN
	 * events under the lock.
	 */

	/* Pending connections */
	if (!list_is_empty(&so->so_acceptq_list))
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/* Data */
	/* so_downcalls is NULL for SCTP */
	if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
		*reventsp |= (*so->so_downcalls->sd_poll)
		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
		    CRED()) & events;
		ASSERT((*reventsp & ~events) == 0);
		/* do not recheck events */
		events &= ~SO_PROTO_POLLEV;
	} else {
		if (SO_HAVE_DATA(so))
			*reventsp |= (POLLIN|POLLRDNORM) & events;

		/* Urgent data */
		if ((state & SS_OOBPEND) != 0) {
			*reventsp |= (POLLRDBAND | POLLPRI) & events;
		}

		/*
		 * If the socket has become disconnected, we set POLLHUP.
		 * Note that if we are in this state, we will have set POLLIN
		 * (SO_HAVE_DATA() is true on a disconnected socket), but not
		 * POLLOUT (SS_ISCONNECTED is false). This is in keeping with
		 * the semantics of POLLHUP, which is defined to be mutually
		 * exclusive with respect to POLLOUT but not POLLIN. We are
		 * therefore setting POLLHUP primarily for the benefit of
		 * those not polling on POLLIN, as they have no other way of
		 * knowing that the socket has been disconnected.
		 */
		mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;

		if ((state & (mask | SS_ISCONNECTED)) == mask)
			*reventsp |= POLLHUP;
	}

	if (!*reventsp && !anyyet) {
		/* Check for read events again, but this time under lock */
		if (events & (POLLIN|POLLRDNORM)) {
			mutex_enter(&so->so_lock);
			if (SO_HAVE_DATA(so) ||
			    !list_is_empty(&so->so_acceptq_list)) {
				mutex_exit(&so->so_lock);
				*reventsp |= (POLLIN|POLLRDNORM) & events;
				return (0);
			} else {
				so->so_pollev |= SO_POLLEV_IN;
				mutex_exit(&so->so_lock);
			}
		}
		*phpp = &so->so_poll_list;
	}
	return (0);
}
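
/*
 * Example (editorial): a thread polling only for POLLOUT on a connected
 * socket still learns of a peer disconnect, because the POLLHUP bit set
 * above is not masked by the requested events, matching the poll(2)
 * semantics where POLLHUP is always reported.
 */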

/*
 * Generic Upcalls
 */
void
so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
    cred_t *peer_cred, pid_t peer_cpid)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	ASSERT(so->so_proto_handle != NULL);

	if (peer_cred != NULL) {
		if (so->so_peercred != NULL)
			crfree(so->so_peercred);
		crhold(peer_cred);
		so->so_peercred = peer_cred;
		so->so_cpid = peer_cpid;
	}

	so->so_proto_connid = id;
	soisconnected(so);
	/*
	 * Wake up anyone waiting for the connection to become established.
	 */
	so_notify_connected(so);
}

int
so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;
	boolean_t connect_failed;

	mutex_enter(&so->so_lock);

	/*
	 * If we aren't currently connected, then this isn't a disconnect but
	 * rather a failure to connect.
	 */
	connect_failed = !(so->so_state & SS_ISCONNECTED);

	so->so_proto_connid = id;
	soisdisconnected(so, error);
	so_notify_disconnected(so, connect_failed, error);

	return (0);
}

void
so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
    uintptr_t arg)
{
	struct sonode *so = (struct sonode *)sock_handle;

	switch (action) {
	case SOCK_OPCTL_SHUT_SEND:
		mutex_enter(&so->so_lock);
		socantsendmore(so);
		so_notify_disconnecting(so);
		break;
	case SOCK_OPCTL_SHUT_RECV:
		mutex_enter(&so->so_lock);
		socantrcvmore(so);
		so_notify_eof(so);
		break;
	case SOCK_OPCTL_ENAB_ACCEPT:
		mutex_enter(&so->so_lock);
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = (unsigned int)arg;
		/*
		 * The protocol can stop generating newconn upcalls when
		 * the backlog is full, so to make sure the listener does
		 * not end up with a queue full of deferred connections
		 * we reduce the backlog by one. Thus the listener will
		 * start closing deferred connections before the backlog
		 * is full.
		 */
		if (so->so_filter_active > 0)
			so->so_backlog = MAX(1, so->so_backlog - 1);
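		/*
		 * Example (editorial): with a filter active, a listen()
		 * backlog of 10 yields so_backlog = 9, so deferred
		 * connections begin to be closed one slot before the
		 * protocol would stop generating newconn upcalls.
		 */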
		mutex_exit(&so->so_lock);
		break;
	default:
		ASSERT(0);
		break;
	}
}

void
so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
{
	struct sonode *so = (struct sonode *)sock_handle;

	if (qfull) {
		so_snd_qfull(so);
	} else {
		so_snd_qnotfull(so);
		mutex_enter(&so->so_lock);
		/* so_notify_writable drops so_lock */
		so_notify_writable(so);
	}
}

sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,
    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
{
	struct sonode *so = (struct sonode *)parenthandle;
	struct sonode *nso;
	int error;

	ASSERT(proto_handle != NULL);

	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
	    (so->so_acceptq_len >= so->so_backlog &&
	    (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
		return (NULL);
	}

	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
	    &error);
	if (nso == NULL)
		return (NULL);

	if (peer_cred != NULL) {
		crhold(peer_cred);
		nso->so_peercred = peer_cred;
		nso->so_cpid = peer_cpid;
	}
	nso->so_listener = so;

	/*
	 * The new socket (nso), proto_handle and sock_upcallsp are all
	 * valid at this point. But as soon as nso is placed in the accept
	 * queue that can no longer be assumed (since an accept() thread may
	 * pull it off the queue and close the socket).
	 */
	*sock_upcallsp = &so_upcalls;

	mutex_enter(&so->so_acceptq_lock);
	if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
		mutex_exit(&so->so_acceptq_lock);
		ASSERT(nso->so_count == 1);
		nso->so_count--;
		nso->so_listener = NULL;
		/* drop proto ref */
		VN_RELE(SOTOV(nso));
		socket_destroy(nso);
		return (NULL);
	} else {
		so->so_acceptq_len++;
		if (nso->so_state & SS_FIL_DEFER) {
			list_insert_tail(&so->so_acceptq_defer, nso);
			mutex_exit(&so->so_acceptq_lock);
		} else {
			list_insert_tail(&so->so_acceptq_list, nso);
			cv_signal(&so->so_acceptq_cv);
			mutex_exit(&so->so_acceptq_lock);
			mutex_enter(&so->so_lock);
			so_notify_newconn(so);
		}

		return ((sock_upper_handle_t)nso);
	}
}

void
so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
	if (soppp->sopp_flags & SOCKOPT_WROFF)
		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
	if (soppp->sopp_flags & SOCKOPT_TAIL)
		so->so_proto_props.sopp_tail = soppp->sopp_tail;
	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
		}

		if (soppp->sopp_zcopyflag & COPYCACHED) {
			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
		}
	}
	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
	if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
		so->so_proto_props.sopp_loopback = soppp->sopp_loopback;

	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0) {
		sof_instance_t *inst;
		ssize_t maxblk;
		ushort_t wroff, tail;

		maxblk = so->so_proto_props.sopp_maxblk;
		wroff = so->so_proto_props.sopp_wroff;
		tail = so->so_proto_props.sopp_tail;
		for (inst = so->so_filter_bottom; inst != NULL;
		    inst = inst->sofi_prev) {
			if (SOF_INTERESTED(inst, mblk_prop)) {
				(*inst->sofi_ops->sofop_mblk_prop)(
				    (sof_handle_t)inst, inst->sofi_cookie,
				    &maxblk, &wroff, &tail);
			}
		}
		mutex_enter(&so->so_lock);
		so->so_proto_props.sopp_maxblk = maxblk;
		so->so_proto_props.sopp_wroff = wroff;
		so->so_proto_props.sopp_tail = tail;
		mutex_exit(&so->so_lock);
	}
#ifdef DEBUG
	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
	    SOCKOPT_LOOPBACK);
	ASSERT(soppp->sopp_flags == 0);
#endif
}

/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		if (so->so_downcalls->sd_recv_uio != NULL) {
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		ASSERT(msg_size == 0);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		goto space_check;
	}

	if (so->so_filter_active > 0) {
		for (; filter != NULL; filter = filter->sofi_prev) {
			if (!SOF_INTERESTED(filter, data_in))
				continue;
			mp = (*filter->sofi_ops->sofop_data_in)(
			    (sof_handle_t)filter, filter->sofi_cookie, mp,
			    flags, &msg_size);
			ASSERT(msgdsize(mp) == msg_size);
			DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
			    (mblk_t *), mp);
			/* Data was consumed/dropped, just do space check */
			if (msg_size == 0) {
				mutex_enter(&so->so_lock);
				goto space_check;
			}
		}
	}

	if (flags & MSG_OOB) {
		so_queue_oob(so, mp, msg_size);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	mutex_enter(&so->so_lock);
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);

space_check:
	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}
	goto done_unlock;
}

#pragma inline(so_queue_msg_impl)

ssize_t
so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
{
	struct sonode *so = (struct sonode *)sock_handle;

	return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
	    so->so_filter_bottom));
}
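
/*
 * Note (editorial): protocols interpret the so_queue_msg() return
 * value as the space remaining in the receive buffer. A return of -1
 * with *errorp set to ENOSPC means the buffer is full and so_flowctrld
 * has been set, telling the sender to back off until flow control is
 * relieved.
 */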

/*
 * Set the offset of the OOB data relative to the bytes already
 * queued, and generate a SIGURG signal.
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way, so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * Set the offset where the urgent byte is.
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	so_notify_oobsig(so);
}
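
/*
 * Worked example (editorial): with 100 bytes already queued and an
 * offset of 3, so_oobmark becomes 103; since that is non-zero,
 * SS_RCVATMARK is cleared and stays clear until the reader has
 * consumed the 103 bytes preceding the mark.
 */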

/*
 * Queue the OOB byte
 */
static void
so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
{
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	if (!IS_SO_OOB_INLINE(so)) {
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		so_enqueue_msg(so, mp, len);
	}

	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}

int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	/*
	 * No new data will be enqueued once the CLOSING flag is set.
	 */
	mutex_enter(&so->so_lock);
	so->so_state |= SS_CLOSING;
	ASSERT(so_verify_oobstate(so));
	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0)
		sof_sonode_closing(so);

	if (so->so_state & SS_ACCEPTCONN) {
		/*
		 * We grab and release the accept lock to ensure that any
		 * thread about to insert a socket in so_newconn completes
		 * before we flush the queue. Any thread calling so_newconn
		 * after we drop the lock will observe the SS_CLOSING flag,
		 * which will stop it from inserting the socket in the queue.
		 */
		mutex_enter(&so->so_acceptq_lock);
		mutex_exit(&so->so_acceptq_lock);

		so_acceptq_flush(so, B_TRUE);
	}

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
	switch (error) {
	default:
		/* Protocol made a synchronous close; remove proto ref */
		VN_RELE(SOTOV(so));
		break;
	case EINPROGRESS:
		/*
		 * Protocol is in the process of closing, it will make a
		 * 'closed' upcall to remove the reference.
		 */
		error = 0;
		break;
	}

	return (error);
}

/*
 * Upcall made by the protocol when it's doing an asynchronous close. It
 * will drop the protocol's reference on the socket.
 */
void
so_closed(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	VN_RELE(SOTOV(so));
}

conn_pid_node_list_hdr_t *
so_get_sock_pid_list(sock_upper_handle_t sock_handle)
{
	int sz, n = 0;
	pid_node_t *pn;
	conn_pid_node_t *cpn;
	conn_pid_node_list_hdr_t *cph;
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_pid_list_lock);

	n = list_size(&so->so_pid_list);
	sz = sizeof (conn_pid_node_list_hdr_t);
	sz += (n > 1) ? ((n - 1) * sizeof (conn_pid_node_t)) : 0;
	cph = kmem_zalloc(sz, KM_SLEEP);

	cph->cph_magic = CONN_PID_NODE_LIST_HDR_MAGIC;
	cph->cph_contents = CONN_PID_NODE_LIST_HDR_SOC;
	cph->cph_pn_cnt = n;
	cph->cph_tot_size = sz;
	cph->cph_flags = 0;
	cph->cph_optional1 = 0;
	cph->cph_optional2 = 0;

	if (cph->cph_pn_cnt > 0) {
		cpn = cph->cph_cpns;
		pn = list_head(&so->so_pid_list);
		while (pn != NULL) {
			PIDNODE2CONNPIDNODE(pn, cpn);
			pn = list_next(&so->so_pid_list, pn);
			cpn++;
		}
	}

	mutex_exit(&so->so_pid_list_lock);

	return (cph);
}

void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	so->so_copyflag |= STZCNOTIFY;
	cv_broadcast(&so->so_copy_cv);
	mutex_exit(&so->so_lock);
}

void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	so_notify_error(so);
}

/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data; either we ask the protocol to
 * copy directly into the supplied buffer, or we copy data from the
 * sonode's receive queue. The decision which one to use depends on
 * whether the protocol has a sd_recv_uio down call.
 */
int
so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	rval_t rval;
	int flags = 0;
	t_uscalar_t controllen, namelen;
	int error = 0;
	int ret;
	mblk_t *mctlp = NULL;
	union T_primitives *tpr;
	void *control;
	ssize_t saved_resid;
	struct uio *suiop;

	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		SO_UNBLOCK_FALLBACK(so);
		return (ENOTCONN);
	}

	if (msg->msg_flags & MSG_PEEK)
		msg->msg_flags &= ~MSG_WAITALL;

	if (so->so_mode & SM_ATOMIC)
		msg->msg_flags |= MSG_TRUNC;

	if (msg->msg_flags & MSG_OOB) {
		if ((so->so_mode & SM_EXDATA) == 0) {
			error = EOPNOTSUPP;
		} else if (so->so_downcalls->sd_recv_uio != NULL) {
			error = (*so->so_downcalls->sd_recv_uio)
			    (so->so_proto_handle, uiop, msg, cr);
		} else {
			error = sorecvoob(so, msg, uiop, msg->msg_flags,
			    IS_SO_OOB_INLINE(so));
		}
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * If the protocol has the recv down call, then pass the request
	 * down.
	 */
	if (so->so_downcalls->sd_recv_uio != NULL) {
		error = (*so->so_downcalls->sd_recv_uio)
		    (so->so_proto_handle, uiop, msg, cr);
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Reading data from the socket buffer
	 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	mutex_enter(&so->so_lock);
	/* Set SOREADLOCKED */
	error = so_lock_read_intr(so,
	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
	mutex_exit(&so->so_lock);
	if (error) {
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	suiop = sod_rcv_init(so, flags, &uiop);
retry:
	saved_resid = uiop->uio_resid;
	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
	if (error != 0) {
		goto out;
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;
	if (mctlp == NULL) {
		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}

		goto out_locked;
	}
	/* so_queue_msg has already verified length and alignment */
	tpr = (union T_primitives *)mctlp->b_rptr;
	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
	switch (tpr->type) {
	case T_DATA_IND: {
		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			ASSERT(so->so_family != AF_UNIX);
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mctlp);
		goto out;
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		tdr = (struct T_optdata_req *)mctlp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp,
			    tpr->optdata_ind.OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}

			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied, wait for some more.
		 * Not possible to wait if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	default:
		cmn_err(CE_CONT, "so_recvmsg bad type %x\n",
		    tpr->type);
		freemsg(mctlp);
		error = EPROTO;
		ASSERT(0);
	}
out:
	mutex_enter(&so->so_lock);
out_locked:
	ret = sod_rcv_done(so, suiop, uiop);
	if (ret != 0 && error == 0)
		error = ret;

	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

sonodeops_t so_sonodeops = {
	so_init,		/* sop_init */
	so_accept,		/* sop_accept */
	so_bind,		/* sop_bind */
	so_listen,		/* sop_listen */
	so_connect,		/* sop_connect */
	so_recvmsg,		/* sop_recvmsg */
	so_sendmsg,		/* sop_sendmsg */
	so_sendmblk,		/* sop_sendmblk */
	so_getpeername,		/* sop_getpeername */
	so_getsockname,		/* sop_getsockname */
	so_shutdown,		/* sop_shutdown */
	so_getsockopt,		/* sop_getsockopt */
	so_setsockopt,		/* sop_setsockopt */
	so_ioctl,		/* sop_ioctl */
	so_poll,		/* sop_poll */
	so_close,		/* sop_close */
};

sock_upcalls_t so_upcalls = {
	so_newconn,
	so_connected,
	so_disconnected,
	so_opctl,
	so_queue_msg,
	so_set_prop,
	so_txq_full,
	so_signal_oob,
	so_zcopy_notify,
	so_set_error,
	so_closed,
	so_get_sock_pid_list
};