1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
28 */
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/debug.h>
35 #include <sys/cmn_err.h>
36
37 #include <sys/stropts.h>
38 #include <sys/socket.h>
39 #include <sys/socketvar.h>
40 #include <sys/fcntl.h>
41
42 #define _SUN_TPI_VERSION 2
43 #include <sys/tihdr.h>
44 #include <sys/sockio.h>
45 #include <sys/kmem_impl.h>
46
47 #include <sys/strsubr.h>
48 #include <sys/strsun.h>
49 #include <sys/ddi.h>
50 #include <netinet/in.h>
51 #include <inet/ip.h>
52
53 #include <fs/sockfs/sockcommon.h>
54 #include <fs/sockfs/sockfilter_impl.h>
55
56 #include <sys/socket_proto.h>
57
58 #include <fs/sockfs/socktpi_impl.h>
59 #include <fs/sockfs/sodirect.h>
60 #include <sys/tihdr.h>
61 #include <fs/sockfs/nl7c.h>
62
63 extern int xnet_skip_checks;
64 extern int xnet_check_print;
65
66 static void so_queue_oob(struct sonode *, mblk_t *, size_t);
67
68
/*
 * Stub accept entry point for protocols that do not support
 * accept(3SOCKET); always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
	return (EOPNOTSUPP);
}
76
/*
 * Stub listen entry point for protocols that do not support
 * listen(3SOCKET); always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}
83
/*
 * Stub getsockname entry point for protocols that do not support it;
 * always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
    socklen_t *len, struct cred *cr)
{
	return (EOPNOTSUPP);
}
91
/*
 * Stub getpeername entry point for protocols that do not support it;
 * always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	return (EOPNOTSUPP);
}
99
/*
 * Stub shutdown entry point for protocols that do not support
 * shutdown(3SOCKET); always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}
106
/*
 * Stub sendmblk entry point for protocols that do not support mblk
 * (sendfile-style) transmits; always fails with EOPNOTSUPP.
 */
/*ARGSUSED*/
int
so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	return (EOPNOTSUPP);
}
114
115 /*
116 * Generic Socket Ops
117 */
118
/*
 * Generic socket init entry point; all the work is done by
 * socket_init_common().  Note that the downcall takes flags before
 * the credential, unlike this wrapper's signature.
 */
/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	return (socket_init_common(so, pso, flags, cr));
}
125
/*
 * Generic bind entry point.
 *
 * Performs sockaddr validation for AF_INET/AF_INET6 (length and family
 * checks, some of them X/Open-only), may fall the socket back to TPI
 * when NL7C/NCA has claimed the requested address, and otherwise hands
 * the request to the socket filter stack and the protocol's sd_bind
 * downcall.  A NULL name is interpreted as an unbind request and is
 * passed straight down.
 */
int
so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
	int error;

	/* Serialize against (or redo the call after) a TPI fallback */
	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));

	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);

	/* X/Open requires this check */
	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		if (xnet_check_print) {
			printf("sockfs: X/Open bind state check "
			    "caused EINVAL\n");
		}
		error = EINVAL;
		goto done;
	}

	/*
	 * a bind to a NULL address is interpreted as unbind. So just
	 * do the downcall.
	 */
	if (name == NULL)
		goto dobind;

	switch (so->so_family) {
	case AF_INET:
		/* Wrong length: EAFNOSUPPORT if the family is also wrong */
		if ((size_t)namelen != sizeof (sin_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if ((flags & _SOBIND_XPG4_2) &&
		    (name->sa_family != so->so_family)) {
			/*
			 * This check has to be made for X/Open
			 * sockets however application failures have
			 * been observed when it is applied to
			 * all sockets.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Force a zero sa_family to match so_family.
		 *
		 * Some programs like inetd(1M) don't set the
		 * family field. Other programs leave
		 * sin_family set to garbage - SunOS 4.X does
		 * not check the family field on a bind.
		 * We use the family field that
		 * was passed in to the socket() call.
		 */
		name->sa_family = so->so_family;
		break;

	case AF_INET6: {
#ifdef DEBUG
		sin6_t *sin6 = (sin6_t *)name;
#endif
		if ((size_t)namelen != sizeof (sin6_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if (name->sa_family != so->so_family) {
			/*
			 * With IPv6 we require the family to match
			 * unlike in IPv4.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
#ifdef DEBUG
		/*
		 * Verify that apps don't forget to clear
		 * sin6_scope_id etc
		 */
		if (sin6->sin6_scope_id != 0 &&
		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized sin6_scope_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->sin6_scope_id,
			    (int)curproc->p_pid);
		}
		if (sin6->__sin6_src_id != 0) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized __sin6_src_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->__sin6_src_id,
			    (int)curproc->p_pid);
		}
#endif /* DEBUG */

		break;
	}
	default:
		/* Just pass the request to the protocol */
		goto dobind;
	}

	/*
	 * First we check if either NCA or KSSL has been enabled for
	 * the requested address, and if so, we fall back to TPI.
	 * If neither of those two services are enabled, then we just
	 * pass the request to the protocol.
	 *
	 * Note that KSSL can only be enabled on a socket if NCA is NOT
	 * enabled for that socket, hence the else-statement below.
	 */
	if (nl7c_enabled && ((so->so_family == AF_INET ||
	    so->so_family == AF_INET6) &&
	    nl7c_lookup_addr(name, namelen) != NULL)) {
		/*
		 * NL7C is not supported in non-global zones,
		 * we enforce this restriction here.
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* NCA should be used, so fall back to TPI */
			error = so_tpi_fallback(so, cr);
			SO_UNBLOCK_FALLBACK(so);
			if (error)
				return (error);
			else
				return (SOP_BIND(so, name, namelen, flags, cr));
		}
	}

dobind:
	/* Filters (if any) get first shot; error < 0 means "pass down" */
	if (so->so_filter_active == 0 ||
	    (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_bind)
		    (so->so_proto_handle, name, namelen, cr);
	}
done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
274
275 int
276 so_listen(struct sonode *so, int backlog, struct cred *cr)
277 {
278 int error = 0;
279
280 ASSERT(MUTEX_NOT_HELD(&so->so_lock));
281 SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
282
283 if ((so)->so_filter_active == 0 ||
284 (error = sof_filter_listen(so, &backlog, cr)) < 0)
285 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
286 backlog, cr);
287
288 SO_UNBLOCK_FALLBACK(so);
289
290 return (error);
291 }
292
293
/*
 * Generic connect entry point.  Clears any pending asynchronous error
 * first, then lets the filter stack and the protocol's sd_connect
 * downcall handle the request.  If the protocol reports EINPROGRESS,
 * block (unless non-blocking) until the connection id completes.
 */
int
so_connect(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int fflag, int flags, struct cred *cr)
{
	int error = 0;
	sock_connid_t id;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));

	/*
	 * If there is a pending error, return error
	 * This can happen if a non blocking operation caused an error.
	 */

	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	/* Filters get first shot; error < 0 means "pass down" */
	if (so->so_filter_active == 0 ||
	    (error = sof_filter_connect(so, (struct sockaddr *)name,
	    &namelen, cr)) < 0) {
		error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
		    name, namelen, &id, cr);

		if (error == EINPROGRESS)
			error = so_wait_connected(so,
			    fflag & (FNONBLOCK|FNDELAY), id);
	}
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
331
/*
 * Generic accept entry point.  Dequeues a fully connected sonode from
 * the accept queue (blocking unless FNONBLOCK/FNDELAY), completes the
 * accept via the filter stack and the protocol's sd_accept downcall,
 * and returns the new sonode through *nsop.  On downcall/filter
 * failure the new socket is closed and destroyed here.
 */
/*ARGSUSED*/
int
so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
{
	int error = 0;
	struct sonode *nso;

	*nsop = NULL;

	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
	if ((so->so_state & SS_ACCEPTCONN) == 0) {
		/* Not a listener; datagram/raw sockets cannot listen */
		SO_UNBLOCK_FALLBACK(so);
		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
		    EOPNOTSUPP : EINVAL);
	}

	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
	    &nso)) == 0) {
		ASSERT(nso != NULL);

		/* finish the accept */
		if ((so->so_filter_active > 0 &&
		    (error = sof_filter_accept(nso, cr)) > 0) ||
		    (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
			(void) socket_close(nso, 0, cr);
			socket_destroy(nso);
		} else {
			*nsop = nso;
			/* Track the accepting process (but not kernel threads) */
			if (!(curproc->p_flag & SSYS))
				sonode_insert_pid(nso, curproc->p_pidp->pid_id);
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
369
/*
 * Generic sendmsg entry point.
 *
 * Validates flags and atomic-message size limits, then loops sending
 * until uio_resid is drained: each iteration checks for shutdown and
 * pending errors, waits out flow control (except for MSG_OOB), and
 * either hands the uio straight to the protocol (sd_send_uio) or
 * copies it into mblks, runs them through the filter stack, and sends
 * them via sd_send.  SM_ATOMIC protocols effectively do one iteration
 * since socopyinuio() consumes the whole (size-checked) message.
 */
int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	int error, flags;
	boolean_t dontblock;
	ssize_t orig_resid;
	mblk_t  *mp;

	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

	flags = msg->msg_flags;
	error = 0;
	dontblock = (flags & MSG_DONTWAIT) ||
	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
		/*
		 * Old way of passing fd's is not supported
		 */
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	/* Atomic protocols reject messages larger than sopp_maxpsz */
	if ((so->so_mode & SM_ATOMIC) &&
	    uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	/*
	 * For atomic sends we will only do one iteration.
	 */
	do {
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}

		/*
		 * Send down OOB messages even if the send path is being
		 * flow controlled (assuming the protocol supports OOB data).
		 */
		if (flags & MSG_OOB) {
			if ((so->so_mode & SM_EXDATA) == 0) {
				error = EOPNOTSUPP;
				break;
			}
		} else if (SO_SND_FLOWCTRLD(so)) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * Time to send data to the protocol. We either copy the
		 * data into mblks or pass the uio directly to the protocol.
		 * We decide what to do based on the available down calls.
		 */
		if (so->so_downcalls->sd_send_uio != NULL) {
			error = (*so->so_downcalls->sd_send_uio)
			    (so->so_proto_handle, uiop, msg, cr);
			if (error != 0)
				break;
		} else {
			/* save the resid in case of failure */
			orig_resid = uiop->uio_resid;

			if ((mp = socopyinuio(uiop,
			    so->so_proto_props.sopp_maxpsz,
			    so->so_proto_props.sopp_wroff,
			    so->so_proto_props.sopp_maxblk,
			    so->so_proto_props.sopp_tail, &error)) == NULL) {
				break;
			}
			ASSERT(uiop->uio_resid >= 0);

			/* Filters may consume the data (mp == NULL) */
			if (so->so_filter_active > 0 &&
			    ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
			    &error)) == NULL)) {
				if (error != 0)
					break;
				continue;
			}
			error = (*so->so_downcalls->sd_send)
			    (so->so_proto_handle, mp, msg, cr);
			if (error != 0) {
				/*
				 * The send failed. We do not have to free the
				 * mblks, because that is the protocol's
				 * responsibility. However, uio_resid must
				 * remain accurate, so adjust that here.
				 */
				uiop->uio_resid = orig_resid;
				break;
			}
		}
	} while (uiop->uio_resid > 0);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
485
486 int
487 so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
488 struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
489 boolean_t fil_inject)
490 {
491 int error;
492 boolean_t dontblock;
493 size_t size;
494 mblk_t *mp = *mpp;
495
496 if (so->so_downcalls->sd_send == NULL)
497 return (EOPNOTSUPP);
498
499 error = 0;
500 dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
501 (fflag & (FNONBLOCK|FNDELAY));
502 size = msgdsize(mp);
503
504 if ((so->so_mode & SM_ATOMIC) &&
505 size > so->so_proto_props.sopp_maxpsz &&
506 so->so_proto_props.sopp_maxpsz != -1) {
507 SO_UNBLOCK_FALLBACK(so);
508 return (EMSGSIZE);
509 }
510
511 while (mp != NULL) {
512 mblk_t *nmp, *last_mblk;
513 size_t mlen;
514
515 if (so->so_state & SS_CANTSENDMORE) {
516 error = EPIPE;
517 break;
518 }
519 if (so->so_error != 0) {
520 mutex_enter(&so->so_lock);
521 error = sogeterr(so, B_TRUE);
522 mutex_exit(&so->so_lock);
523 if (error != 0)
524 break;
525 }
526 /* Socket filters are not flow controlled */
527 if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
528 /*
529 * Need to wait until the protocol is ready to receive
530 * more data for transmission.
531 */
532 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
533 break;
534 }
535
536 /*
537 * We only allow so_maxpsz of data to be sent down to
538 * the protocol at time.
539 */
540 mlen = MBLKL(mp);
541 nmp = mp->b_cont;
542 last_mblk = mp;
543 while (nmp != NULL) {
544 mlen += MBLKL(nmp);
545 if (mlen > so->so_proto_props.sopp_maxpsz) {
546 last_mblk->b_cont = NULL;
547 break;
548 }
549 last_mblk = nmp;
550 nmp = nmp->b_cont;
551 }
552
553 if (so->so_filter_active > 0 &&
554 (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
555 cr, &error)) == NULL) {
556 *mpp = mp = nmp;
557 if (error != 0)
558 break;
559 continue;
560 }
561 error = (*so->so_downcalls->sd_send)
562 (so->so_proto_handle, mp, msg, cr);
563 if (error != 0) {
564 /*
565 * The send failed. The protocol will free the mblks
566 * that were sent down. Let the caller deal with the
567 * rest.
568 */
569 *mpp = nmp;
570 break;
571 }
572
573 *mpp = mp = nmp;
574 }
575 /* Let the filter know whether the protocol is flow controlled */
576 if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
577 error = ENOSPC;
578
579 return (error);
580 }
581
582 #pragma inline(so_sendmblk_impl)
583
/*
 * Generic sendmblk entry point (sendfile path).  Verifies the protocol
 * supports mblk sends (SM_SENDFILESUPP) and delegates to
 * so_sendmblk_impl() with the full filter stack and fil_inject clear,
 * holding the fallback lock across the whole operation.
 */
int
so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));

	if ((so->so_mode & SM_SENDFILESUPP) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
	    B_FALSE);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
604
605 int
606 so_shutdown(struct sonode *so, int how, struct cred *cr)
607 {
608 int error;
609
610 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
611
612 /*
613 * SunOS 4.X has no check for datagram sockets.
614 * 5.X checks that it is connected (ENOTCONN)
615 * X/Open requires that we check the connected state.
616 */
617 if (!(so->so_state & SS_ISCONNECTED)) {
618 if (!xnet_skip_checks) {
619 error = ENOTCONN;
620 if (xnet_check_print) {
621 printf("sockfs: X/Open shutdown check "
622 "caused ENOTCONN\n");
623 }
624 }
625 goto done;
626 }
627
628 if (so->so_filter_active == 0 ||
629 (error = sof_filter_shutdown(so, &how, cr)) < 0)
630 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
631 how, cr));
632
633 /*
634 * Protocol agreed to shutdown. We need to flush the
635 * receive buffer if the receive side is being shutdown.
636 */
637 if (error == 0 && how != SHUT_WR) {
638 mutex_enter(&so->so_lock);
639 /* wait for active reader to finish */
640 (void) so_lock_read(so, 0);
641
642 so_rcv_flush(so);
643
644 so_unlock_read(so);
645 mutex_exit(&so->so_lock);
646 }
647
648 done:
649 SO_UNBLOCK_FALLBACK(so);
650 return (error);
651 }
652
653 int
654 so_getsockname(struct sonode *so, struct sockaddr *addr,
655 socklen_t *addrlen, struct cred *cr)
656 {
657 int error;
658
659 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
660
661 if (so->so_filter_active == 0 ||
662 (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
663 error = (*so->so_downcalls->sd_getsockname)
664 (so->so_proto_handle, addr, addrlen, cr);
665
666 SO_UNBLOCK_FALLBACK(so);
667 return (error);
668 }
669
/*
 * Generic getpeername entry point.  The accept path (accept == B_TRUE)
 * goes straight to the protocol; otherwise the socket must be
 * connected, an X/Open send-shutdown check is applied, and the filter
 * stack gets a chance before the sd_getpeername downcall.
 */
int
so_getpeername(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));

	if (accept) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	} else if (!(so->so_state & SS_ISCONNECTED)) {
		error = ENOTCONN;
	} else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		/* Added this check for X/Open */
		error = EINVAL;
		if (xnet_check_print) {
			printf("sockfs: X/Open getpeername check => EINVAL\n");
		}
	} else if (so->so_filter_active == 0 ||
	    (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
698
/*
 * Generic getsockopt entry point.
 *
 * SOL_FILTER options are handled entirely by the filter framework.
 * Otherwise the filter stack, then sockfs-common handling, then the
 * protocol's sd_getsockopt downcall are tried in order (negative
 * return means "try the next level").  If the protocol does not
 * recognize a SOL_SOCKET option (ENOPROTOOPT), a zeroed default is
 * synthesized for a known set of options so that get never fails,
 * matching historical sockfs behavior.
 */
int
so_getsockopt(struct sonode *so, int level, int option_name,
    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
	int error = 0;

	if (level == SOL_FILTER)
		return (sof_getsockopt(so, option_name, optval, optlenp, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));

	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_getsockopt(so, level, option_name, optval,
	    optlenp, cr)) < 0) &&
	    (error = socket_getopt_common(so, level, option_name, optval,
	    optlenp, flags)) < 0) {
		error = (*so->so_downcalls->sd_getsockopt)
		    (so->so_proto_handle, level, option_name, optval, optlenp,
		    cr);
		if (error == ENOPROTOOPT) {
			if (level == SOL_SOCKET) {
				/*
				 * If a protocol does not support a particular
				 * socket option, set can fail (not allowed)
				 * but get can not fail. This is the previous
				 * sockfs behavior.
				 */
				switch (option_name) {
				case SO_LINGER:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct linger)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct linger));
					*optlenp = sizeof (struct linger);
					break;
				case SO_RCVTIMEO:
				case SO_SNDTIMEO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct timeval)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct timeval));
					*optlenp = sizeof (struct timeval);
					break;
				case SO_SND_BUFINFO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct so_snd_bufinfo)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval,
					    sizeof (struct so_snd_bufinfo));
					*optlenp =
					    sizeof (struct so_snd_bufinfo);
					break;
				case SO_DEBUG:
				case SO_REUSEADDR:
				case SO_KEEPALIVE:
				case SO_DONTROUTE:
				case SO_BROADCAST:
				case SO_USELOOPBACK:
				case SO_OOBINLINE:
				case SO_DGRAM_ERRIND:
				case SO_SNDBUF:
				case SO_RCVBUF:
					error = 0;
					*((int32_t *)optval) = 0;
					*optlenp = sizeof (int32_t);
					break;
				default:
					break;
				}
			}
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
785
/*
 * Generic setsockopt entry point.
 *
 * SOL_FILTER options go straight to the filter framework.  The X/Open
 * send-shutdown check is applied first, then the filter stack may
 * consume the option (result >= 0).  SO_RCVTIMEO/SO_SNDTIMEO are
 * normalized to a native struct timeval (converting from timeval32
 * for 32-bit callers) and cached in so_rcvtimeo/so_sndtimeo before
 * also being passed down; SO_RCVBUF is cached in so_xpg_rcvbuf.
 * Everything is finally handed to the sd_setsockopt downcall.
 */
int
so_setsockopt(struct sonode *so, int level, int option_name,
    const void *optval, socklen_t optlen, struct cred *cr)
{
	int error = 0;
	struct timeval tl;
	const void *opt = optval;

	if (level == SOL_FILTER)
		return (sof_setsockopt(so, option_name, optval, optlen, cr));

	SO_BLOCK_FALLBACK(so,
	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));

	/* X/Open requires this check */
	if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
		SO_UNBLOCK_FALLBACK(so);
		if (xnet_check_print)
			printf("sockfs: X/Open setsockopt check => EINVAL\n");
		return (EINVAL);
	}

	/* A non-negative filter result means the option was consumed */
	if (so->so_filter_active > 0 &&
	    (error = sof_filter_setsockopt(so, level, option_name,
	    (void *)optval, &optlen, cr)) >= 0)
		goto done;

	if (level == SOL_SOCKET) {
		switch (option_name) {
		case SO_RCVTIMEO:
		case SO_SNDTIMEO: {
			/*
			 * We pass down these two options to protocol in order
			 * to support some third part protocols which need to
			 * know them. For those protocols which don't care
			 * these two options, simply return 0.
			 */
			clock_t t_usec;

			if (get_udatamodel() == DATAMODEL_NONE ||
			    get_udatamodel() == DATAMODEL_NATIVE) {
				if (optlen != sizeof (struct timeval)) {
					error = EINVAL;
					goto done;
				}
				bcopy((struct timeval *)optval, &tl,
				    sizeof (struct timeval));
			} else {
				/* 32-bit caller: convert timeval32 */
				if (optlen != sizeof (struct timeval32)) {
					error = EINVAL;
					goto done;
				}
				TIMEVAL32_TO_TIMEVAL(&tl,
				    (struct timeval32 *)optval);
			}
			opt = &tl;
			optlen = sizeof (tl);
			t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
			mutex_enter(&so->so_lock);
			if (option_name == SO_RCVTIMEO)
				so->so_rcvtimeo = drv_usectohz(t_usec);
			else
				so->so_sndtimeo = drv_usectohz(t_usec);
			mutex_exit(&so->so_lock);
			break;
		}
		case SO_RCVBUF:
			/*
			 * XXX XPG 4.2 applications retrieve SO_RCVBUF from
			 * sockfs since the transport might adjust the value
			 * and not return exactly what was set by the
			 * application.
			 */
			so->so_xpg_rcvbuf = *(int32_t *)optval;
			break;
		}
	}
	error = (*so->so_downcalls->sd_setsockopt)
	    (so->so_proto_handle, level, option_name, opt, optlen, cr);
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}
869
/*
 * Generic ioctl entry point.  Clears any pending asynchronous error,
 * then tries in order: the filter stack, sockfs-common ioctls,
 * STREAMS-ioctl handling (which may trigger a TPI fallback), and
 * finally the protocol's sd_ioctl downcall (a negative return from
 * each stage means "try the next").
 */
int
so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));

	/*
	 * If there is a pending error, return error
	 * This can happen if a non blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	/*
	 * calling strioc can result in the socket falling back to TPI,
	 * if that is supported.
	 */
	if ((so->so_filter_active == 0 ||
	    (error = sof_filter_ioctl(so, cmd, arg, mode,
	    rvalp, cr)) < 0) &&
	    (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
	    (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
		error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
		    cmd, arg, mode, rvalp, cr);
	}

done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
908
/*
 * Generic poll entry point.
 *
 * Pending errors are reported as both input and output events.  Write
 * events are derived from connection state and flow control; read
 * events from the accept queue, the protocol's sd_poll downcall (when
 * present), receive-queue data, urgent data, and disconnect state
 * (POLLHUP).  The first pass is lockless; if no events were found (or
 * POLLET is requested) read events are rechecked under so_lock and
 * the pollhead is returned for the caller to wait on.
 */
int
so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int state = so->so_state, mask;
	*reventsp = 0;

	/*
	 * In sockets the errors are represented as input/output events
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
		return (0);
	}

	/*
	 * If the socket is in a state where it can send data
	 * turn on POLLWRBAND and POLLOUT events.
	 */
	if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
		/*
		 * out of band data is allowed even if the connection
		 * is flow controlled
		 */
		*reventsp |= POLLWRBAND & events;
		if (!SO_SND_FLOWCTRLD(so)) {
			/*
			 * As long as there is buffer to send data
			 * turn on POLLOUT events
			 */
			*reventsp |= POLLOUT & events;
		}
	}

	/*
	 * Turn on POLLIN whenever there is data on the receive queue,
	 * or the socket is in a state where no more data will be received.
	 * Also, if the socket is accepting connections, flip the bit if
	 * there is something on the queue.
	 *
	 * We do an initial check for events without holding locks. However,
	 * if there are no event available, then we redo the check for POLLIN
	 * events under the lock.
	 */

	/* Pending connections */
	if (!list_is_empty(&so->so_acceptq_list))
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/*
	 * If we're looking for POLLRDHUP, indicate it if we have sent the
	 * last rx signal for the socket.
	 */
	if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
		*reventsp |= POLLRDHUP;

	/* Data */
	/* so_downcalls is null for sctp */
	if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
		*reventsp |= (*so->so_downcalls->sd_poll)
		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
		    CRED()) & events;
		ASSERT((*reventsp & ~events) == 0);
		/* do not recheck events */
		events &= ~SO_PROTO_POLLEV;
	} else {
		if (SO_HAVE_DATA(so))
			*reventsp |= (POLLIN|POLLRDNORM) & events;

		/* Urgent data */
		if ((state & SS_OOBPEND) != 0) {
			*reventsp |= (POLLRDBAND | POLLPRI) & events;
		}

		/*
		 * If the socket has become disconnected, we set POLLHUP.
		 * Note that if we are in this state, we will have set POLLIN
		 * (SO_HAVE_DATA() is true on a disconnected socket), but not
		 * POLLOUT (SS_ISCONNECTED is false). This is in keeping with
		 * the semantics of POLLHUP, which is defined to be mutually
		 * exclusive with respect to POLLOUT but not POLLIN. We are
		 * therefore setting POLLHUP primarily for the benefit of
		 * those not polling on POLLIN, as they have no other way of
		 * knowing that the socket has been disconnected.
		 */
		mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;

		if ((state & (mask | SS_ISCONNECTED)) == mask)
			*reventsp |= POLLHUP;
	}

	if ((!*reventsp && !anyyet) || (events & POLLET)) {
		/* Check for read events again, but this time under lock */
		if (events & (POLLIN|POLLRDNORM)) {
			mutex_enter(&so->so_lock);
			if (SO_HAVE_DATA(so) ||
			    !list_is_empty(&so->so_acceptq_list)) {
				if (events & POLLET) {
					/* Edge-triggered: arm and report now */
					so->so_pollev |= SO_POLLEV_IN;
					*phpp = &so->so_poll_list;
				}

				mutex_exit(&so->so_lock);
				*reventsp |= (POLLIN|POLLRDNORM) & events;

				return (0);
			} else {
				so->so_pollev |= SO_POLLEV_IN;
				mutex_exit(&so->so_lock);
			}
		}
		*phpp = &so->so_poll_list;
	}
	return (0);
}
1025
1026 /*
1027 * Generic Upcalls
1028 */
/*
 * Generic Upcalls
 */
/*
 * Protocol upcall: the connection identified by id is now established.
 * Records the peer credential (replacing any previous one) and the
 * connection id, marks the socket connected, and wakes waiters.
 * NOTE(review): so_lock is entered here with no visible exit —
 * presumably so_notify_connected() drops it; confirm against its
 * implementation.
 */
void
so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
    cred_t *peer_cred, pid_t peer_cpid)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	ASSERT(so->so_proto_handle != NULL);

	if (peer_cred != NULL) {
		if (so->so_peercred != NULL)
			crfree(so->so_peercred);
		crhold(peer_cred);
		so->so_peercred = peer_cred;
		so->so_cpid = peer_cpid;
	}

	so->so_proto_connid = id;
	soisconnected(so);
	/*
	 * Wake ones who're waiting for conn to become established.
	 */
	so_notify_connected(so);
}
1053
/*
 * Protocol upcall: the connection identified by id was disconnected
 * (or, if the socket never reached SS_ISCONNECTED, the connect attempt
 * failed).  Always returns 0.
 * NOTE(review): so_lock is entered here with no visible exit —
 * presumably so_notify_disconnected() drops it; confirm against its
 * implementation.
 */
int
so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;
	boolean_t connect_failed;

	mutex_enter(&so->so_lock);

	/*
	 * If we aren't currently connected, then this isn't a disconnect but
	 * rather a failure to connect.
	 */
	connect_failed = !(so->so_state & SS_ISCONNECTED);

	so->so_proto_connid = id;
	soisdisconnected(so, error);
	so_notify_disconnected(so, connect_failed, error);

	return (0);
}
1074
/*
 * Protocol upcall for out-of-band control operations: shut down the
 * send or receive side, or enable accepting with the given backlog.
 * NOTE(review): the SHUT_SEND/SHUT_RECV cases enter so_lock with no
 * visible exit — presumably so_notify_disconnecting()/so_notify_eof()
 * drop it; confirm against their implementations.
 */
void
so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
    uintptr_t arg)
{
	struct sonode *so = (struct sonode *)sock_handle;

	switch (action) {
	case SOCK_OPCTL_SHUT_SEND:
		mutex_enter(&so->so_lock);
		socantsendmore(so);
		so_notify_disconnecting(so);
		break;
	case SOCK_OPCTL_SHUT_RECV: {
		mutex_enter(&so->so_lock);
		socantrcvmore(so);
		so_notify_eof(so);
		break;
	}
	case SOCK_OPCTL_ENAB_ACCEPT:
		mutex_enter(&so->so_lock);
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = (unsigned int)arg;
		/*
		 * The protocol can stop generating newconn upcalls when
		 * the backlog is full, so to make sure the listener does
		 * not end up with a queue full of deferred connections
		 * we reduce the backlog by one. Thus the listener will
		 * start closing deferred connections before the backlog
		 * is full.
		 */
		if (so->so_filter_active > 0)
			so->so_backlog = MAX(1, so->so_backlog - 1);
		mutex_exit(&so->so_lock);
		break;
	default:
		ASSERT(0);
		break;
	}
}
1114
1115 void
1116 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
1117 {
1118 struct sonode *so = (struct sonode *)sock_handle;
1119
1120 if (qfull) {
1121 so_snd_qfull(so);
1122 } else {
1123 so_snd_qnotfull(so);
1124 mutex_enter(&so->so_lock);
1125 /* so_notify_writable drops so_lock */
1126 so_notify_writable(so);
1127 }
1128 }
1129
/*
 * Protocol upcall announcing a new incoming connection on a listener.
 *
 * Refuses (returns NULL) when the parent is not accepting or the
 * backlog is full and no deferred connection can be dropped.
 * Otherwise creates a new sonode for the connection, records the peer
 * credential, and places it on either the deferred list (when a
 * filter has deferred it) or the accept queue, notifying accept()ers.
 * A listener that is closing or falling back to TPI causes the new
 * sonode to be torn down and NULL returned.
 */
sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,
    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
{
	struct sonode	*so = (struct sonode *)parenthandle;
	struct sonode	*nso;
	int error;

	ASSERT(proto_handle != NULL);

	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
	    (so->so_acceptq_len >= so->so_backlog &&
	    (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
		return (NULL);
	}

	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
	    &error);
	if (nso == NULL)
		return (NULL);

	if (peer_cred != NULL) {
		crhold(peer_cred);
		nso->so_peercred = peer_cred;
		nso->so_cpid = peer_cpid;
	}
	nso->so_listener = so;

	/*
	 * The new socket (nso), proto_handle and sock_upcallsp are all
	 * valid at this point. But as soon as nso is placed in the accept
	 * queue that can no longer be assumed (since an accept() thread may
	 * pull it off the queue and close the socket).
	 */
	*sock_upcallsp = &so_upcalls;

	mutex_enter(&so->so_acceptq_lock);
	if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
		/* Listener is going away; undo the new socket's creation */
		mutex_exit(&so->so_acceptq_lock);
		ASSERT(nso->so_count == 1);
		nso->so_count--;
		nso->so_listener = NULL;
		/* drop proto ref */
		VN_RELE(SOTOV(nso));
		socket_destroy(nso);
		return (NULL);
	} else {
		so->so_acceptq_len++;
		if (nso->so_state & SS_FIL_DEFER) {
			/* A filter wants this connection held back for now */
			list_insert_tail(&so->so_acceptq_defer, nso);
			mutex_exit(&so->so_acceptq_lock);
		} else {
			list_insert_tail(&so->so_acceptq_list, nso);
			cv_signal(&so->so_acceptq_cv);
			mutex_exit(&so->so_acceptq_lock);
			mutex_enter(&so->so_lock);
			so_notify_newconn(so);
		}

		return ((sock_upper_handle_t)nso);
	}
}
1193
/*
 * so_set_prop - upcall made by the protocol to update the socket's
 * cached protocol properties (so_proto_props).
 *
 * Only the members whose corresponding SOCKOPT_* bit is set in
 * soppp->sopp_flags are copied; all other properties are left
 * untouched.  After the copy, any active socket filters are given a
 * chance to adjust the mblk-related properties (maxblk, wroff, tail).
 */
void
so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
	if (soppp->sopp_flags & SOCKOPT_WROFF)
		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
	if (soppp->sopp_flags & SOCKOPT_TAIL)
		so->so_proto_props.sopp_tail = soppp->sopp_tail;
	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
		/* VM-safe and VM-unsafe zero-copy are mutually exclusive */
		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
		}

		if (soppp->sopp_zcopyflag & COPYCACHED) {
			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
		}
	}
	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
	if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
		so->so_proto_props.sopp_loopback = soppp->sopp_loopback;

	mutex_exit(&so->so_lock);

	if (so->so_filter_active > 0) {
		/*
		 * Let active filters adjust the mblk properties.  so_lock
		 * is dropped while calling into the filters; the adjusted
		 * values are copied back under the lock afterwards.  The
		 * chain is walked via sofi_prev starting at
		 * so_filter_bottom (same direction so_queue_msg uses).
		 */
		sof_instance_t *inst;
		ssize_t maxblk;
		ushort_t wroff, tail;
		maxblk = so->so_proto_props.sopp_maxblk;
		wroff = so->so_proto_props.sopp_wroff;
		tail = so->so_proto_props.sopp_tail;
		for (inst = so->so_filter_bottom; inst != NULL;
		    inst = inst->sofi_prev) {
			if (SOF_INTERESTED(inst, mblk_prop)) {
				(*inst->sofi_ops->sofop_mblk_prop)(
				    (sof_handle_t)inst, inst->sofi_cookie,
				    &maxblk, &wroff, &tail);
			}
		}
		mutex_enter(&so->so_lock);
		so->so_proto_props.sopp_maxblk = maxblk;
		so->so_proto_props.sopp_wroff = wroff;
		so->so_proto_props.sopp_tail = tail;
		mutex_exit(&so->so_lock);
	}
#ifdef DEBUG
	/* Verify that every flag the protocol may set was handled above. */
	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
	    SOCKOPT_LOOPBACK);
	ASSERT(soppp->sopp_flags == 0);
#endif
}
1273
/*
 * so_queue_msg_impl - queue data coming up from the protocol on the
 * socket's receive queue and wake up any readers.
 *
 * `mp' is first run through the active socket filters, starting at
 * `filter' and following the sofi_prev links; a filter may replace,
 * split (chain via b_next), or entirely consume the data.  A NULL `mp'
 * is legal: for protocols with an sd_recv_uio downcall it simply
 * notifies readers that data is available at the protocol, and
 * otherwise it is a zero-length call used only to report buffer space.
 *
 * Returns the space left in the receive buffer, or -1 with *errorp set:
 *	ENOSPC	   - receive buffer full; so_flowctrld is set B_TRUE.
 *	EOPNOTSUPP - the socket is falling back (or has fallen back) to
 *		     STREAMS, so data cannot be accepted here.
 *
 * If force_pushp is non-NULL, *force_pushp on entry forces an immediate
 * notification of readers; on return it is set to B_TRUE if readers
 * were notified (otherwise the receive push timer covers the data).
 */
/* ARGSUSED */
ssize_t
so_queue_msg_impl(struct sonode *so, mblk_t *mp,
    size_t msg_size, int flags, int *errorp, boolean_t *force_pushp,
    sof_instance_t *filter)
{
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		/*
		 * No data passed up; with sd_recv_uio the protocol holds
		 * the data itself and we only need to notify readers.
		 */
		if (so->so_downcalls->sd_recv_uio != NULL) {
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		ASSERT(msg_size == 0);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		goto space_check;
	}

	if (so->so_filter_active > 0) {
		/*
		 * Run the data through the filter stack.  A filter may
		 * hand back a different mblk (possibly a b_next chain,
		 * handled below) or consume it (msg_size becomes 0).
		 */
		for (; filter != NULL; filter = filter->sofi_prev) {
			if (!SOF_INTERESTED(filter, data_in))
				continue;
			mp = (*filter->sofi_ops->sofop_data_in)(
			    (sof_handle_t)filter, filter->sofi_cookie, mp,
			    flags, &msg_size);
			ASSERT(msgdsize(mp) == msg_size);
			DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
			    (mblk_t *), mp);
			/* Data was consumed/dropped, just do space check */
			if (msg_size == 0) {
				mutex_enter(&so->so_lock);
				goto space_check;
			}
		}
	}

	/* Urgent data is stashed/queued separately; see so_queue_oob(). */
	if (flags & MSG_OOB) {
		so_queue_oob(so, mp, msg_size);
		mutex_enter(&so->so_lock);
		goto space_check;
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	mutex_enter(&so->so_lock);
	/* Data cannot be queued here while falling back to STREAMS. */
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	/* Silently discard new data once the socket can no longer receive. */
	if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		/* A filter split the message; enqueue each mblk separately. */
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);

space_check:
	/* Entered with so_lock held; report remaining buffer space. */
	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}
	goto done_unlock;
}
1420
1421 #pragma inline(so_queue_msg_impl)
1422
1423 ssize_t
1424 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
1425 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp)
1426 {
1427 struct sonode *so = (struct sonode *)sock_handle;
1428
1429 return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
1430 so->so_filter_bottom));
1431 }
1432
1433 /*
1434 * Set the offset of where the oob data is relative to the bytes in
1435 * queued. Also generate SIGURG
1436 */
1437 void
1438 so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
1439 {
1440 struct sonode *so;
1441
1442 ASSERT(offset >= 0);
1443 so = (struct sonode *)sock_handle;
1444 mutex_enter(&so->so_lock);
1445 if (so->so_direct != NULL)
1446 SOD_UIOAFINI(so->so_direct);
1447
1448 /*
1449 * New urgent data on the way so forget about any old
1450 * urgent data.
1451 */
1452 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1453
1454 /*
1455 * Record that urgent data is pending.
1456 */
1457 so->so_state |= SS_OOBPEND;
1458
1459 if (so->so_oobmsg != NULL) {
1460 dprintso(so, 1, ("sock: discarding old oob\n"));
1461 freemsg(so->so_oobmsg);
1462 so->so_oobmsg = NULL;
1463 }
1464
1465 /*
1466 * set the offset where the urgent byte is
1467 */
1468 so->so_oobmark = so->so_rcv_queued + offset;
1469 if (so->so_oobmark == 0)
1470 so->so_state |= SS_RCVATMARK;
1471 else
1472 so->so_state &= ~SS_RCVATMARK;
1473
1474 so_notify_oobsig(so);
1475 }
1476
1477 /*
1478 * Queue the OOB byte
1479 */
1480 static void
1481 so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
1482 {
1483 mutex_enter(&so->so_lock);
1484 if (so->so_direct != NULL)
1485 SOD_UIOAFINI(so->so_direct);
1486
1487 ASSERT(mp != NULL);
1488 if (!IS_SO_OOB_INLINE(so)) {
1489 so->so_oobmsg = mp;
1490 so->so_state |= SS_HAVEOOBDATA;
1491 } else {
1492 so_enqueue_msg(so, mp, len);
1493 }
1494
1495 so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1496 }
1497
/*
 * so_close - sockfs close entry point for the socket.
 *
 * Marks the socket SS_CLOSING (no new data will be enqueued once the
 * flag is set), flushes the receive queue and, for listeners, the
 * accept queue, then hands the close down to the protocol.  For a
 * synchronous close the protocol's reference on the socket vnode is
 * released here; if sd_close returns EINPROGRESS the protocol releases
 * it later via the so_closed() upcall, and 0 is returned to the caller.
 */
int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	/*
	 * No new data will be enqueued once the CLOSING flag is set.
	 */
	mutex_enter(&so->so_lock);
	so->so_state |= SS_CLOSING;
	ASSERT(so_verify_oobstate(so));
	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	/* Give any active filters a chance to react to the close. */
	if (so->so_filter_active > 0)
		sof_sonode_closing(so);

	if (so->so_state & SS_ACCEPTCONN) {
		/*
		 * We grab and release the accept lock to ensure that any
		 * thread about to insert a socket in so_newconn completes
		 * before we flush the queue. Any thread calling so_newconn
		 * after we drop the lock will observe the SS_CLOSING flag,
		 * which will stop it from inserting the socket in the queue.
		 */
		mutex_enter(&so->so_acceptq_lock);
		mutex_exit(&so->so_acceptq_lock);

		so_acceptq_flush(so, B_TRUE);
	}

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
	switch (error) {
	default:
		/* Protocol made a synchronous close; remove proto ref */
		VN_RELE(SOTOV(so));
		break;
	case EINPROGRESS:
		/*
		 * Protocol is in the process of closing, it will make a
		 * 'closed' upcall to remove the reference.
		 */
		error = 0;
		break;
	}

	return (error);
}
1546
1547 /*
1548 * Upcall made by the protocol when it's doing an asynchronous close. It
1549 * will drop the protocol's reference on the socket.
1550 */
1551 void
1552 so_closed(sock_upper_handle_t sock_handle)
1553 {
1554 struct sonode *so = (struct sonode *)sock_handle;
1555
1556 VN_RELE(SOTOV(so));
1557 }
1558
/*
 * so_get_sock_pid_mblk - return an mblk containing a conn_pid_info_t
 * that lists the pids recorded in the socket's pid tree.
 *
 * Returns NULL if the mblk cannot be allocated; otherwise the caller
 * owns the returned mblk and is responsible for freeing it.
 */
mblk_t *
so_get_sock_pid_mblk(sock_upper_handle_t sock_handle)
{
	ulong_t sz, n;
	mblk_t *mblk;
	pid_node_t *pn;
	pid_t *pids;
	conn_pid_info_t *cpi;
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_pid_tree_lock);

	/* conn_pid_info_t already has room for one pid (cpi_pids[0]) */
	n = avl_numnodes(&so->so_pid_tree);
	sz = sizeof (conn_pid_info_t);
	sz += (n > 1) ? ((n - 1) * sizeof (pid_t)) : 0;
	if ((mblk = allocb(sz, BPRI_HI)) == NULL) {
		mutex_exit(&so->so_pid_tree_lock);
		return (NULL);
	}
	/* freshly allocb'd message: data starts at db_base */
	mblk->b_wptr += sz;
	cpi = (conn_pid_info_t *)mblk->b_datap->db_base;

	cpi->cpi_magic = CONN_PID_INFO_MGC;
	cpi->cpi_contents = CONN_PID_INFO_SOC;
	cpi->cpi_pids_cnt = n;
	cpi->cpi_tot_size = sz;
	cpi->cpi_pids[0] = 0;

	/* Copy the pids out of the AVL tree, in tree order. */
	if (cpi->cpi_pids_cnt > 0) {
		pids = cpi->cpi_pids;
		for (pn = avl_first(&so->so_pid_tree); pn != NULL;
		    pids++, pn = AVL_NEXT(&so->so_pid_tree, pn))
			*pids = pn->pn_pid;
	}
	mutex_exit(&so->so_pid_tree_lock);
	return (mblk);
}
1596
1597 void
1598 so_zcopy_notify(sock_upper_handle_t sock_handle)
1599 {
1600 struct sonode *so = (struct sonode *)sock_handle;
1601
1602 mutex_enter(&so->so_lock);
1603 so->so_copyflag |= STZCNOTIFY;
1604 cv_broadcast(&so->so_copy_cv);
1605 mutex_exit(&so->so_lock);
1606 }
1607
/*
 * so_set_error - upcall made by the protocol to post an asynchronous
 * error on the socket and wake up anyone who should see it.
 *
 * NOTE(review): there is intentionally no mutex_exit() here --
 * so_notify_error() is expected to drop so_lock, like the other
 * so_notify_* routines (see so_queue_msg_impl()'s "notify functions
 * will drop the lock" comment).
 */
void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	so_notify_error(so);
}
1619
1620 /*
1621 * so_recvmsg - read data from the socket
1622 *
1623 * There are two ways of obtaining data; either we ask the protocol to
1624 * copy directly into the supplied buffer, or we copy data from the
1625 * sonode's receive queue. The decision which one to use depends on
1626 * whether the protocol has a sd_recv_uio down call.
1627 */
/*
 * so_recvmsg - read data from the socket (see block comment above).
 *
 * Returns 0 on success or an errno.  On success msg->msg_name,
 * msg->msg_control (kmem-allocated here) and msg->msg_flags are
 * filled in for the caller; the caller is responsible for freeing
 * the name/control buffers.
 */
int
so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	rval_t rval;
	int flags = 0;
	t_uscalar_t controllen, namelen;
	int error = 0;
	int ret;
	mblk_t *mctlp = NULL;
	union T_primitives *tpr;
	void *control;
	ssize_t saved_resid;
	struct uio *suiop;

	/* Keep a STREAMS fallback from starting while we are receiving. */
	SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));

	if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
	    (so->so_mode & SM_CONNREQUIRED)) {
		SO_UNBLOCK_FALLBACK(so);
		return (ENOTCONN);
	}

	/* MSG_PEEK must not block waiting for the full request */
	if (msg->msg_flags & MSG_PEEK)
		msg->msg_flags &= ~MSG_WAITALL;

	if (so->so_mode & SM_ATOMIC)
		msg->msg_flags |= MSG_TRUNC;

	if (msg->msg_flags & MSG_OOB) {
		/* Urgent data: either not supported, protocol-handled,
		 * or pulled from so_oobmsg via sorecvoob(). */
		if ((so->so_mode & SM_EXDATA) == 0) {
			error = EOPNOTSUPP;
		} else if (so->so_downcalls->sd_recv_uio != NULL) {
			error = (*so->so_downcalls->sd_recv_uio)
			    (so->so_proto_handle, uiop, msg, cr);
		} else {
			error = sorecvoob(so, msg, uiop, msg->msg_flags,
			    IS_SO_OOB_INLINE(so));
		}
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * If the protocol has the recv down call, then pass the request
	 * down.
	 */
	if (so->so_downcalls->sd_recv_uio != NULL) {
		error = (*so->so_downcalls->sd_recv_uio)
		    (so->so_proto_handle, uiop, msg, cr);
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/*
	 * Reading data from the socket buffer
	 */
	flags = msg->msg_flags;
	msg->msg_flags = 0;

	/*
	 * Set msg_controllen and msg_namelen to zero here to make it
	 * simpler in the cases that no control or name is returned.
	 */
	controllen = msg->msg_controllen;
	namelen = msg->msg_namelen;
	msg->msg_controllen = 0;
	msg->msg_namelen = 0;

	mutex_enter(&so->so_lock);
	/* Set SOREADLOCKED */
	error = so_lock_read_intr(so,
	    uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
	mutex_exit(&so->so_lock);
	if (error) {
		SO_UNBLOCK_FALLBACK(so);
		return (error);
	}

	/* Set up I/OAT direct receive if available; may swap uiop */
	suiop = sod_rcv_init(so, flags, &uiop);
retry:
	saved_resid = uiop->uio_resid;
	error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
	if (error != 0) {
		goto out;
	}
	/*
	 * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
	 * For non-datagrams MOREDATA is used to set MSG_EOR.
	 */
	ASSERT(!(rval.r_val1 & MORECTL));
	if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
		msg->msg_flags |= MSG_TRUNC;
	if (mctlp == NULL) {
		dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));

		mutex_enter(&so->so_lock);
		/* Set MSG_EOR based on MOREDATA */
		if (!(rval.r_val1 & MOREDATA)) {
			if (so->so_state & SS_SAVEDEOR) {
				msg->msg_flags |= MSG_EOR;
				so->so_state &= ~SS_SAVEDEOR;
			}
		}
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}

		goto out_locked;
	}
	/* so_queue_msg has already verified length and alignment */
	tpr = (union T_primitives *)mctlp->b_rptr;
	dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
	switch (tpr->type) {
	case T_DATA_IND: {
		/*
		 * Set msg_flags to MSG_EOR based on
		 * MORE_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	case T_UNITDATA_IND: {
		void *addr;
		t_uscalar_t addrlen;
		void *abuf;
		t_uscalar_t optlen;
		void *opt;

		if (namelen != 0) {
			/* Caller wants source address */
			addrlen = tpr->unitdata_ind.SRC_length;
			addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
			    addrlen, 1);
			if (addr == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			/* AF_UNIX carries the address in the options */
			ASSERT(so->so_family != AF_UNIX);
		}
		optlen = tpr->unitdata_ind.OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;

			/*
			 * Extract any source address option.
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
			    optlen, __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}
			if (so->so_family == AF_UNIX)
				so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			/*
			 * A zero caller controllen with options present
			 * means the control data is truncated.
			 */
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (namelen != 0) {
			/*
			 * Return address to caller.
			 * Caller handles truncation if length
			 * exceeds msg_namelen.
			 * NOTE: AF_UNIX NUL termination is ensured by
			 * the sender's copyin_name().
			 */
			abuf = kmem_alloc(addrlen, KM_SLEEP);

			bcopy(addr, abuf, addrlen);
			msg->msg_name = abuf;
			msg->msg_namelen = addrlen;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				if (msg->msg_namelen != 0)
					kmem_free(msg->msg_name,
					    msg->msg_namelen);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		freemsg(mctlp);
		goto out;
	}
	case T_OPTDATA_IND: {
		struct T_optdata_req *tdr;
		void *opt;
		t_uscalar_t optlen;

		tdr = (struct T_optdata_req *)mctlp->b_rptr;
		optlen = tdr->OPT_length;
		if (optlen != 0) {
			t_uscalar_t ncontrollen;
			/*
			 * Determine how large cmsg buffer is needed.
			 */
			opt = sogetoff(mctlp,
			    tpr->optdata_ind.OPT_offset, optlen,
			    __TPI_ALIGN_SIZE);

			if (opt == NULL) {
				freemsg(mctlp);
				error = EPROTO;
				eprintsoline(so, error);
				goto out;
			}

			ncontrollen = so_cmsglen(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2));
			if (controllen != 0)
				controllen = ncontrollen;
			else if (ncontrollen != 0)
				msg->msg_flags |= MSG_CTRUNC;
		} else {
			controllen = 0;
		}

		if (controllen != 0) {
			/*
			 * Return control msg to caller.
			 * Caller handles truncation if length
			 * exceeds msg_controllen.
			 */
			control = kmem_zalloc(controllen, KM_SLEEP);

			error = so_opt2cmsg(mctlp, opt, optlen,
			    !(flags & MSG_XPG4_2), control, controllen);
			if (error) {
				freemsg(mctlp);
				kmem_free(control, controllen);
				eprintsoline(so, error);
				goto out;
			}
			msg->msg_control = control;
			msg->msg_controllen = controllen;
		}

		/*
		 * Set msg_flags to MSG_EOR based on
		 * DATA_flag and MOREDATA.
		 */
		mutex_enter(&so->so_lock);
		so->so_state &= ~SS_SAVEDEOR;
		if (!(tpr->data_ind.MORE_flag & 1)) {
			if (!(rval.r_val1 & MOREDATA))
				msg->msg_flags |= MSG_EOR;
			else
				so->so_state |= SS_SAVEDEOR;
		}
		freemsg(mctlp);
		/*
		 * If some data was received (i.e. not EOF) and the
		 * read/recv* has not been satisfied wait for some more.
		 * Not possible to wait if control info was received.
		 */
		if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
		    controllen == 0 &&
		    uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
			mutex_exit(&so->so_lock);
			flags |= MSG_NOMARK;
			goto retry;
		}
		goto out_locked;
	}
	default:
		cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
		    tpr->type);
		freemsg(mctlp);
		error = EPROTO;
		ASSERT(0);
	}
out:
	mutex_enter(&so->so_lock);
out_locked:
	/* Finish/tear down any I/OAT receive state; may report an error */
	ret = sod_rcv_done(so, suiop, uiop);
	if (ret != 0 && error == 0)
		error = ret;

	so_unlock_read(so);	/* Clear SOREADLOCKED */
	mutex_exit(&so->so_lock);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}
1966
/*
 * Socket operations vector: maps each sockfs entry point (sonodeops_t)
 * to its generic so_* implementation in this file (or its siblings).
 */
sonodeops_t so_sonodeops = {
	so_init,		/* sop_init */
	so_accept,		/* sop_accept */
	so_bind,		/* sop_bind */
	so_listen,		/* sop_listen */
	so_connect,		/* sop_connect */
	so_recvmsg,		/* sop_recvmsg */
	so_sendmsg,		/* sop_sendmsg */
	so_sendmblk,		/* sop_sendmblk */
	so_getpeername,		/* sop_getpeername */
	so_getsockname,		/* sop_getsockname */
	so_shutdown,		/* sop_shutdown */
	so_getsockopt,		/* sop_getsockopt */
	so_setsockopt,		/* sop_setsockopt */
	so_ioctl,		/* sop_ioctl */
	so_poll,		/* sop_poll */
	so_close,		/* sop_close */
};
1985
/*
 * Upcalls made by the protocol into sockfs.  A pointer to this vector
 * is handed to the protocol when a connection is created; see
 * so_newconn(), which returns &so_upcalls through sock_upcallsp.
 */
sock_upcalls_t so_upcalls = {
	so_newconn,
	so_connected,
	so_disconnected,
	so_opctl,
	so_queue_msg,
	so_set_prop,
	so_txq_full,
	so_signal_oob,
	so_zcopy_notify,
	so_set_error,
	so_closed,
	so_get_sock_pid_mblk
};