/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/sysmacros.h> 34 #include <sys/debug.h> 35 #include <sys/cmn_err.h> 36 37 #include <sys/stropts.h> 38 #include <sys/socket.h> 39 #include <sys/socketvar.h> 40 #include <sys/fcntl.h> 41 42 #define _SUN_TPI_VERSION 2 43 #include <sys/tihdr.h> 44 #include <sys/sockio.h> 45 #include <sys/kmem_impl.h> 46 47 #include <sys/strsubr.h> 48 #include <sys/strsun.h> 49 #include <sys/ddi.h> 50 #include <netinet/in.h> 51 #include <inet/ip.h> 52 53 #include <fs/sockfs/sockcommon.h> 54 #include <fs/sockfs/sockfilter_impl.h> 55 56 #include <sys/socket_proto.h> 57 58 #include <fs/sockfs/socktpi_impl.h> 59 #include <fs/sockfs/sodirect.h> 60 #include <sys/tihdr.h> 61 #include <fs/sockfs/nl7c.h> 62 63 extern int xnet_skip_checks; 64 extern int xnet_check_print; 65 66 static void so_queue_oob(struct sonode *, mblk_t *, size_t); 67 68 69 /*ARGSUSED*/ 70 int 71 so_accept_notsupp(struct sonode *lso, int fflag, 72 struct cred *cr, struct sonode **nsop) 73 { 74 return (EOPNOTSUPP); 75 } 76 77 /*ARGSUSED*/ 78 int 79 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr) 80 { 81 return (EOPNOTSUPP); 82 } 83 84 /*ARGSUSED*/ 85 int 86 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa, 87 socklen_t *len, struct cred *cr) 88 { 89 return (EOPNOTSUPP); 90 } 91 92 /*ARGSUSED*/ 93 int 94 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr, 95 socklen_t *addrlen, boolean_t accept, struct cred *cr) 96 { 97 return (EOPNOTSUPP); 98 } 99 100 /*ARGSUSED*/ 101 int 102 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr) 103 { 104 return (EOPNOTSUPP); 105 } 106 107 /*ARGSUSED*/ 108 int 109 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag, 110 struct cred *cr, mblk_t **mpp) 111 { 112 return (EOPNOTSUPP); 113 } 114 115 /* 116 * Generic Socket Ops 117 */ 118 119 /* ARGSUSED */ 120 int 121 so_init(struct sonode *so, struct sonode *pso, struct 
cred *cr, int flags) 122 { 123 return (socket_init_common(so, pso, flags, cr)); 124 } 125 126 int 127 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 128 int flags, struct cred *cr) 129 { 130 int error; 131 132 SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr)); 133 134 ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD); 135 136 /* X/Open requires this check */ 137 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 138 if (xnet_check_print) { 139 printf("sockfs: X/Open bind state check " 140 "caused EINVAL\n"); 141 } 142 error = EINVAL; 143 goto done; 144 } 145 146 /* 147 * a bind to a NULL address is interpreted as unbind. So just 148 * do the downcall. 149 */ 150 if (name == NULL) 151 goto dobind; 152 153 switch (so->so_family) { 154 case AF_INET: 155 if ((size_t)namelen != sizeof (sin_t)) { 156 error = name->sa_family != so->so_family ? 157 EAFNOSUPPORT : EINVAL; 158 eprintsoline(so, error); 159 goto done; 160 } 161 162 if ((flags & _SOBIND_XPG4_2) && 163 (name->sa_family != so->so_family)) { 164 /* 165 * This check has to be made for X/Open 166 * sockets however application failures have 167 * been observed when it is applied to 168 * all sockets. 169 */ 170 error = EAFNOSUPPORT; 171 eprintsoline(so, error); 172 goto done; 173 } 174 /* 175 * Force a zero sa_family to match so_family. 176 * 177 * Some programs like inetd(1M) don't set the 178 * family field. Other programs leave 179 * sin_family set to garbage - SunOS 4.X does 180 * not check the family field on a bind. 181 * We use the family field that 182 * was passed in to the socket() call. 183 */ 184 name->sa_family = so->so_family; 185 break; 186 187 case AF_INET6: { 188 #ifdef DEBUG 189 sin6_t *sin6 = (sin6_t *)name; 190 #endif 191 if ((size_t)namelen != sizeof (sin6_t)) { 192 error = name->sa_family != so->so_family ? 
193 EAFNOSUPPORT : EINVAL; 194 eprintsoline(so, error); 195 goto done; 196 } 197 198 if (name->sa_family != so->so_family) { 199 /* 200 * With IPv6 we require the family to match 201 * unlike in IPv4. 202 */ 203 error = EAFNOSUPPORT; 204 eprintsoline(so, error); 205 goto done; 206 } 207 #ifdef DEBUG 208 /* 209 * Verify that apps don't forget to clear 210 * sin6_scope_id etc 211 */ 212 if (sin6->sin6_scope_id != 0 && 213 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 214 zcmn_err(getzoneid(), CE_WARN, 215 "bind with uninitialized sin6_scope_id " 216 "(%d) on socket. Pid = %d\n", 217 (int)sin6->sin6_scope_id, 218 (int)curproc->p_pid); 219 } 220 if (sin6->__sin6_src_id != 0) { 221 zcmn_err(getzoneid(), CE_WARN, 222 "bind with uninitialized __sin6_src_id " 223 "(%d) on socket. Pid = %d\n", 224 (int)sin6->__sin6_src_id, 225 (int)curproc->p_pid); 226 } 227 #endif /* DEBUG */ 228 229 break; 230 } 231 default: 232 /* Just pass the request to the protocol */ 233 goto dobind; 234 } 235 236 /* 237 * First we check if either NCA or KSSL has been enabled for 238 * the requested address, and if so, we fall back to TPI. 239 * If neither of those two services are enabled, then we just 240 * pass the request to the protocol. 241 * 242 * Note that KSSL can only be enabled on a socket if NCA is NOT 243 * enabled for that socket, hence the else-statement below. 244 */ 245 if (nl7c_enabled && ((so->so_family == AF_INET || 246 so->so_family == AF_INET6) && 247 nl7c_lookup_addr(name, namelen) != NULL)) { 248 /* 249 * NL7C is not supported in non-global zones, 250 * we enforce this restriction here. 
251 */ 252 if (so->so_zoneid == GLOBAL_ZONEID) { 253 /* NCA should be used, so fall back to TPI */ 254 error = so_tpi_fallback(so, cr); 255 SO_UNBLOCK_FALLBACK(so); 256 if (error) 257 return (error); 258 else 259 return (SOP_BIND(so, name, namelen, flags, cr)); 260 } 261 } 262 263 dobind: 264 if (so->so_filter_active == 0 || 265 (error = sof_filter_bind(so, name, &namelen, cr)) < 0) { 266 error = (*so->so_downcalls->sd_bind) 267 (so->so_proto_handle, name, namelen, cr); 268 } 269 done: 270 SO_UNBLOCK_FALLBACK(so); 271 272 return (error); 273 } 274 275 int 276 so_listen(struct sonode *so, int backlog, struct cred *cr) 277 { 278 int error = 0; 279 280 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 281 SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr)); 282 283 if ((so)->so_filter_active == 0 || 284 (error = sof_filter_listen(so, &backlog, cr)) < 0) 285 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle, 286 backlog, cr); 287 288 SO_UNBLOCK_FALLBACK(so); 289 290 return (error); 291 } 292 293 294 int 295 so_connect(struct sonode *so, struct sockaddr *name, 296 socklen_t namelen, int fflag, int flags, struct cred *cr) 297 { 298 int error = 0; 299 sock_connid_t id; 300 301 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 302 SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr)); 303 304 /* 305 * If there is a pending error, return error 306 * This can happen if a non blocking operation caused an error. 
307 */ 308 309 if (so->so_error != 0) { 310 mutex_enter(&so->so_lock); 311 error = sogeterr(so, B_TRUE); 312 mutex_exit(&so->so_lock); 313 if (error != 0) 314 goto done; 315 } 316 317 if (so->so_filter_active == 0 || 318 (error = sof_filter_connect(so, (struct sockaddr *)name, 319 &namelen, cr)) < 0) { 320 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle, 321 name, namelen, &id, cr); 322 323 if (error == EINPROGRESS) 324 error = so_wait_connected(so, 325 fflag & (FNONBLOCK|FNDELAY), id); 326 } 327 done: 328 SO_UNBLOCK_FALLBACK(so); 329 return (error); 330 } 331 332 /*ARGSUSED*/ 333 int 334 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop) 335 { 336 int error = 0; 337 struct sonode *nso; 338 339 *nsop = NULL; 340 341 SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop)); 342 if ((so->so_state & SS_ACCEPTCONN) == 0) { 343 SO_UNBLOCK_FALLBACK(so); 344 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ? 345 EOPNOTSUPP : EINVAL); 346 } 347 348 if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)), 349 &nso)) == 0) { 350 ASSERT(nso != NULL); 351 352 /* finish the accept */ 353 if ((so->so_filter_active > 0 && 354 (error = sof_filter_accept(nso, cr)) > 0) || 355 (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle, 356 nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) { 357 (void) socket_close(nso, 0, cr); 358 socket_destroy(nso); 359 } else { 360 *nsop = nso; 361 if (!(curproc->p_flag & SSYS)) 362 sonode_insert_pid(nso, curproc->p_pidp->pid_id); 363 } 364 } 365 366 SO_UNBLOCK_FALLBACK(so); 367 return (error); 368 } 369 370 int 371 so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 372 struct cred *cr) 373 { 374 int error, flags; 375 boolean_t dontblock; 376 ssize_t orig_resid; 377 mblk_t *mp; 378 379 SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr)); 380 381 flags = msg->msg_flags; 382 error = 0; 383 dontblock = (flags & MSG_DONTWAIT) || 384 (uiop->uio_fmode & 
(FNONBLOCK|FNDELAY)); 385 386 if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) { 387 /* 388 * Old way of passing fd's is not supported 389 */ 390 SO_UNBLOCK_FALLBACK(so); 391 return (EOPNOTSUPP); 392 } 393 394 if ((so->so_mode & SM_ATOMIC) && 395 uiop->uio_resid > so->so_proto_props.sopp_maxpsz && 396 so->so_proto_props.sopp_maxpsz != -1) { 397 SO_UNBLOCK_FALLBACK(so); 398 return (EMSGSIZE); 399 } 400 401 /* 402 * For atomic sends we will only do one iteration. 403 */ 404 do { 405 if (so->so_state & SS_CANTSENDMORE) { 406 error = EPIPE; 407 break; 408 } 409 410 if (so->so_error != 0) { 411 mutex_enter(&so->so_lock); 412 error = sogeterr(so, B_TRUE); 413 mutex_exit(&so->so_lock); 414 if (error != 0) 415 break; 416 } 417 418 /* 419 * Send down OOB messages even if the send path is being 420 * flow controlled (assuming the protocol supports OOB data). 421 */ 422 if (flags & MSG_OOB) { 423 if ((so->so_mode & SM_EXDATA) == 0) { 424 error = EOPNOTSUPP; 425 break; 426 } 427 } else if (SO_SND_FLOWCTRLD(so)) { 428 /* 429 * Need to wait until the protocol is ready to receive 430 * more data for transmission. 431 */ 432 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 433 break; 434 } 435 436 /* 437 * Time to send data to the protocol. We either copy the 438 * data into mblks or pass the uio directly to the protocol. 439 * We decide what to do based on the available down calls. 
440 */ 441 if (so->so_downcalls->sd_send_uio != NULL) { 442 error = (*so->so_downcalls->sd_send_uio) 443 (so->so_proto_handle, uiop, msg, cr); 444 if (error != 0) 445 break; 446 } else { 447 /* save the resid in case of failure */ 448 orig_resid = uiop->uio_resid; 449 450 if ((mp = socopyinuio(uiop, 451 so->so_proto_props.sopp_maxpsz, 452 so->so_proto_props.sopp_wroff, 453 so->so_proto_props.sopp_maxblk, 454 so->so_proto_props.sopp_tail, &error)) == NULL) { 455 break; 456 } 457 ASSERT(uiop->uio_resid >= 0); 458 459 if (so->so_filter_active > 0 && 460 ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr, 461 &error)) == NULL)) { 462 if (error != 0) 463 break; 464 continue; 465 } 466 error = (*so->so_downcalls->sd_send) 467 (so->so_proto_handle, mp, msg, cr); 468 if (error != 0) { 469 /* 470 * The send failed. We do not have to free the 471 * mblks, because that is the protocol's 472 * responsibility. However, uio_resid must 473 * remain accurate, so adjust that here. 474 */ 475 uiop->uio_resid = orig_resid; 476 break; 477 } 478 } 479 } while (uiop->uio_resid > 0); 480 481 SO_UNBLOCK_FALLBACK(so); 482 483 return (error); 484 } 485 486 int 487 so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag, 488 struct cred *cr, mblk_t **mpp, sof_instance_t *fil, 489 boolean_t fil_inject) 490 { 491 int error; 492 boolean_t dontblock; 493 size_t size; 494 mblk_t *mp = *mpp; 495 496 if (so->so_downcalls->sd_send == NULL) 497 return (EOPNOTSUPP); 498 499 error = 0; 500 dontblock = (msg->msg_flags & MSG_DONTWAIT) || 501 (fflag & (FNONBLOCK|FNDELAY)); 502 size = msgdsize(mp); 503 504 if ((so->so_mode & SM_ATOMIC) && 505 size > so->so_proto_props.sopp_maxpsz && 506 so->so_proto_props.sopp_maxpsz != -1) { 507 SO_UNBLOCK_FALLBACK(so); 508 return (EMSGSIZE); 509 } 510 511 while (mp != NULL) { 512 mblk_t *nmp, *last_mblk; 513 size_t mlen; 514 515 if (so->so_state & SS_CANTSENDMORE) { 516 error = EPIPE; 517 break; 518 } 519 if (so->so_error != 0) { 520 mutex_enter(&so->so_lock); 521 
error = sogeterr(so, B_TRUE); 522 mutex_exit(&so->so_lock); 523 if (error != 0) 524 break; 525 } 526 /* Socket filters are not flow controlled */ 527 if (SO_SND_FLOWCTRLD(so) && !fil_inject) { 528 /* 529 * Need to wait until the protocol is ready to receive 530 * more data for transmission. 531 */ 532 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 533 break; 534 } 535 536 /* 537 * We only allow so_maxpsz of data to be sent down to 538 * the protocol at time. 539 */ 540 mlen = MBLKL(mp); 541 nmp = mp->b_cont; 542 last_mblk = mp; 543 while (nmp != NULL) { 544 mlen += MBLKL(nmp); 545 if (mlen > so->so_proto_props.sopp_maxpsz) { 546 last_mblk->b_cont = NULL; 547 break; 548 } 549 last_mblk = nmp; 550 nmp = nmp->b_cont; 551 } 552 553 if (so->so_filter_active > 0 && 554 (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg, 555 cr, &error)) == NULL) { 556 *mpp = mp = nmp; 557 if (error != 0) 558 break; 559 continue; 560 } 561 error = (*so->so_downcalls->sd_send) 562 (so->so_proto_handle, mp, msg, cr); 563 if (error != 0) { 564 /* 565 * The send failed. The protocol will free the mblks 566 * that were sent down. Let the caller deal with the 567 * rest. 
568 */ 569 *mpp = nmp; 570 break; 571 } 572 573 *mpp = mp = nmp; 574 } 575 /* Let the filter know whether the protocol is flow controlled */ 576 if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so)) 577 error = ENOSPC; 578 579 return (error); 580 } 581 582 #pragma inline(so_sendmblk_impl) 583 584 int 585 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 586 struct cred *cr, mblk_t **mpp) 587 { 588 int error; 589 590 SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp)); 591 592 if ((so->so_mode & SM_SENDFILESUPP) == 0) { 593 SO_UNBLOCK_FALLBACK(so); 594 return (EOPNOTSUPP); 595 } 596 597 error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top, 598 B_FALSE); 599 600 SO_UNBLOCK_FALLBACK(so); 601 602 return (error); 603 } 604 605 int 606 so_shutdown(struct sonode *so, int how, struct cred *cr) 607 { 608 int error; 609 610 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr)); 611 612 /* 613 * SunOS 4.X has no check for datagram sockets. 614 * 5.X checks that it is connected (ENOTCONN) 615 * X/Open requires that we check the connected state. 616 */ 617 if (!(so->so_state & SS_ISCONNECTED)) { 618 if (!xnet_skip_checks) { 619 error = ENOTCONN; 620 if (xnet_check_print) { 621 printf("sockfs: X/Open shutdown check " 622 "caused ENOTCONN\n"); 623 } 624 } 625 goto done; 626 } 627 628 if (so->so_filter_active == 0 || 629 (error = sof_filter_shutdown(so, &how, cr)) < 0) 630 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle, 631 how, cr)); 632 633 /* 634 * Protocol agreed to shutdown. We need to flush the 635 * receive buffer if the receive side is being shutdown. 
636 */ 637 if (error == 0 && how != SHUT_WR) { 638 mutex_enter(&so->so_lock); 639 /* wait for active reader to finish */ 640 (void) so_lock_read(so, 0); 641 642 so_rcv_flush(so); 643 644 so_unlock_read(so); 645 mutex_exit(&so->so_lock); 646 } 647 648 done: 649 SO_UNBLOCK_FALLBACK(so); 650 return (error); 651 } 652 653 int 654 so_getsockname(struct sonode *so, struct sockaddr *addr, 655 socklen_t *addrlen, struct cred *cr) 656 { 657 int error; 658 659 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr)); 660 661 if (so->so_filter_active == 0 || 662 (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0) 663 error = (*so->so_downcalls->sd_getsockname) 664 (so->so_proto_handle, addr, addrlen, cr); 665 666 SO_UNBLOCK_FALLBACK(so); 667 return (error); 668 } 669 670 int 671 so_getpeername(struct sonode *so, struct sockaddr *addr, 672 socklen_t *addrlen, boolean_t accept, struct cred *cr) 673 { 674 int error; 675 676 SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 677 678 if (accept) { 679 error = (*so->so_downcalls->sd_getpeername) 680 (so->so_proto_handle, addr, addrlen, cr); 681 } else if (!(so->so_state & SS_ISCONNECTED)) { 682 error = ENOTCONN; 683 } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 684 /* Added this check for X/Open */ 685 error = EINVAL; 686 if (xnet_check_print) { 687 printf("sockfs: X/Open getpeername check => EINVAL\n"); 688 } 689 } else if (so->so_filter_active == 0 || 690 (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) { 691 error = (*so->so_downcalls->sd_getpeername) 692 (so->so_proto_handle, addr, addrlen, cr); 693 } 694 695 SO_UNBLOCK_FALLBACK(so); 696 return (error); 697 } 698 699 int 700 so_getsockopt(struct sonode *so, int level, int option_name, 701 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 702 { 703 int error = 0; 704 705 if (level == SOL_FILTER) 706 return (sof_getsockopt(so, option_name, optval, optlenp, cr)); 707 708 SO_BLOCK_FALLBACK(so, 709 
SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr)); 710 711 if ((so->so_filter_active == 0 || 712 (error = sof_filter_getsockopt(so, level, option_name, optval, 713 optlenp, cr)) < 0) && 714 (error = socket_getopt_common(so, level, option_name, optval, 715 optlenp, flags)) < 0) { 716 error = (*so->so_downcalls->sd_getsockopt) 717 (so->so_proto_handle, level, option_name, optval, optlenp, 718 cr); 719 if (error == ENOPROTOOPT) { 720 if (level == SOL_SOCKET) { 721 /* 722 * If a protocol does not support a particular 723 * socket option, set can fail (not allowed) 724 * but get can not fail. This is the previous 725 * sockfs bahvior. 726 */ 727 switch (option_name) { 728 case SO_LINGER: 729 if (*optlenp < (t_uscalar_t) 730 sizeof (struct linger)) { 731 error = EINVAL; 732 break; 733 } 734 error = 0; 735 bzero(optval, sizeof (struct linger)); 736 *optlenp = sizeof (struct linger); 737 break; 738 case SO_RCVTIMEO: 739 case SO_SNDTIMEO: 740 if (*optlenp < (t_uscalar_t) 741 sizeof (struct timeval)) { 742 error = EINVAL; 743 break; 744 } 745 error = 0; 746 bzero(optval, sizeof (struct timeval)); 747 *optlenp = sizeof (struct timeval); 748 break; 749 case SO_SND_BUFINFO: 750 if (*optlenp < (t_uscalar_t) 751 sizeof (struct so_snd_bufinfo)) { 752 error = EINVAL; 753 break; 754 } 755 error = 0; 756 bzero(optval, 757 sizeof (struct so_snd_bufinfo)); 758 *optlenp = 759 sizeof (struct so_snd_bufinfo); 760 break; 761 case SO_DEBUG: 762 case SO_REUSEADDR: 763 case SO_KEEPALIVE: 764 case SO_DONTROUTE: 765 case SO_BROADCAST: 766 case SO_USELOOPBACK: 767 case SO_OOBINLINE: 768 case SO_DGRAM_ERRIND: 769 case SO_SNDBUF: 770 case SO_RCVBUF: 771 error = 0; 772 *((int32_t *)optval) = 0; 773 *optlenp = sizeof (int32_t); 774 break; 775 default: 776 break; 777 } 778 } 779 } 780 } 781 782 SO_UNBLOCK_FALLBACK(so); 783 return (error); 784 } 785 786 int 787 so_setsockopt(struct sonode *so, int level, int option_name, 788 const void *optval, socklen_t optlen, struct cred *cr) 789 
{ 790 int error = 0; 791 struct timeval tl; 792 const void *opt = optval; 793 794 if (level == SOL_FILTER) 795 return (sof_setsockopt(so, option_name, optval, optlen, cr)); 796 797 SO_BLOCK_FALLBACK(so, 798 SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 799 800 /* X/Open requires this check */ 801 if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) { 802 SO_UNBLOCK_FALLBACK(so); 803 if (xnet_check_print) 804 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 805 return (EINVAL); 806 } 807 808 if (so->so_filter_active > 0 && 809 (error = sof_filter_setsockopt(so, level, option_name, 810 (void *)optval, &optlen, cr)) >= 0) 811 goto done; 812 813 if (level == SOL_SOCKET) { 814 switch (option_name) { 815 case SO_RCVTIMEO: 816 case SO_SNDTIMEO: { 817 /* 818 * We pass down these two options to protocol in order 819 * to support some third part protocols which need to 820 * know them. For those protocols which don't care 821 * these two options, simply return 0. 822 */ 823 clock_t t_usec; 824 825 if (get_udatamodel() == DATAMODEL_NONE || 826 get_udatamodel() == DATAMODEL_NATIVE) { 827 if (optlen != sizeof (struct timeval)) { 828 error = EINVAL; 829 goto done; 830 } 831 bcopy((struct timeval *)optval, &tl, 832 sizeof (struct timeval)); 833 } else { 834 if (optlen != sizeof (struct timeval32)) { 835 error = EINVAL; 836 goto done; 837 } 838 TIMEVAL32_TO_TIMEVAL(&tl, 839 (struct timeval32 *)optval); 840 } 841 opt = &tl; 842 optlen = sizeof (tl); 843 t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 844 mutex_enter(&so->so_lock); 845 if (option_name == SO_RCVTIMEO) 846 so->so_rcvtimeo = drv_usectohz(t_usec); 847 else 848 so->so_sndtimeo = drv_usectohz(t_usec); 849 mutex_exit(&so->so_lock); 850 break; 851 } 852 case SO_RCVBUF: 853 /* 854 * XXX XPG 4.2 applications retrieve SO_RCVBUF from 855 * sockfs since the transport might adjust the value 856 * and not return exactly what was set by the 857 * application. 
858 */ 859 so->so_xpg_rcvbuf = *(int32_t *)optval; 860 break; 861 } 862 } 863 error = (*so->so_downcalls->sd_setsockopt) 864 (so->so_proto_handle, level, option_name, opt, optlen, cr); 865 done: 866 SO_UNBLOCK_FALLBACK(so); 867 return (error); 868 } 869 870 int 871 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 872 struct cred *cr, int32_t *rvalp) 873 { 874 int error = 0; 875 876 SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 877 878 /* 879 * If there is a pending error, return error 880 * This can happen if a non blocking operation caused an error. 881 */ 882 if (so->so_error != 0) { 883 mutex_enter(&so->so_lock); 884 error = sogeterr(so, B_TRUE); 885 mutex_exit(&so->so_lock); 886 if (error != 0) 887 goto done; 888 } 889 890 /* 891 * calling strioc can result in the socket falling back to TPI, 892 * if that is supported. 893 */ 894 if ((so->so_filter_active == 0 || 895 (error = sof_filter_ioctl(so, cmd, arg, mode, 896 rvalp, cr)) < 0) && 897 (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 && 898 (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) { 899 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle, 900 cmd, arg, mode, rvalp, cr); 901 } 902 903 done: 904 SO_UNBLOCK_FALLBACK(so); 905 906 return (error); 907 } 908 909 int 910 so_poll(struct sonode *so, short events, int anyyet, short *reventsp, 911 struct pollhead **phpp) 912 { 913 int state = so->so_state, mask; 914 *reventsp = 0; 915 916 /* 917 * In sockets the errors are represented as input/output events 918 */ 919 if (so->so_error != 0 && 920 ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) { 921 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events; 922 return (0); 923 } 924 925 /* 926 * If the socket is in a state where it can send data 927 * turn on POLLWRBAND and POLLOUT events. 
928 */ 929 if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) { 930 /* 931 * out of band data is allowed even if the connection 932 * is flow controlled 933 */ 934 *reventsp |= POLLWRBAND & events; 935 if (!SO_SND_FLOWCTRLD(so)) { 936 /* 937 * As long as there is buffer to send data 938 * turn on POLLOUT events 939 */ 940 *reventsp |= POLLOUT & events; 941 } 942 } 943 944 /* 945 * Turn on POLLIN whenever there is data on the receive queue, 946 * or the socket is in a state where no more data will be received. 947 * Also, if the socket is accepting connections, flip the bit if 948 * there is something on the queue. 949 * 950 * We do an initial check for events without holding locks. However, 951 * if there are no event available, then we redo the check for POLLIN 952 * events under the lock. 953 */ 954 955 /* Pending connections */ 956 if (!list_is_empty(&so->so_acceptq_list)) 957 *reventsp |= (POLLIN|POLLRDNORM) & events; 958 959 /* 960 * If we're looking for POLLRDHUP, indicate it if we have sent the 961 * last rx signal for the socket. 962 */ 963 if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG)) 964 *reventsp |= POLLRDHUP; 965 966 /* Data */ 967 /* so_downcalls is null for sctp */ 968 if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) { 969 *reventsp |= (*so->so_downcalls->sd_poll) 970 (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet, 971 CRED()) & events; 972 ASSERT((*reventsp & ~events) == 0); 973 /* do not recheck events */ 974 events &= ~SO_PROTO_POLLEV; 975 } else { 976 if (SO_HAVE_DATA(so)) 977 *reventsp |= (POLLIN|POLLRDNORM) & events; 978 979 /* Urgent data */ 980 if ((state & SS_OOBPEND) != 0) { 981 *reventsp |= (POLLRDBAND | POLLPRI) & events; 982 } 983 984 /* 985 * If the socket has become disconnected, we set POLLHUP. 986 * Note that if we are in this state, we will have set POLLIN 987 * (SO_HAVE_DATA() is true on a disconnected socket), but not 988 * POLLOUT (SS_ISCONNECTED is false). 
This is in keeping with 989 * the semantics of POLLHUP, which is defined to be mutually 990 * exclusive with respect to POLLOUT but not POLLIN. We are 991 * therefore setting POLLHUP primarily for the benefit of 992 * those not polling on POLLIN, as they have no other way of 993 * knowing that the socket has been disconnected. 994 */ 995 mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG; 996 997 if ((state & (mask | SS_ISCONNECTED)) == mask) 998 *reventsp |= POLLHUP; 999 } 1000 1001 if ((!*reventsp && !anyyet) || (events & POLLET)) { 1002 /* Check for read events again, but this time under lock */ 1003 if (events & (POLLIN|POLLRDNORM)) { 1004 mutex_enter(&so->so_lock); 1005 if (SO_HAVE_DATA(so) || 1006 !list_is_empty(&so->so_acceptq_list)) { 1007 if (events & POLLET) { 1008 so->so_pollev |= SO_POLLEV_IN; 1009 *phpp = &so->so_poll_list; 1010 } 1011 1012 mutex_exit(&so->so_lock); 1013 *reventsp |= (POLLIN|POLLRDNORM) & events; 1014 1015 return (0); 1016 } else { 1017 so->so_pollev |= SO_POLLEV_IN; 1018 mutex_exit(&so->so_lock); 1019 } 1020 } 1021 *phpp = &so->so_poll_list; 1022 } 1023 return (0); 1024 } 1025 1026 /* 1027 * Generic Upcalls 1028 */ 1029 void 1030 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id, 1031 cred_t *peer_cred, pid_t peer_cpid) 1032 { 1033 struct sonode *so = (struct sonode *)sock_handle; 1034 1035 mutex_enter(&so->so_lock); 1036 ASSERT(so->so_proto_handle != NULL); 1037 1038 if (peer_cred != NULL) { 1039 if (so->so_peercred != NULL) 1040 crfree(so->so_peercred); 1041 crhold(peer_cred); 1042 so->so_peercred = peer_cred; 1043 so->so_cpid = peer_cpid; 1044 } 1045 1046 so->so_proto_connid = id; 1047 soisconnected(so); 1048 /* 1049 * Wake ones who're waiting for conn to become established. 
1050 */ 1051 so_notify_connected(so); 1052 } 1053 1054 int 1055 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error) 1056 { 1057 struct sonode *so = (struct sonode *)sock_handle; 1058 boolean_t connect_failed; 1059 1060 mutex_enter(&so->so_lock); 1061 1062 /* 1063 * If we aren't currently connected, then this isn't a disconnect but 1064 * rather a failure to connect. 1065 */ 1066 connect_failed = !(so->so_state & SS_ISCONNECTED); 1067 1068 so->so_proto_connid = id; 1069 soisdisconnected(so, error); 1070 so_notify_disconnected(so, connect_failed, error); 1071 1072 return (0); 1073 } 1074 1075 void 1076 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action, 1077 uintptr_t arg) 1078 { 1079 struct sonode *so = (struct sonode *)sock_handle; 1080 1081 switch (action) { 1082 case SOCK_OPCTL_SHUT_SEND: 1083 mutex_enter(&so->so_lock); 1084 socantsendmore(so); 1085 so_notify_disconnecting(so); 1086 break; 1087 case SOCK_OPCTL_SHUT_RECV: { 1088 mutex_enter(&so->so_lock); 1089 socantrcvmore(so); 1090 so_notify_eof(so); 1091 break; 1092 } 1093 case SOCK_OPCTL_ENAB_ACCEPT: 1094 mutex_enter(&so->so_lock); 1095 so->so_state |= SS_ACCEPTCONN; 1096 so->so_backlog = (unsigned int)arg; 1097 /* 1098 * The protocol can stop generating newconn upcalls when 1099 * the backlog is full, so to make sure the listener does 1100 * not end up with a queue full of deferred connections 1101 * we reduce the backlog by one. Thus the listener will 1102 * start closing deferred connections before the backlog 1103 * is full. 
1104 */ 1105 if (so->so_filter_active > 0) 1106 so->so_backlog = MAX(1, so->so_backlog - 1); 1107 mutex_exit(&so->so_lock); 1108 break; 1109 default: 1110 ASSERT(0); 1111 break; 1112 } 1113 } 1114 1115 void 1116 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull) 1117 { 1118 struct sonode *so = (struct sonode *)sock_handle; 1119 1120 if (qfull) { 1121 so_snd_qfull(so); 1122 } else { 1123 so_snd_qnotfull(so); 1124 mutex_enter(&so->so_lock); 1125 /* so_notify_writable drops so_lock */ 1126 so_notify_writable(so); 1127 } 1128 } 1129 1130 sock_upper_handle_t 1131 so_newconn(sock_upper_handle_t parenthandle, 1132 sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls, 1133 struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp) 1134 { 1135 struct sonode *so = (struct sonode *)parenthandle; 1136 struct sonode *nso; 1137 int error; 1138 1139 ASSERT(proto_handle != NULL); 1140 1141 if ((so->so_state & SS_ACCEPTCONN) == 0 || 1142 (so->so_acceptq_len >= so->so_backlog && 1143 (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) { 1144 return (NULL); 1145 } 1146 1147 nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP, 1148 &error); 1149 if (nso == NULL) 1150 return (NULL); 1151 1152 if (peer_cred != NULL) { 1153 crhold(peer_cred); 1154 nso->so_peercred = peer_cred; 1155 nso->so_cpid = peer_cpid; 1156 } 1157 nso->so_listener = so; 1158 1159 /* 1160 * The new socket (nso), proto_handle and sock_upcallsp are all 1161 * valid at this point. But as soon as nso is placed in the accept 1162 * queue that can no longer be assumed (since an accept() thread may 1163 * pull it off the queue and close the socket). 
1164 */ 1165 *sock_upcallsp = &so_upcalls; 1166 1167 mutex_enter(&so->so_acceptq_lock); 1168 if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) { 1169 mutex_exit(&so->so_acceptq_lock); 1170 ASSERT(nso->so_count == 1); 1171 nso->so_count--; 1172 nso->so_listener = NULL; 1173 /* drop proto ref */ 1174 VN_RELE(SOTOV(nso)); 1175 socket_destroy(nso); 1176 return (NULL); 1177 } else { 1178 so->so_acceptq_len++; 1179 if (nso->so_state & SS_FIL_DEFER) { 1180 list_insert_tail(&so->so_acceptq_defer, nso); 1181 mutex_exit(&so->so_acceptq_lock); 1182 } else { 1183 list_insert_tail(&so->so_acceptq_list, nso); 1184 cv_signal(&so->so_acceptq_cv); 1185 mutex_exit(&so->so_acceptq_lock); 1186 mutex_enter(&so->so_lock); 1187 so_notify_newconn(so); 1188 } 1189 1190 return ((sock_upper_handle_t)nso); 1191 } 1192 } 1193 1194 void 1195 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp) 1196 { 1197 struct sonode *so; 1198 1199 so = (struct sonode *)sock_handle; 1200 1201 mutex_enter(&so->so_lock); 1202 1203 if (soppp->sopp_flags & SOCKOPT_MAXBLK) 1204 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk; 1205 if (soppp->sopp_flags & SOCKOPT_WROFF) 1206 so->so_proto_props.sopp_wroff = soppp->sopp_wroff; 1207 if (soppp->sopp_flags & SOCKOPT_TAIL) 1208 so->so_proto_props.sopp_tail = soppp->sopp_tail; 1209 if (soppp->sopp_flags & SOCKOPT_RCVHIWAT) 1210 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat; 1211 if (soppp->sopp_flags & SOCKOPT_RCVLOWAT) 1212 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat; 1213 if (soppp->sopp_flags & SOCKOPT_MAXPSZ) 1214 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz; 1215 if (soppp->sopp_flags & SOCKOPT_MINPSZ) 1216 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz; 1217 if (soppp->sopp_flags & SOCKOPT_ZCOPY) { 1218 if (soppp->sopp_zcopyflag & ZCVMSAFE) { 1219 so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE; 1220 so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE; 1221 } else if (soppp->sopp_zcopyflag 
& ZCVMUNSAFE) { 1222 so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE; 1223 so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE; 1224 } 1225 1226 if (soppp->sopp_zcopyflag & COPYCACHED) { 1227 so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED; 1228 } 1229 } 1230 if (soppp->sopp_flags & SOCKOPT_OOBINLINE) 1231 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline; 1232 if (soppp->sopp_flags & SOCKOPT_RCVTIMER) 1233 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer; 1234 if (soppp->sopp_flags & SOCKOPT_RCVTHRESH) 1235 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh; 1236 if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN) 1237 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen; 1238 if (soppp->sopp_flags & SOCKOPT_LOOPBACK) 1239 so->so_proto_props.sopp_loopback = soppp->sopp_loopback; 1240 1241 mutex_exit(&so->so_lock); 1242 1243 if (so->so_filter_active > 0) { 1244 sof_instance_t *inst; 1245 ssize_t maxblk; 1246 ushort_t wroff, tail; 1247 maxblk = so->so_proto_props.sopp_maxblk; 1248 wroff = so->so_proto_props.sopp_wroff; 1249 tail = so->so_proto_props.sopp_tail; 1250 for (inst = so->so_filter_bottom; inst != NULL; 1251 inst = inst->sofi_prev) { 1252 if (SOF_INTERESTED(inst, mblk_prop)) { 1253 (*inst->sofi_ops->sofop_mblk_prop)( 1254 (sof_handle_t)inst, inst->sofi_cookie, 1255 &maxblk, &wroff, &tail); 1256 } 1257 } 1258 mutex_enter(&so->so_lock); 1259 so->so_proto_props.sopp_maxblk = maxblk; 1260 so->so_proto_props.sopp_wroff = wroff; 1261 so->so_proto_props.sopp_tail = tail; 1262 mutex_exit(&so->so_lock); 1263 } 1264 #ifdef DEBUG 1265 soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL | 1266 SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ | 1267 SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER | 1268 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ | 1269 SOCKOPT_LOOPBACK); 1270 ASSERT(soppp->sopp_flags == 0); 1271 #endif 1272 } 1273 1274 /* ARGSUSED */ 1275 ssize_t 1276 so_queue_msg_impl(struct sonode *so, 
mblk_t *mp, 1277 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp, 1278 sof_instance_t *filter) 1279 { 1280 boolean_t force_push = B_TRUE; 1281 int space_left; 1282 sodirect_t *sodp = so->so_direct; 1283 1284 ASSERT(errorp != NULL); 1285 *errorp = 0; 1286 if (mp == NULL) { 1287 if (so->so_downcalls->sd_recv_uio != NULL) { 1288 mutex_enter(&so->so_lock); 1289 /* the notify functions will drop the lock */ 1290 if (flags & MSG_OOB) 1291 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1292 else 1293 so_notify_data(so, msg_size); 1294 return (0); 1295 } 1296 ASSERT(msg_size == 0); 1297 mutex_enter(&so->so_lock); 1298 goto space_check; 1299 } 1300 1301 ASSERT(mp->b_next == NULL); 1302 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO); 1303 ASSERT(msg_size == msgdsize(mp)); 1304 1305 if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1306 /* The read pointer is not aligned correctly for TPI */ 1307 zcmn_err(getzoneid(), CE_WARN, 1308 "sockfs: Unaligned TPI message received. 
rptr = %p\n", 1309 (void *)mp->b_rptr); 1310 freemsg(mp); 1311 mutex_enter(&so->so_lock); 1312 if (sodp != NULL) 1313 SOD_UIOAFINI(sodp); 1314 goto space_check; 1315 } 1316 1317 if (so->so_filter_active > 0) { 1318 for (; filter != NULL; filter = filter->sofi_prev) { 1319 if (!SOF_INTERESTED(filter, data_in)) 1320 continue; 1321 mp = (*filter->sofi_ops->sofop_data_in)( 1322 (sof_handle_t)filter, filter->sofi_cookie, mp, 1323 flags, &msg_size); 1324 ASSERT(msgdsize(mp) == msg_size); 1325 DTRACE_PROBE2(filter__data, (sof_instance_t), filter, 1326 (mblk_t *), mp); 1327 /* Data was consumed/dropped, just do space check */ 1328 if (msg_size == 0) { 1329 mutex_enter(&so->so_lock); 1330 goto space_check; 1331 } 1332 } 1333 } 1334 1335 if (flags & MSG_OOB) { 1336 so_queue_oob(so, mp, msg_size); 1337 mutex_enter(&so->so_lock); 1338 goto space_check; 1339 } 1340 1341 if (force_pushp != NULL) 1342 force_push = *force_pushp; 1343 1344 mutex_enter(&so->so_lock); 1345 if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) { 1346 if (sodp != NULL) 1347 SOD_DISABLE(sodp); 1348 mutex_exit(&so->so_lock); 1349 *errorp = EOPNOTSUPP; 1350 return (-1); 1351 } 1352 if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) { 1353 freemsg(mp); 1354 if (sodp != NULL) 1355 SOD_DISABLE(sodp); 1356 mutex_exit(&so->so_lock); 1357 return (0); 1358 } 1359 1360 /* process the mblk via I/OAT if capable */ 1361 if (sodp != NULL && sodp->sod_enabled) { 1362 if (DB_TYPE(mp) == M_DATA) { 1363 sod_uioa_mblk_init(sodp, mp, msg_size); 1364 } else { 1365 SOD_UIOAFINI(sodp); 1366 } 1367 } 1368 1369 if (mp->b_next == NULL) { 1370 so_enqueue_msg(so, mp, msg_size); 1371 } else { 1372 do { 1373 mblk_t *nmp; 1374 1375 if ((nmp = mp->b_next) != NULL) { 1376 mp->b_next = NULL; 1377 } 1378 so_enqueue_msg(so, mp, msgdsize(mp)); 1379 mp = nmp; 1380 } while (mp != NULL); 1381 } 1382 1383 space_left = so->so_rcvbuf - so->so_rcv_queued; 1384 if (space_left <= 0) { 1385 so->so_flowctrld = B_TRUE; 1386 *errorp = ENOSPC; 1387 
space_left = -1; 1388 } 1389 1390 if (force_push || so->so_rcv_queued >= so->so_rcv_thresh || 1391 so->so_rcv_queued >= so->so_rcv_wanted) { 1392 SOCKET_TIMER_CANCEL(so); 1393 /* 1394 * so_notify_data will release the lock 1395 */ 1396 so_notify_data(so, so->so_rcv_queued); 1397 1398 if (force_pushp != NULL) 1399 *force_pushp = B_TRUE; 1400 goto done; 1401 } else if (so->so_rcv_timer_tid == 0) { 1402 /* Make sure the recv push timer is running */ 1403 SOCKET_TIMER_START(so); 1404 } 1405 1406 done_unlock: 1407 mutex_exit(&so->so_lock); 1408 done: 1409 return (space_left); 1410 1411 space_check: 1412 space_left = so->so_rcvbuf - so->so_rcv_queued; 1413 if (space_left <= 0) { 1414 so->so_flowctrld = B_TRUE; 1415 *errorp = ENOSPC; 1416 space_left = -1; 1417 } 1418 goto done_unlock; 1419 } 1420 1421 #pragma inline(so_queue_msg_impl) 1422 1423 ssize_t 1424 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp, 1425 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp) 1426 { 1427 struct sonode *so = (struct sonode *)sock_handle; 1428 1429 return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp, 1430 so->so_filter_bottom)); 1431 } 1432 1433 /* 1434 * Set the offset of where the oob data is relative to the bytes in 1435 * queued. Also generate SIGURG 1436 */ 1437 void 1438 so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset) 1439 { 1440 struct sonode *so; 1441 1442 ASSERT(offset >= 0); 1443 so = (struct sonode *)sock_handle; 1444 mutex_enter(&so->so_lock); 1445 if (so->so_direct != NULL) 1446 SOD_UIOAFINI(so->so_direct); 1447 1448 /* 1449 * New urgent data on the way so forget about any old 1450 * urgent data. 1451 */ 1452 so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA); 1453 1454 /* 1455 * Record that urgent data is pending. 
1456 */ 1457 so->so_state |= SS_OOBPEND; 1458 1459 if (so->so_oobmsg != NULL) { 1460 dprintso(so, 1, ("sock: discarding old oob\n")); 1461 freemsg(so->so_oobmsg); 1462 so->so_oobmsg = NULL; 1463 } 1464 1465 /* 1466 * set the offset where the urgent byte is 1467 */ 1468 so->so_oobmark = so->so_rcv_queued + offset; 1469 if (so->so_oobmark == 0) 1470 so->so_state |= SS_RCVATMARK; 1471 else 1472 so->so_state &= ~SS_RCVATMARK; 1473 1474 so_notify_oobsig(so); 1475 } 1476 1477 /* 1478 * Queue the OOB byte 1479 */ 1480 static void 1481 so_queue_oob(struct sonode *so, mblk_t *mp, size_t len) 1482 { 1483 mutex_enter(&so->so_lock); 1484 if (so->so_direct != NULL) 1485 SOD_UIOAFINI(so->so_direct); 1486 1487 ASSERT(mp != NULL); 1488 if (!IS_SO_OOB_INLINE(so)) { 1489 so->so_oobmsg = mp; 1490 so->so_state |= SS_HAVEOOBDATA; 1491 } else { 1492 so_enqueue_msg(so, mp, len); 1493 } 1494 1495 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1496 } 1497 1498 int 1499 so_close(struct sonode *so, int flag, struct cred *cr) 1500 { 1501 int error; 1502 1503 /* 1504 * No new data will be enqueued once the CLOSING flag is set. 1505 */ 1506 mutex_enter(&so->so_lock); 1507 so->so_state |= SS_CLOSING; 1508 ASSERT(so_verify_oobstate(so)); 1509 so_rcv_flush(so); 1510 mutex_exit(&so->so_lock); 1511 1512 if (so->so_filter_active > 0) 1513 sof_sonode_closing(so); 1514 1515 if (so->so_state & SS_ACCEPTCONN) { 1516 /* 1517 * We grab and release the accept lock to ensure that any 1518 * thread about to insert a socket in so_newconn completes 1519 * before we flush the queue. Any thread calling so_newconn 1520 * after we drop the lock will observe the SS_CLOSING flag, 1521 * which will stop it from inserting the socket in the queue. 
1522 */ 1523 mutex_enter(&so->so_acceptq_lock); 1524 mutex_exit(&so->so_acceptq_lock); 1525 1526 so_acceptq_flush(so, B_TRUE); 1527 } 1528 1529 error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr); 1530 switch (error) { 1531 default: 1532 /* Protocol made a synchronous close; remove proto ref */ 1533 VN_RELE(SOTOV(so)); 1534 break; 1535 case EINPROGRESS: 1536 /* 1537 * Protocol is in the process of closing, it will make a 1538 * 'closed' upcall to remove the reference. 1539 */ 1540 error = 0; 1541 break; 1542 } 1543 1544 return (error); 1545 } 1546 1547 /* 1548 * Upcall made by the protocol when it's doing an asynchronous close. It 1549 * will drop the protocol's reference on the socket. 1550 */ 1551 void 1552 so_closed(sock_upper_handle_t sock_handle) 1553 { 1554 struct sonode *so = (struct sonode *)sock_handle; 1555 1556 VN_RELE(SOTOV(so)); 1557 } 1558 1559 mblk_t * 1560 so_get_sock_pid_mblk(sock_upper_handle_t sock_handle) 1561 { 1562 ulong_t sz, n; 1563 mblk_t *mblk; 1564 pid_node_t *pn; 1565 pid_t *pids; 1566 conn_pid_info_t *cpi; 1567 struct sonode *so = (struct sonode *)sock_handle; 1568 1569 mutex_enter(&so->so_pid_tree_lock); 1570 1571 n = avl_numnodes(&so->so_pid_tree); 1572 sz = sizeof (conn_pid_info_t); 1573 sz += (n > 1) ? 
((n - 1) * sizeof (pid_t)) : 0; 1574 if ((mblk = allocb(sz, BPRI_HI)) == NULL) { 1575 mutex_exit(&so->so_pid_tree_lock); 1576 return (NULL); 1577 } 1578 mblk->b_wptr += sz; 1579 cpi = (conn_pid_info_t *)mblk->b_datap->db_base; 1580 1581 cpi->cpi_contents = CONN_PID_INFO_SOC; 1582 cpi->cpi_pids_cnt = n; 1583 cpi->cpi_tot_size = sz; 1584 cpi->cpi_pids[0] = 0; 1585 1586 if (cpi->cpi_pids_cnt > 0) { 1587 pids = cpi->cpi_pids; 1588 for (pn = avl_first(&so->so_pid_tree); pn != NULL; 1589 pids++, pn = AVL_NEXT(&so->so_pid_tree, pn)) 1590 *pids = pn->pn_pid; 1591 } 1592 mutex_exit(&so->so_pid_tree_lock); 1593 return (mblk); 1594 } 1595 1596 void 1597 so_zcopy_notify(sock_upper_handle_t sock_handle) 1598 { 1599 struct sonode *so = (struct sonode *)sock_handle; 1600 1601 mutex_enter(&so->so_lock); 1602 so->so_copyflag |= STZCNOTIFY; 1603 cv_broadcast(&so->so_copy_cv); 1604 mutex_exit(&so->so_lock); 1605 } 1606 1607 void 1608 so_set_error(sock_upper_handle_t sock_handle, int error) 1609 { 1610 struct sonode *so = (struct sonode *)sock_handle; 1611 1612 mutex_enter(&so->so_lock); 1613 1614 soseterror(so, error); 1615 1616 so_notify_error(so); 1617 } 1618 1619 /* 1620 * so_recvmsg - read data from the socket 1621 * 1622 * There are two ways of obtaining data; either we ask the protocol to 1623 * copy directly into the supplied buffer, or we copy data from the 1624 * sonode's receive queue. The decision which one to use depends on 1625 * whether the protocol has a sd_recv_uio down call. 
1626 */ 1627 int 1628 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 1629 struct cred *cr) 1630 { 1631 rval_t rval; 1632 int flags = 0; 1633 t_uscalar_t controllen, namelen; 1634 int error = 0; 1635 int ret; 1636 mblk_t *mctlp = NULL; 1637 union T_primitives *tpr; 1638 void *control; 1639 ssize_t saved_resid; 1640 struct uio *suiop; 1641 1642 SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr)); 1643 1644 if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 1645 (so->so_mode & SM_CONNREQUIRED)) { 1646 SO_UNBLOCK_FALLBACK(so); 1647 return (ENOTCONN); 1648 } 1649 1650 if (msg->msg_flags & MSG_PEEK) 1651 msg->msg_flags &= ~MSG_WAITALL; 1652 1653 if (so->so_mode & SM_ATOMIC) 1654 msg->msg_flags |= MSG_TRUNC; 1655 1656 if (msg->msg_flags & MSG_OOB) { 1657 if ((so->so_mode & SM_EXDATA) == 0) { 1658 error = EOPNOTSUPP; 1659 } else if (so->so_downcalls->sd_recv_uio != NULL) { 1660 error = (*so->so_downcalls->sd_recv_uio) 1661 (so->so_proto_handle, uiop, msg, cr); 1662 } else { 1663 error = sorecvoob(so, msg, uiop, msg->msg_flags, 1664 IS_SO_OOB_INLINE(so)); 1665 } 1666 SO_UNBLOCK_FALLBACK(so); 1667 return (error); 1668 } 1669 1670 /* 1671 * If the protocol has the recv down call, then pass the request 1672 * down. 1673 */ 1674 if (so->so_downcalls->sd_recv_uio != NULL) { 1675 error = (*so->so_downcalls->sd_recv_uio) 1676 (so->so_proto_handle, uiop, msg, cr); 1677 SO_UNBLOCK_FALLBACK(so); 1678 return (error); 1679 } 1680 1681 /* 1682 * Reading data from the socket buffer 1683 */ 1684 flags = msg->msg_flags; 1685 msg->msg_flags = 0; 1686 1687 /* 1688 * Set msg_controllen and msg_namelen to zero here to make it 1689 * simpler in the cases that no control or name is returned. 
1690 */ 1691 controllen = msg->msg_controllen; 1692 namelen = msg->msg_namelen; 1693 msg->msg_controllen = 0; 1694 msg->msg_namelen = 0; 1695 1696 mutex_enter(&so->so_lock); 1697 /* Set SOREADLOCKED */ 1698 error = so_lock_read_intr(so, 1699 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 1700 mutex_exit(&so->so_lock); 1701 if (error) { 1702 SO_UNBLOCK_FALLBACK(so); 1703 return (error); 1704 } 1705 1706 suiop = sod_rcv_init(so, flags, &uiop); 1707 retry: 1708 saved_resid = uiop->uio_resid; 1709 error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags); 1710 if (error != 0) { 1711 goto out; 1712 } 1713 /* 1714 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 1715 * For non-datagrams MOREDATA is used to set MSG_EOR. 1716 */ 1717 ASSERT(!(rval.r_val1 & MORECTL)); 1718 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 1719 msg->msg_flags |= MSG_TRUNC; 1720 if (mctlp == NULL) { 1721 dprintso(so, 1, ("so_recvmsg: got M_DATA\n")); 1722 1723 mutex_enter(&so->so_lock); 1724 /* Set MSG_EOR based on MOREDATA */ 1725 if (!(rval.r_val1 & MOREDATA)) { 1726 if (so->so_state & SS_SAVEDEOR) { 1727 msg->msg_flags |= MSG_EOR; 1728 so->so_state &= ~SS_SAVEDEOR; 1729 } 1730 } 1731 /* 1732 * If some data was received (i.e. not EOF) and the 1733 * read/recv* has not been satisfied wait for some more. 1734 */ 1735 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1736 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1737 mutex_exit(&so->so_lock); 1738 flags |= MSG_NOMARK; 1739 goto retry; 1740 } 1741 1742 goto out_locked; 1743 } 1744 /* so_queue_msg has already verified length and alignment */ 1745 tpr = (union T_primitives *)mctlp->b_rptr; 1746 dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type)); 1747 switch (tpr->type) { 1748 case T_DATA_IND: { 1749 /* 1750 * Set msg_flags to MSG_EOR based on 1751 * MORE_flag and MOREDATA. 
1752 */ 1753 mutex_enter(&so->so_lock); 1754 so->so_state &= ~SS_SAVEDEOR; 1755 if (!(tpr->data_ind.MORE_flag & 1)) { 1756 if (!(rval.r_val1 & MOREDATA)) 1757 msg->msg_flags |= MSG_EOR; 1758 else 1759 so->so_state |= SS_SAVEDEOR; 1760 } 1761 freemsg(mctlp); 1762 /* 1763 * If some data was received (i.e. not EOF) and the 1764 * read/recv* has not been satisfied wait for some more. 1765 */ 1766 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1767 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1768 mutex_exit(&so->so_lock); 1769 flags |= MSG_NOMARK; 1770 goto retry; 1771 } 1772 goto out_locked; 1773 } 1774 case T_UNITDATA_IND: { 1775 void *addr; 1776 t_uscalar_t addrlen; 1777 void *abuf; 1778 t_uscalar_t optlen; 1779 void *opt; 1780 1781 if (namelen != 0) { 1782 /* Caller wants source address */ 1783 addrlen = tpr->unitdata_ind.SRC_length; 1784 addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset, 1785 addrlen, 1); 1786 if (addr == NULL) { 1787 freemsg(mctlp); 1788 error = EPROTO; 1789 eprintsoline(so, error); 1790 goto out; 1791 } 1792 ASSERT(so->so_family != AF_UNIX); 1793 } 1794 optlen = tpr->unitdata_ind.OPT_length; 1795 if (optlen != 0) { 1796 t_uscalar_t ncontrollen; 1797 1798 /* 1799 * Extract any source address option. 1800 * Determine how large cmsg buffer is needed. 1801 */ 1802 opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset, 1803 optlen, __TPI_ALIGN_SIZE); 1804 1805 if (opt == NULL) { 1806 freemsg(mctlp); 1807 error = EPROTO; 1808 eprintsoline(so, error); 1809 goto out; 1810 } 1811 if (so->so_family == AF_UNIX) 1812 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 1813 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1814 !(flags & MSG_XPG4_2)); 1815 if (controllen != 0) 1816 controllen = ncontrollen; 1817 else if (ncontrollen != 0) 1818 msg->msg_flags |= MSG_CTRUNC; 1819 } else { 1820 controllen = 0; 1821 } 1822 1823 if (namelen != 0) { 1824 /* 1825 * Return address to caller. 
1826 * Caller handles truncation if length 1827 * exceeds msg_namelen. 1828 * NOTE: AF_UNIX NUL termination is ensured by 1829 * the sender's copyin_name(). 1830 */ 1831 abuf = kmem_alloc(addrlen, KM_SLEEP); 1832 1833 bcopy(addr, abuf, addrlen); 1834 msg->msg_name = abuf; 1835 msg->msg_namelen = addrlen; 1836 } 1837 1838 if (controllen != 0) { 1839 /* 1840 * Return control msg to caller. 1841 * Caller handles truncation if length 1842 * exceeds msg_controllen. 1843 */ 1844 control = kmem_zalloc(controllen, KM_SLEEP); 1845 1846 error = so_opt2cmsg(mctlp, opt, optlen, 1847 !(flags & MSG_XPG4_2), control, controllen); 1848 if (error) { 1849 freemsg(mctlp); 1850 if (msg->msg_namelen != 0) 1851 kmem_free(msg->msg_name, 1852 msg->msg_namelen); 1853 kmem_free(control, controllen); 1854 eprintsoline(so, error); 1855 goto out; 1856 } 1857 msg->msg_control = control; 1858 msg->msg_controllen = controllen; 1859 } 1860 1861 freemsg(mctlp); 1862 goto out; 1863 } 1864 case T_OPTDATA_IND: { 1865 struct T_optdata_req *tdr; 1866 void *opt; 1867 t_uscalar_t optlen; 1868 1869 tdr = (struct T_optdata_req *)mctlp->b_rptr; 1870 optlen = tdr->OPT_length; 1871 if (optlen != 0) { 1872 t_uscalar_t ncontrollen; 1873 /* 1874 * Determine how large cmsg buffer is needed. 1875 */ 1876 opt = sogetoff(mctlp, 1877 tpr->optdata_ind.OPT_offset, optlen, 1878 __TPI_ALIGN_SIZE); 1879 1880 if (opt == NULL) { 1881 freemsg(mctlp); 1882 error = EPROTO; 1883 eprintsoline(so, error); 1884 goto out; 1885 } 1886 1887 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1888 !(flags & MSG_XPG4_2)); 1889 if (controllen != 0) 1890 controllen = ncontrollen; 1891 else if (ncontrollen != 0) 1892 msg->msg_flags |= MSG_CTRUNC; 1893 } else { 1894 controllen = 0; 1895 } 1896 1897 if (controllen != 0) { 1898 /* 1899 * Return control msg to caller. 1900 * Caller handles truncation if length 1901 * exceeds msg_controllen. 
1902 */ 1903 control = kmem_zalloc(controllen, KM_SLEEP); 1904 1905 error = so_opt2cmsg(mctlp, opt, optlen, 1906 !(flags & MSG_XPG4_2), control, controllen); 1907 if (error) { 1908 freemsg(mctlp); 1909 kmem_free(control, controllen); 1910 eprintsoline(so, error); 1911 goto out; 1912 } 1913 msg->msg_control = control; 1914 msg->msg_controllen = controllen; 1915 } 1916 1917 /* 1918 * Set msg_flags to MSG_EOR based on 1919 * DATA_flag and MOREDATA. 1920 */ 1921 mutex_enter(&so->so_lock); 1922 so->so_state &= ~SS_SAVEDEOR; 1923 if (!(tpr->data_ind.MORE_flag & 1)) { 1924 if (!(rval.r_val1 & MOREDATA)) 1925 msg->msg_flags |= MSG_EOR; 1926 else 1927 so->so_state |= SS_SAVEDEOR; 1928 } 1929 freemsg(mctlp); 1930 /* 1931 * If some data was received (i.e. not EOF) and the 1932 * read/recv* has not been satisfied wait for some more. 1933 * Not possible to wait if control info was received. 1934 */ 1935 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1936 controllen == 0 && 1937 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1938 mutex_exit(&so->so_lock); 1939 flags |= MSG_NOMARK; 1940 goto retry; 1941 } 1942 goto out_locked; 1943 } 1944 default: 1945 cmn_err(CE_CONT, "so_recvmsg bad type %x \n", 1946 tpr->type); 1947 freemsg(mctlp); 1948 error = EPROTO; 1949 ASSERT(0); 1950 } 1951 out: 1952 mutex_enter(&so->so_lock); 1953 out_locked: 1954 ret = sod_rcv_done(so, suiop, uiop); 1955 if (ret != 0 && error == 0) 1956 error = ret; 1957 1958 so_unlock_read(so); /* Clear SOREADLOCKED */ 1959 mutex_exit(&so->so_lock); 1960 1961 SO_UNBLOCK_FALLBACK(so); 1962 1963 return (error); 1964 } 1965 1966 sonodeops_t so_sonodeops = { 1967 so_init, /* sop_init */ 1968 so_accept, /* sop_accept */ 1969 so_bind, /* sop_bind */ 1970 so_listen, /* sop_listen */ 1971 so_connect, /* sop_connect */ 1972 so_recvmsg, /* sop_recvmsg */ 1973 so_sendmsg, /* sop_sendmsg */ 1974 so_sendmblk, /* sop_sendmblk */ 1975 so_getpeername, /* sop_getpeername */ 1976 so_getsockname, /* 
sop_getsockname */ 1977 so_shutdown, /* sop_shutdown */ 1978 so_getsockopt, /* sop_getsockopt */ 1979 so_setsockopt, /* sop_setsockopt */ 1980 so_ioctl, /* sop_ioctl */ 1981 so_poll, /* sop_poll */ 1982 so_close, /* sop_close */ 1983 }; 1984 1985 sock_upcalls_t so_upcalls = { 1986 so_newconn, 1987 so_connected, 1988 so_disconnected, 1989 so_opctl, 1990 so_queue_msg, 1991 so_set_prop, 1992 so_txq_full, 1993 so_signal_oob, 1994 so_zcopy_notify, 1995 so_set_error, 1996 so_closed, 1997 so_get_sock_pid_mblk 1998 };