1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2014, Joyent, Inc.  All rights reserved.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/systm.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/debug.h>
  35 #include <sys/cmn_err.h>
  36 
  37 #include <sys/stropts.h>
  38 #include <sys/socket.h>
  39 #include <sys/socketvar.h>
  40 #include <sys/fcntl.h>
  41 
  42 #define _SUN_TPI_VERSION        2
  43 #include <sys/tihdr.h>
  44 #include <sys/sockio.h>
  45 #include <sys/kmem_impl.h>
  46 
  47 #include <sys/strsubr.h>
  48 #include <sys/strsun.h>
  49 #include <sys/ddi.h>
  50 #include <netinet/in.h>
  51 #include <inet/ip.h>
  52 
  53 #include <fs/sockfs/sockcommon.h>
  54 #include <fs/sockfs/sockfilter_impl.h>
  55 
  56 #include <sys/socket_proto.h>
  57 
  58 #include <fs/sockfs/socktpi_impl.h>
  59 #include <fs/sockfs/sodirect.h>
  61 #include <fs/sockfs/nl7c.h>
  62 
  63 extern int xnet_skip_checks;
  64 extern int xnet_check_print;
  65 
  66 static void so_queue_oob(struct sonode *, mblk_t *, size_t);
  67 
  68 
  69 /*ARGSUSED*/
  70 int
  71 so_accept_notsupp(struct sonode *lso, int fflag,
  72     struct cred *cr, struct sonode **nsop)
  73 {
  74         return (EOPNOTSUPP);
  75 }
  76 
  77 /*ARGSUSED*/
  78 int
  79 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
  80 {
  81         return (EOPNOTSUPP);
  82 }
  83 
  84 /*ARGSUSED*/
  85 int
  86 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
  87     socklen_t *len, struct cred *cr)
  88 {
  89         return (EOPNOTSUPP);
  90 }
  91 
  92 /*ARGSUSED*/
  93 int
  94 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
  95     socklen_t *addrlen, boolean_t accept, struct cred *cr)
  96 {
  97         return (EOPNOTSUPP);
  98 }
  99 
 100 /*ARGSUSED*/
 101 int
 102 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
 103 {
 104         return (EOPNOTSUPP);
 105 }
 106 
 107 /*ARGSUSED*/
 108 int
 109 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
 110     struct cred *cr, mblk_t **mpp)
 111 {
 112         return (EOPNOTSUPP);
 113 }
 114 
 115 /*
 116  * Generic Socket Ops
 117  */
 118 
 119 /* ARGSUSED */
 120 int
 121 so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
 122 {
 123         return (socket_init_common(so, pso, flags, cr));
 124 }
 125 
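     /*
      * Bind the socket to the given local name.  After the X/Open state
      * check and the AF_INET/AF_INET6 length and family validation, the
      * request is passed through the socket filters and then to the
      * protocol's sd_bind downcall.  A NULL name is treated as an unbind.
      */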
 126 int
 127 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
 128     int flags, struct cred *cr)
 129 {
 130         int error;
 131 
 132         SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
 133 
 134         ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
 135 
 136         /* X/Open requires this check */
 137         if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
 138                 if (xnet_check_print) {
 139                         printf("sockfs: X/Open bind state check "
 140                             "caused EINVAL\n");
 141                 }
 142                 error = EINVAL;
 143                 goto done;
 144         }
 145 
 146         /*
 147          * A bind to a NULL address is interpreted as an unbind, so
 148          * just do the downcall.
 149          */
 150         if (name == NULL)
 151                 goto dobind;
 152 
 153         switch (so->so_family) {
 154         case AF_INET:
 155                 if ((size_t)namelen != sizeof (sin_t)) {
 156                         error = name->sa_family != so->so_family ?
 157                             EAFNOSUPPORT : EINVAL;
 158                         eprintsoline(so, error);
 159                         goto done;
 160                 }
 161 
 162                 if ((flags & _SOBIND_XPG4_2) &&
 163                     (name->sa_family != so->so_family)) {
 164                         /*
 165                          * This check has to be made for X/Open
 166                          * sockets; however, application failures
 167                          * have been observed when it is applied
 168                          * to all sockets.
 169                          */
 170                         error = EAFNOSUPPORT;
 171                         eprintsoline(so, error);
 172                         goto done;
 173                 }
 174                 /*
 175                  * Force a zero sa_family to match so_family.
 176                  *
 177                  * Some programs like inetd(1M) don't set the
 178                  * family field. Other programs leave
 179                  * sin_family set to garbage - SunOS 4.X does
 180                  * not check the family field on a bind.
 181                  * We use the family field that
 182                  * was passed in to the socket() call.
 183                  */
 184                 name->sa_family = so->so_family;
 185                 break;
 186 
 187         case AF_INET6: {
 188 #ifdef DEBUG
 189                 sin6_t *sin6 = (sin6_t *)name;
 190 #endif
 191                 if ((size_t)namelen != sizeof (sin6_t)) {
 192                         error = name->sa_family != so->so_family ?
 193                             EAFNOSUPPORT : EINVAL;
 194                         eprintsoline(so, error);
 195                         goto done;
 196                 }
 197 
 198                 if (name->sa_family != so->so_family) {
 199                         /*
 200                          * With IPv6 we require the family to match
 201                          * unlike in IPv4.
 202                          */
 203                         error = EAFNOSUPPORT;
 204                         eprintsoline(so, error);
 205                         goto done;
 206                 }
 207 #ifdef DEBUG
 208                 /*
 209                  * Verify that apps don't forget to clear
 210                  * sin6_scope_id etc
 211                  */
 212                 if (sin6->sin6_scope_id != 0 &&
 213                     !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
 214                         zcmn_err(getzoneid(), CE_WARN,
 215                             "bind with uninitialized sin6_scope_id "
 216                             "(%d) on socket. Pid = %d\n",
 217                             (int)sin6->sin6_scope_id,
 218                             (int)curproc->p_pid);
 219                 }
 220                 if (sin6->__sin6_src_id != 0) {
 221                         zcmn_err(getzoneid(), CE_WARN,
 222                             "bind with uninitialized __sin6_src_id "
 223                             "(%d) on socket. Pid = %d\n",
 224                             (int)sin6->__sin6_src_id,
 225                             (int)curproc->p_pid);
 226                 }
 227 #endif /* DEBUG */
 228 
 229                 break;
 230         }
 231         default:
 232                 /* Just pass the request to the protocol */
 233                 goto dobind;
 234         }
 235 
 236         /*
 237          * Check whether NCA (NL7C) has been enabled for the requested
 238          * address; if so, we fall back to TPI so that NCA can service
 239          * this socket.  If NCA is not enabled for the address, we just
 240          * pass the request to the protocol.
 241          *
 242          * Note that nl7c_enabled is the global switch; the per-address
 243          * check is done by nl7c_lookup_addr() below.
 244          */
 245         if (nl7c_enabled && ((so->so_family == AF_INET ||
 246             so->so_family == AF_INET6) &&
 247             nl7c_lookup_addr(name, namelen) != NULL)) {
 248                 /*
 249                  * NL7C is not supported in non-global zones;
 250                  * we enforce that restriction here.
 251                  */
 252                 if (so->so_zoneid == GLOBAL_ZONEID) {
 253                         /* NCA should be used, so fall back to TPI */
 254                         error = so_tpi_fallback(so, cr);
 255                         SO_UNBLOCK_FALLBACK(so);
 256                         if (error)
 257                                 return (error);
 258                         else
 259                                 return (SOP_BIND(so, name, namelen, flags, cr));
 260                 }
 261         }
 262 
 263 dobind:
 264         if (so->so_filter_active == 0 ||
 265             (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
 266                 error = (*so->so_downcalls->sd_bind)
 267                     (so->so_proto_handle, name, namelen, cr);
 268         }
 269 done:
 270         SO_UNBLOCK_FALLBACK(so);
 271 
 272         return (error);
 273 }
 274 
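     /*
      * Put the socket in the listening state.  Socket filters may adjust
      * or intercept the backlog before the protocol's sd_listen downcall
      * is made.
      */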
 275 int
 276 so_listen(struct sonode *so, int backlog, struct cred *cr)
 277 {
 278         int     error = 0;
 279 
 280         ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 281         SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
 282 
 283         if ((so)->so_filter_active == 0 ||
 284             (error = sof_filter_listen(so, &backlog, cr)) < 0)
 285                 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
 286                     backlog, cr);
 287 
 288         SO_UNBLOCK_FALLBACK(so);
 289 
 290         return (error);
 291 }
 292 
 293 
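     /*
      * Connect the socket to the given address.  Any pending asynchronous
      * error is returned first; otherwise the request goes through the
      * filter stack to the protocol's sd_connect downcall, and an
      * EINPROGRESS result makes us wait for the connection to be
      * established unless the socket is non-blocking.
      */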
 294 int
 295 so_connect(struct sonode *so, struct sockaddr *name,
 296     socklen_t namelen, int fflag, int flags, struct cred *cr)
 297 {
 298         int error = 0;
 299         sock_connid_t id;
 300 
 301         ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 302         SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
 303 
 304         /*
 305          * If there is a pending error, return it.  This can happen
 306          * if a non-blocking operation caused an error.
 307          */
 308 
 309         if (so->so_error != 0) {
 310                 mutex_enter(&so->so_lock);
 311                 error = sogeterr(so, B_TRUE);
 312                 mutex_exit(&so->so_lock);
 313                 if (error != 0)
 314                         goto done;
 315         }
 316 
 317         if (so->so_filter_active == 0 ||
 318             (error = sof_filter_connect(so, (struct sockaddr *)name,
 319             &namelen, cr)) < 0) {
 320                 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
 321                     name, namelen, &id, cr);
 322 
 323                 if (error == EINPROGRESS)
 324                         error = so_wait_connected(so,
 325                             fflag & (FNONBLOCK|FNDELAY), id);
 326         }
 327 done:
 328         SO_UNBLOCK_FALLBACK(so);
 329         return (error);
 330 }
 331 
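     /*
      * Accept a connection: dequeue a completed connection from the
      * accept queue, give the filters a chance to reject it, and hand it
      * to the protocol via the sd_accept downcall.  On failure the new
      * sonode is closed and destroyed; on success it is returned to the
      * caller via nsop.
      */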
 332 /*ARGSUSED*/
 333 int
 334 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
 335 {
 336         int error = 0;
 337         struct sonode *nso;
 338 
 339         *nsop = NULL;
 340 
 341         SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
 342         if ((so->so_state & SS_ACCEPTCONN) == 0) {
 343                 SO_UNBLOCK_FALLBACK(so);
 344                 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
 345                     EOPNOTSUPP : EINVAL);
 346         }
 347 
 348         if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
 349             &nso)) == 0) {
 350                 ASSERT(nso != NULL);
 351 
 352                 /* finish the accept */
 353                 if ((so->so_filter_active > 0 &&
 354                     (error = sof_filter_accept(nso, cr)) > 0) ||
 355                     (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
 356                     nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
 357                         (void) socket_close(nso, 0, cr);
 358                         socket_destroy(nso);
 359                 } else {
 360                         *nsop = nso;
 361                         if (!(curproc->p_flag & SSYS))
 362                                 sonode_insert_pid(nso, curproc->p_pidp->pid_id);
 363                 }
 364         }
 365 
 366         SO_UNBLOCK_FALLBACK(so);
 367         return (error);
 368 }
 369 
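     /*
      * Send data described by a uio.  The data is either handed to the
      * protocol as a uio (sd_send_uio) or copied into mblks, run through
      * the filter stack and sent via sd_send.  Oversized sends on
      * atomic-mode sockets fail with EMSGSIZE.
      */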
 370 int
 371 so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
 372     struct cred *cr)
 373 {
 374         int error, flags;
 375         boolean_t dontblock;
 376         ssize_t orig_resid;
 377         mblk_t  *mp;
 378 
 379         SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));
 380 
 381         flags = msg->msg_flags;
 382         error = 0;
 383         dontblock = (flags & MSG_DONTWAIT) ||
 384             (uiop->uio_fmode & (FNONBLOCK|FNDELAY));
 385 
 386         if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
 387                 /*
 388                  * The old way of passing fds is not supported.
 389                  */
 390                 SO_UNBLOCK_FALLBACK(so);
 391                 return (EOPNOTSUPP);
 392         }
 393 
 394         if ((so->so_mode & SM_ATOMIC) &&
 395             uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
 396             so->so_proto_props.sopp_maxpsz != -1) {
 397                 SO_UNBLOCK_FALLBACK(so);
 398                 return (EMSGSIZE);
 399         }
 400 
 401         /*
 402          * For atomic sends we will only do one iteration.
 403          */
 404         do {
 405                 if (so->so_state & SS_CANTSENDMORE) {
 406                         error = EPIPE;
 407                         break;
 408                 }
 409 
 410                 if (so->so_error != 0) {
 411                         mutex_enter(&so->so_lock);
 412                         error = sogeterr(so, B_TRUE);
 413                         mutex_exit(&so->so_lock);
 414                         if (error != 0)
 415                                 break;
 416                 }
 417 
 418                 /*
 419                  * Send down OOB messages even if the send path is being
 420                  * flow controlled (assuming the protocol supports OOB data).
 421                  */
 422                 if (flags & MSG_OOB) {
 423                         if ((so->so_mode & SM_EXDATA) == 0) {
 424                                 error = EOPNOTSUPP;
 425                                 break;
 426                         }
 427                 } else if (SO_SND_FLOWCTRLD(so)) {
 428                         /*
 429                          * Need to wait until the protocol is ready to receive
 430                          * more data for transmission.
 431                          */
 432                         if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
 433                                 break;
 434                 }
 435 
 436                 /*
 437                  * Time to send data to the protocol. We either copy the
 438                  * data into mblks or pass the uio directly to the protocol.
 439                  * We decide what to do based on the available down calls.
 440                  */
 441                 if (so->so_downcalls->sd_send_uio != NULL) {
 442                         error = (*so->so_downcalls->sd_send_uio)
 443                             (so->so_proto_handle, uiop, msg, cr);
 444                         if (error != 0)
 445                                 break;
 446                 } else {
 447                         /* save the resid in case of failure */
 448                         orig_resid = uiop->uio_resid;
 449 
 450                         if ((mp = socopyinuio(uiop,
 451                             so->so_proto_props.sopp_maxpsz,
 452                             so->so_proto_props.sopp_wroff,
 453                             so->so_proto_props.sopp_maxblk,
 454                             so->so_proto_props.sopp_tail, &error)) == NULL) {
 455                                 break;
 456                         }
 457                         ASSERT(uiop->uio_resid >= 0);
 458 
 459                         if (so->so_filter_active > 0 &&
 460                             ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
 461                             &error)) == NULL)) {
 462                                 if (error != 0)
 463                                         break;
 464                                 continue;
 465                         }
 466                         error = (*so->so_downcalls->sd_send)
 467                             (so->so_proto_handle, mp, msg, cr);
 468                         if (error != 0) {
 469                                 /*
 470                                  * The send failed. We do not have to free the
 471                                  * mblks, because that is the protocol's
 472                                  * responsibility. However, uio_resid must
 473                                  * remain accurate, so adjust that here.
 474                                  */
 475                                 uiop->uio_resid = orig_resid;
 476                                 break;
 477                         }
 478                 }
 479         } while (uiop->uio_resid > 0);
 480 
 481         SO_UNBLOCK_FALLBACK(so);
 482 
 483         return (error);
 484 }
 485 
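     /*
      * Common code for sending a chain of mblks to the protocol.  The
      * chain is sent in pieces of no more than sopp_maxpsz bytes, each
      * passed through the filter stack (data injected by a filter skips
      * flow control) and then to the sd_send downcall.  *mpp is advanced
      * past whatever was consumed, and ENOSPC tells an injecting filter
      * that the protocol is now flow controlled.
      */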
 486 int
 487 so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
 488     struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
 489     boolean_t fil_inject)
 490 {
 491         int error;
 492         boolean_t dontblock;
 493         size_t size;
 494         mblk_t *mp = *mpp;
 495 
 496         if (so->so_downcalls->sd_send == NULL)
 497                 return (EOPNOTSUPP);
 498 
 499         error = 0;
 500         dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
 501             (fflag & (FNONBLOCK|FNDELAY));
 502         size = msgdsize(mp);
 503 
 504         if ((so->so_mode & SM_ATOMIC) &&
 505             size > so->so_proto_props.sopp_maxpsz &&
 506             so->so_proto_props.sopp_maxpsz != -1) {
 507                 SO_UNBLOCK_FALLBACK(so);
 508                 return (EMSGSIZE);
 509         }
 510 
 511         while (mp != NULL) {
 512                 mblk_t *nmp, *last_mblk;
 513                 size_t mlen;
 514 
 515                 if (so->so_state & SS_CANTSENDMORE) {
 516                         error = EPIPE;
 517                         break;
 518                 }
 519                 if (so->so_error != 0) {
 520                         mutex_enter(&so->so_lock);
 521                         error = sogeterr(so, B_TRUE);
 522                         mutex_exit(&so->so_lock);
 523                         if (error != 0)
 524                                 break;
 525                 }
 526                 /* Socket filters are not flow controlled */
 527                 if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
 528                         /*
 529                          * Need to wait until the protocol is ready to receive
 530                          * more data for transmission.
 531                          */
 532                         if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
 533                                 break;
 534                 }
 535 
 536                 /*
 537                  * We only allow so_maxpsz of data to be sent down to
 538                  * the protocol at a time.
 539                  */
 540                 mlen = MBLKL(mp);
 541                 nmp = mp->b_cont;
 542                 last_mblk = mp;
 543                 while (nmp != NULL) {
 544                         mlen += MBLKL(nmp);
 545                         if (mlen > so->so_proto_props.sopp_maxpsz) {
 546                                 last_mblk->b_cont = NULL;
 547                                 break;
 548                         }
 549                         last_mblk = nmp;
 550                         nmp = nmp->b_cont;
 551                 }
 552 
 553                 if (so->so_filter_active > 0 &&
 554                     (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
 555                     cr, &error)) == NULL) {
 556                         *mpp = mp = nmp;
 557                         if (error != 0)
 558                                 break;
 559                         continue;
 560                 }
 561                 error = (*so->so_downcalls->sd_send)
 562                     (so->so_proto_handle, mp, msg, cr);
 563                 if (error != 0) {
 564                         /*
 565                          * The send failed. The protocol will free the mblks
 566                          * that were sent down. Let the caller deal with the
 567                          * rest.
 568                          */
 569                         *mpp = nmp;
 570                         break;
 571                 }
 572 
 573                 *mpp = mp = nmp;
 574         }
 575         /* Let the filter know whether the protocol is flow controlled */
 576         if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
 577                 error = ENOSPC;
 578 
 579         return (error);
 580 }
 581 
 582 #pragma inline(so_sendmblk_impl)
 583 
 584 int
 585 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
 586     struct cred *cr, mblk_t **mpp)
 587 {
 588         int error;
 589 
 590         SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
 591 
 592         if ((so->so_mode & SM_SENDFILESUPP) == 0) {
 593                 SO_UNBLOCK_FALLBACK(so);
 594                 return (EOPNOTSUPP);
 595         }
 596 
 597         error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
 598             B_FALSE);
 599 
 600         SO_UNBLOCK_FALLBACK(so);
 601 
 602         return (error);
 603 }
 604 
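     /*
      * Shut down one or both directions of the connection.  X/Open
      * requires the socket to be connected; once the protocol agrees,
      * shutting down the receive side also flushes the receive buffer.
      */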
 605 int
 606 so_shutdown(struct sonode *so, int how, struct cred *cr)
 607 {
 608         int error;
 609 
 610         SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
 611 
 612         /*
 613          * SunOS 4.X has no check for datagram sockets.
 614          * 5.X checks that it is connected (ENOTCONN).
 615          * X/Open requires that we check the connected state.
 616          */
 617         if (!(so->so_state & SS_ISCONNECTED)) {
 618                 if (!xnet_skip_checks) {
 619                         error = ENOTCONN;
 620                         if (xnet_check_print) {
 621                                 printf("sockfs: X/Open shutdown check "
 622                                     "caused ENOTCONN\n");
 623                         }
 624                 }
 625                 goto done;
 626         }
 627 
 628         if (so->so_filter_active == 0 ||
 629             (error = sof_filter_shutdown(so, &how, cr)) < 0)
 630                 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
 631                     how, cr));
 632 
 633         /*
 634          * The protocol agreed to shut down.  We need to flush the
 635          * receive buffer if the receive side is being shut down.
 636          */
 637         if (error == 0 && how != SHUT_WR) {
 638                 mutex_enter(&so->so_lock);
 639                 /* wait for active reader to finish */
 640                 (void) so_lock_read(so, 0);
 641 
 642                 so_rcv_flush(so);
 643 
 644                 so_unlock_read(so);
 645                 mutex_exit(&so->so_lock);
 646         }
 647 
 648 done:
 649         SO_UNBLOCK_FALLBACK(so);
 650         return (error);
 651 }
 652 
 653 int
 654 so_getsockname(struct sonode *so, struct sockaddr *addr,
 655     socklen_t *addrlen, struct cred *cr)
 656 {
 657         int error;
 658 
 659         SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
 660 
 661         if (so->so_filter_active == 0 ||
 662             (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
 663                 error = (*so->so_downcalls->sd_getsockname)
 664                     (so->so_proto_handle, addr, addrlen, cr);
 665 
 666         SO_UNBLOCK_FALLBACK(so);
 667         return (error);
 668 }
 669 
 670 int
 671 so_getpeername(struct sonode *so, struct sockaddr *addr,
 672     socklen_t *addrlen, boolean_t accept, struct cred *cr)
 673 {
 674         int error;
 675 
 676         SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
 677 
 678         if (accept) {
 679                 error = (*so->so_downcalls->sd_getpeername)
 680                     (so->so_proto_handle, addr, addrlen, cr);
 681         } else if (!(so->so_state & SS_ISCONNECTED)) {
 682                 error = ENOTCONN;
 683         } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
 684                 /* Added this check for X/Open */
 685                 error = EINVAL;
 686                 if (xnet_check_print) {
 687                         printf("sockfs: X/Open getpeername check => EINVAL\n");
 688                 }
 689         } else if (so->so_filter_active == 0 ||
 690             (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
 691                 error = (*so->so_downcalls->sd_getpeername)
 692                     (so->so_proto_handle, addr, addrlen, cr);
 693         }
 694 
 695         SO_UNBLOCK_FALLBACK(so);
 696         return (error);
 697 }
 698 
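     /*
      * Get a socket option.  SOL_FILTER options are handled by sockfs
      * directly; for everything else the filter stack and the generic
      * sockfs options are consulted before the protocol's sd_getsockopt
      * downcall.  A get of a common SOL_SOCKET option is not allowed to
      * fail, so ENOPROTOOPT from the protocol is turned into a zeroed
      * default value.
      */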
 699 int
 700 so_getsockopt(struct sonode *so, int level, int option_name,
 701     void *optval, socklen_t *optlenp, int flags, struct cred *cr)
 702 {
 703         int error = 0;
 704 
 705         if (level == SOL_FILTER)
 706                 return (sof_getsockopt(so, option_name, optval, optlenp, cr));
 707 
 708         SO_BLOCK_FALLBACK(so,
 709             SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
 710 
 711         if ((so->so_filter_active == 0 ||
 712             (error = sof_filter_getsockopt(so, level, option_name, optval,
 713             optlenp, cr)) < 0) &&
 714             (error = socket_getopt_common(so, level, option_name, optval,
 715             optlenp, flags)) < 0) {
 716                 error = (*so->so_downcalls->sd_getsockopt)
 717                     (so->so_proto_handle, level, option_name, optval, optlenp,
 718                     cr);
 719                 if (error == ENOPROTOOPT) {
 720                         if (level == SOL_SOCKET) {
 721                                 /*
 722                                  * If a protocol does not support a particular
 723                                  * socket option, set can fail (not allowed)
 724                                  * but get cannot fail.  This is the previous
 725                                  * sockfs behavior.
 726                                  */
 727                                 switch (option_name) {
 728                                 case SO_LINGER:
 729                                         if (*optlenp < (t_uscalar_t)
 730                                             sizeof (struct linger)) {
 731                                                 error = EINVAL;
 732                                                 break;
 733                                         }
 734                                         error = 0;
 735                                         bzero(optval, sizeof (struct linger));
 736                                         *optlenp = sizeof (struct linger);
 737                                         break;
 738                                 case SO_RCVTIMEO:
 739                                 case SO_SNDTIMEO:
 740                                         if (*optlenp < (t_uscalar_t)
 741                                             sizeof (struct timeval)) {
 742                                                 error = EINVAL;
 743                                                 break;
 744                                         }
 745                                         error = 0;
 746                                         bzero(optval, sizeof (struct timeval));
 747                                         *optlenp = sizeof (struct timeval);
 748                                         break;
 749                                 case SO_SND_BUFINFO:
 750                                         if (*optlenp < (t_uscalar_t)
 751                                             sizeof (struct so_snd_bufinfo)) {
 752                                                 error = EINVAL;
 753                                                 break;
 754                                         }
 755                                         error = 0;
 756                                         bzero(optval,
 757                                             sizeof (struct so_snd_bufinfo));
 758                                         *optlenp =
 759                                             sizeof (struct so_snd_bufinfo);
 760                                         break;
 761                                 case SO_DEBUG:
 762                                 case SO_REUSEADDR:
 763                                 case SO_KEEPALIVE:
 764                                 case SO_DONTROUTE:
 765                                 case SO_BROADCAST:
 766                                 case SO_USELOOPBACK:
 767                                 case SO_OOBINLINE:
 768                                 case SO_DGRAM_ERRIND:
 769                                 case SO_SNDBUF:
 770                                 case SO_RCVBUF:
 771                                         error = 0;
 772                                         *((int32_t *)optval) = 0;
 773                                         *optlenp = sizeof (int32_t);
 774                                         break;
 775                                 default:
 776                                         break;
 777                                 }
 778                         }
 779                 }
 780         }
 781 
 782         SO_UNBLOCK_FALLBACK(so);
 783         return (error);
 784 }
 785 
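     /*
      * Set a socket option.  After the X/Open state check and the filter
      * stack, SO_RCVTIMEO and SO_SNDTIMEO are converted to a native
      * timeval and cached in the sonode, and SO_RCVBUF is remembered in
      * so_xpg_rcvbuf, before the option is passed to sd_setsockopt.
      */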
 786 int
 787 so_setsockopt(struct sonode *so, int level, int option_name,
 788     const void *optval, socklen_t optlen, struct cred *cr)
 789 {
 790         int error = 0;
 791         struct timeval tl;
 792         const void *opt = optval;
 793 
 794         if (level == SOL_FILTER)
 795                 return (sof_setsockopt(so, option_name, optval, optlen, cr));
 796 
 797         SO_BLOCK_FALLBACK(so,
 798             SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
 799 
 800         /* X/Open requires this check */
 801         if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
 802                 SO_UNBLOCK_FALLBACK(so);
 803                 if (xnet_check_print)
 804                         printf("sockfs: X/Open setsockopt check => EINVAL\n");
 805                 return (EINVAL);
 806         }
 807 
 808         if (so->so_filter_active > 0 &&
 809             (error = sof_filter_setsockopt(so, level, option_name,
 810             (void *)optval, &optlen, cr)) >= 0)
 811                 goto done;
 812 
 813         if (level == SOL_SOCKET) {
 814                 switch (option_name) {
 815                 case SO_RCVTIMEO:
 816                 case SO_SNDTIMEO: {
 817                         /*
 818                          * We pass these two options down to the protocol so
 819                          * that third-party protocols that need to know them
 820                          * can see them.  Protocols that do not care about
 821                          * these options simply return 0.
 822                          */
 823                         clock_t t_usec;
 824 
 825                         if (get_udatamodel() == DATAMODEL_NONE ||
 826                             get_udatamodel() == DATAMODEL_NATIVE) {
 827                                 if (optlen != sizeof (struct timeval)) {
 828                                         error = EINVAL;
 829                                         goto done;
 830                                 }
 831                                 bcopy((struct timeval *)optval, &tl,
 832                                     sizeof (struct timeval));
 833                         } else {
 834                                 if (optlen != sizeof (struct timeval32)) {
 835                                         error = EINVAL;
 836                                         goto done;
 837                                 }
 838                                 TIMEVAL32_TO_TIMEVAL(&tl,
 839                                     (struct timeval32 *)optval);
 840                         }
 841                         opt = &tl;
 842                         optlen = sizeof (tl);
 843                         t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
 844                         mutex_enter(&so->so_lock);
 845                         if (option_name == SO_RCVTIMEO)
 846                                 so->so_rcvtimeo = drv_usectohz(t_usec);
 847                         else
 848                                 so->so_sndtimeo = drv_usectohz(t_usec);
 849                         mutex_exit(&so->so_lock);
 850                         break;
 851                 }
 852                 case SO_RCVBUF:
 853                         /*
 854                          * XXX XPG 4.2 applications retrieve SO_RCVBUF from
 855                          * sockfs since the transport might adjust the value
 856                          * and not return exactly what was set by the
 857                          * application.
 858                          */
 859                         so->so_xpg_rcvbuf = *(int32_t *)optval;
 860                         break;
 861                 }
 862         }
 863         error = (*so->so_downcalls->sd_setsockopt)
 864             (so->so_proto_handle, level, option_name, opt, optlen, cr);
 865 done:
 866         SO_UNBLOCK_FALLBACK(so);
 867         return (error);
 868 }
 869 
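     /*
      * Issue an ioctl on the socket.  Pending errors are returned first;
      * the filters, the common socket ioctls and the STREAMS ioctls are
      * tried in turn before the protocol's sd_ioctl downcall.
      */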
 870 int
 871 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
 872     struct cred *cr, int32_t *rvalp)
 873 {
 874         int error = 0;
 875 
 876         SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
 877 
 878         /*
 879          * If there is a pending error, return it.  This can happen
 880          * if a non-blocking operation caused an error.
 881          */
 882         if (so->so_error != 0) {
 883                 mutex_enter(&so->so_lock);
 884                 error = sogeterr(so, B_TRUE);
 885                 mutex_exit(&so->so_lock);
 886                 if (error != 0)
 887                         goto done;
 888         }
 889 
 890         /*
 891          * Calling strioc can result in the socket falling back to TPI,
 892          * if that is supported.
 893          */
 894         if ((so->so_filter_active == 0 ||
 895             (error = sof_filter_ioctl(so, cmd, arg, mode,
 896             rvalp, cr)) < 0) &&
 897             (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
 898             (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
 899                 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
 900                     cmd, arg, mode, rvalp, cr);
 901         }
 902 
 903 done:
 904         SO_UNBLOCK_FALLBACK(so);
 905 
 906         return (error);
 907 }
 908 
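     /*
      * Poll for events on the socket.  A pending error shows up as both
      * input and output events; writability tracks the protocol's flow
      * control state, and readability covers queued data, pending
      * connections and EOF/disconnect conditions.  When no events are
      * pending (or POLLET is set) the caller is attached to the socket's
      * pollhead after rechecking the read events under so_lock.
      */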
 909 int
 910 so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
 911     struct pollhead **phpp)
 912 {
 913         int state = so->so_state, mask;
 914         *reventsp = 0;
 915 
 916         /*
 917          * In sockets, errors are represented as input/output events.
 918          */
 919         if (so->so_error != 0 &&
 920             ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
 921                 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
 922                 return (0);
 923         }
 924 
 925         /*
 926          * If the socket is in a state where it can send data,
 927          * turn on POLLWRBAND and POLLOUT events.
 928          */
 929         if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
 930                 /*
 931                  * Out-of-band data is allowed even if the connection
 932                  * is flow controlled.
 933                  */
 934                 *reventsp |= POLLWRBAND & events;
 935                 if (!SO_SND_FLOWCTRLD(so)) {
 936                         /*
 937                          * As long as there is buffer space to send data,
 938                          * turn on POLLOUT events.
 939                          */
 940                         *reventsp |= POLLOUT & events;
 941                 }
 942         }
 943 
 944         /*
 945          * Turn on POLLIN whenever there is data on the receive queue,
 946          * or the socket is in a state where no more data will be received.
 947          * Also, if the socket is accepting connections, flip the bit if
 948          * there is something on the queue.
 949          *
 950          * We do an initial check for events without holding locks. However,
 951          * if there are no events available, then we redo the check for POLLIN
 952          * events under the lock.
 953          */
 954 
 955         /* Pending connections */
 956         if (!list_is_empty(&so->so_acceptq_list))
 957                 *reventsp |= (POLLIN|POLLRDNORM) & events;
 958 
 959         /*
 960          * If we're looking for POLLRDHUP, indicate it if we have sent the
 961          * last rx signal for the socket.
 962          */
 963         if ((events & POLLRDHUP) && (state & SS_SENTLASTREADSIG))
 964                 *reventsp |= POLLRDHUP;
 965 
 966         /* Data */
 967         /* so_downcalls is null for sctp */
 968         if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
 969                 *reventsp |= (*so->so_downcalls->sd_poll)
 970                     (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
 971                     CRED()) & events;
 972                 ASSERT((*reventsp & ~events) == 0);
 973                 /* do not recheck events */
 974                 events &= ~SO_PROTO_POLLEV;
 975         } else {
 976                 if (SO_HAVE_DATA(so))
 977                         *reventsp |= (POLLIN|POLLRDNORM) & events;
 978 
 979                 /* Urgent data */
 980                 if ((state & SS_OOBPEND) != 0) {
 981                         *reventsp |= (POLLRDBAND | POLLPRI) & events;
 982                 }
 983 
 984                 /*
 985                  * If the socket has become disconnected, we set POLLHUP.
 986                  * Note that if we are in this state, we will have set POLLIN
 987                  * (SO_HAVE_DATA() is true on a disconnected socket), but not
 988                  * POLLOUT (SS_ISCONNECTED is false).  This is in keeping with
 989                  * the semantics of POLLHUP, which is defined to be mutually
 990                  * exclusive with respect to POLLOUT but not POLLIN.  We are
 991                  * therefore setting POLLHUP primarily for the benefit of
 992                  * those not polling on POLLIN, as they have no other way of
 993                  * knowing that the socket has been disconnected.
 994                  */
 995                 mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;
 996 
 997                 if ((state & (mask | SS_ISCONNECTED)) == mask)
 998                         *reventsp |= POLLHUP;
 999         }
1000 
1001         if ((!*reventsp && !anyyet) || (events & POLLET)) {
1002                 /* Check for read events again, but this time under lock */
1003                 if (events & (POLLIN|POLLRDNORM)) {
1004                         mutex_enter(&so->so_lock);
1005                         if (SO_HAVE_DATA(so) ||
1006                             !list_is_empty(&so->so_acceptq_list)) {
1007                                 if (events & POLLET) {
1008                                         so->so_pollev |= SO_POLLEV_IN;
1009                                         *phpp = &so->so_poll_list;
1010                                 }
1011 
1012                                 mutex_exit(&so->so_lock);
1013                                 *reventsp |= (POLLIN|POLLRDNORM) & events;
1014 
1015                                 return (0);
1016                         } else {
1017                                 so->so_pollev |= SO_POLLEV_IN;
1018                                 mutex_exit(&so->so_lock);
1019                         }
1020                 }
1021                 *phpp = &so->so_poll_list;
1022         }
1023         return (0);
1024 }
1025 
1026 /*
1027  * Generic Upcalls
1028  */
1029 void
1030 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
1031     cred_t *peer_cred, pid_t peer_cpid)
1032 {
1033         struct sonode *so = (struct sonode *)sock_handle;
1034 
1035         mutex_enter(&so->so_lock);
1036         ASSERT(so->so_proto_handle != NULL);
1037 
1038         if (peer_cred != NULL) {
1039                 if (so->so_peercred != NULL)
1040                         crfree(so->so_peercred);
1041                 crhold(peer_cred);
1042                 so->so_peercred = peer_cred;
1043                 so->so_cpid = peer_cpid;
1044         }
1045 
1046         so->so_proto_connid = id;
1047         soisconnected(so);
1048         /*
1049          * Wake up anyone waiting for the connection to become established.
1050          */
1051         so_notify_connected(so);
1052 }
1053 
1054 int
1055 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
1056 {
1057         struct sonode *so = (struct sonode *)sock_handle;
1058         boolean_t connect_failed;
1059 
1060         mutex_enter(&so->so_lock);
1061 
1062         /*
1063          * If we aren't currently connected, then this isn't a disconnect but
1064          * rather a failure to connect.
1065          */
1066         connect_failed = !(so->so_state & SS_ISCONNECTED);
1067 
1068         so->so_proto_connid = id;
1069         soisdisconnected(so, error);
1070         so_notify_disconnected(so, connect_failed, error);
1071 
1072         return (0);
1073 }
1074 
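     /*
      * Upcall used by the protocol to change the socket state: shut down
      * the send or receive side, or enable accepting connections with
      * the given backlog.
      */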
1075 void
1076 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
1077     uintptr_t arg)
1078 {
1079         struct sonode *so = (struct sonode *)sock_handle;
1080 
1081         switch (action) {
1082         case SOCK_OPCTL_SHUT_SEND:
1083                 mutex_enter(&so->so_lock);
1084                 socantsendmore(so);
1085                 so_notify_disconnecting(so);
1086                 break;
1087         case SOCK_OPCTL_SHUT_RECV: {
1088                 mutex_enter(&so->so_lock);
1089                 socantrcvmore(so);
1090                 so_notify_eof(so);
1091                 break;
1092         }
1093         case SOCK_OPCTL_ENAB_ACCEPT:
1094                 mutex_enter(&so->so_lock);
1095                 so->so_state |= SS_ACCEPTCONN;
1096                 so->so_backlog = (unsigned int)arg;
1097                 /*
1098                  * The protocol can stop generating newconn upcalls when
1099                  * the backlog is full, so to make sure the listener does
1100                  * not end up with a queue full of deferred connections
1101                  * we reduce the backlog by one. Thus the listener will
1102                  * start closing deferred connections before the backlog
1103                  * is full.
1104                  */
1105                 if (so->so_filter_active > 0)
1106                         so->so_backlog = MAX(1, so->so_backlog - 1);
1107                 mutex_exit(&so->so_lock);
1108                 break;
1109         default:
1110                 ASSERT(0);
1111                 break;
1112         }
1113 }
1114 
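     /*
      * Upcall from the protocol indicating whether its transmit queue is
      * full.  Clearing the condition wakes up blocked senders and
      * generates a writable notification.
      */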
1115 void
1116 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
1117 {
1118         struct sonode *so = (struct sonode *)sock_handle;
1119 
1120         if (qfull) {
1121                 so_snd_qfull(so);
1122         } else {
1123                 so_snd_qnotfull(so);
1124                 mutex_enter(&so->so_lock);
1125                 /* so_notify_writable drops so_lock */
1126                 so_notify_writable(so);
1127         }
1128 }
1129 
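     /*
      * Upcall from the protocol when a new connection has been created.
      * A new sonode is allocated and placed on the listener's accept
      * queue (or the deferred queue when a filter defers it).  NULL is
      * returned when the listener is not accepting, is closing or
      * falling back, or its backlog is already full.
      */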
1130 sock_upper_handle_t
1131 so_newconn(sock_upper_handle_t parenthandle,
1132     sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
1133     struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
1134 {
1135         struct sonode   *so = (struct sonode *)parenthandle;
1136         struct sonode   *nso;
1137         int error;
1138 
1139         ASSERT(proto_handle != NULL);
1140 
1141         if ((so->so_state & SS_ACCEPTCONN) == 0 ||
1142             (so->so_acceptq_len >= so->so_backlog &&
1143             (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
1144                 return (NULL);
1145         }
1146 
1147         nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
1148             &error);
1149         if (nso == NULL)
1150                 return (NULL);
1151 
1152         if (peer_cred != NULL) {
1153                 crhold(peer_cred);
1154                 nso->so_peercred = peer_cred;
1155                 nso->so_cpid = peer_cpid;
1156         }
1157         nso->so_listener = so;
1158 
1159         /*
1160          * The new socket (nso), proto_handle and sock_upcallsp are all
1161          * valid at this point. But as soon as nso is placed in the accept
1162          * queue that can no longer be assumed (since an accept() thread may
1163          * pull it off the queue and close the socket).
1164          */
1165         *sock_upcallsp = &so_upcalls;
1166 
1167         mutex_enter(&so->so_acceptq_lock);
1168         if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
1169                 mutex_exit(&so->so_acceptq_lock);
1170                 ASSERT(nso->so_count == 1);
1171                 nso->so_count--;
1172                 nso->so_listener = NULL;
1173                 /* drop proto ref */
1174                 VN_RELE(SOTOV(nso));
1175                 socket_destroy(nso);
1176                 return (NULL);
1177         } else {
1178                 so->so_acceptq_len++;
1179                 if (nso->so_state & SS_FIL_DEFER) {
1180                         list_insert_tail(&so->so_acceptq_defer, nso);
1181                         mutex_exit(&so->so_acceptq_lock);
1182                 } else {
1183                         list_insert_tail(&so->so_acceptq_list, nso);
1184                         cv_signal(&so->so_acceptq_cv);
1185                         mutex_exit(&so->so_acceptq_lock);
1186                         mutex_enter(&so->so_lock);
1187                         so_notify_newconn(so);
1188                 }
1189 
1190                 return ((sock_upper_handle_t)nso);
1191         }
1192 }
1193 
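     /*
      * Upcall used by the protocol to update the socket's protocol
      * properties (buffer sizes, write offset, maximum packet size,
      * etc.).  Active filters then get a chance to adjust maxblk, wroff
      * and tail.
      */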
1194 void
1195 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
1196 {
1197         struct sonode *so;
1198 
1199         so = (struct sonode *)sock_handle;
1200 
1201         mutex_enter(&so->so_lock);
1202 
1203         if (soppp->sopp_flags & SOCKOPT_MAXBLK)
1204                 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
1205         if (soppp->sopp_flags & SOCKOPT_WROFF)
1206                 so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
1207         if (soppp->sopp_flags & SOCKOPT_TAIL)
1208                 so->so_proto_props.sopp_tail = soppp->sopp_tail;
1209         if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
1210                 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
1211         if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
1212                 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
1213         if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
1214                 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
1215         if (soppp->sopp_flags & SOCKOPT_MINPSZ)
1216                 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
1217         if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
1218                 if (soppp->sopp_zcopyflag & ZCVMSAFE) {
1219                         so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
1220                         so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
1221                 } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
1222                         so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
1223                         so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
1224                 }
1225 
1226                 if (soppp->sopp_zcopyflag & COPYCACHED) {
1227                         so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
1228                 }
1229         }
1230         if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
1231                 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
1232         if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
1233                 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
1234         if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
1235                 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
1236         if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
1237                 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
1238         if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
1239                 so->so_proto_props.sopp_loopback = soppp->sopp_loopback;
1240 
1241         mutex_exit(&so->so_lock);
1242 
1243         if (so->so_filter_active > 0) {
1244                 sof_instance_t *inst;
1245                 ssize_t maxblk;
1246                 ushort_t wroff, tail;
1247                 maxblk = so->so_proto_props.sopp_maxblk;
1248                 wroff = so->so_proto_props.sopp_wroff;
1249                 tail = so->so_proto_props.sopp_tail;
1250                 for (inst = so->so_filter_bottom; inst != NULL;
1251                     inst = inst->sofi_prev) {
1252                         if (SOF_INTERESTED(inst, mblk_prop)) {
1253                                 (*inst->sofi_ops->sofop_mblk_prop)(
1254                                     (sof_handle_t)inst, inst->sofi_cookie,
1255                                     &maxblk, &wroff, &tail);
1256                         }
1257                 }
1258                 mutex_enter(&so->so_lock);
1259                 so->so_proto_props.sopp_maxblk = maxblk;
1260                 so->so_proto_props.sopp_wroff = wroff;
1261                 so->so_proto_props.sopp_tail = tail;
1262                 mutex_exit(&so->so_lock);
1263         }
1264 #ifdef DEBUG
1265         soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
1266             SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
1267             SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
1268             SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
1269             SOCKOPT_LOOPBACK);
1270         ASSERT(soppp->sopp_flags == 0);
1271 #endif
1272 }
1273 
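     /*
      * Common code for the receive upcall: queue incoming data on the
      * socket.  A NULL mblk is just a notification when the protocol
      * supports sd_recv_uio.  Data passes through the filter stack, OOB
      * data is diverted to so_queue_oob(), and everything else is put on
      * the receive queue.  The return value is the remaining receive
      * buffer space, or -1 with *errorp set when the socket is flow
      * controlled (ENOSPC) or has fallen back to TPI (EOPNOTSUPP).
      */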
1274 /* ARGSUSED */
1275 ssize_t
1276 so_queue_msg_impl(struct sonode *so, mblk_t *mp,
1277     size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp,
1278     sof_instance_t *filter)
1279 {
1280         boolean_t force_push = B_TRUE;
1281         int space_left;
1282         sodirect_t *sodp = so->so_direct;
1283 
1284         ASSERT(errorp != NULL);
1285         *errorp = 0;
1286         if (mp == NULL) {
1287                 if (so->so_downcalls->sd_recv_uio != NULL) {
1288                         mutex_enter(&so->so_lock);
1289                         /* the notify functions will drop the lock */
1290                         if (flags & MSG_OOB)
1291                                 so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1292                         else
1293                                 so_notify_data(so, msg_size);
1294                         return (0);
1295                 }
1296                 ASSERT(msg_size == 0);
1297                 mutex_enter(&so->so_lock);
1298                 goto space_check;
1299         }
1300 
1301         ASSERT(mp->b_next == NULL);
1302         ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
1303         ASSERT(msg_size == msgdsize(mp));
1304 
1305         if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1306                 /* The read pointer is not aligned correctly for TPI */
1307                 zcmn_err(getzoneid(), CE_WARN,
1308                     "sockfs: Unaligned TPI message received. rptr = %p\n",
1309                     (void *)mp->b_rptr);
1310                 freemsg(mp);
1311                 mutex_enter(&so->so_lock);
1312                 if (sodp != NULL)
1313                         SOD_UIOAFINI(sodp);
1314                 goto space_check;
1315         }
1316 
1317         if (so->so_filter_active > 0) {
1318                 for (; filter != NULL; filter = filter->sofi_prev) {
1319                         if (!SOF_INTERESTED(filter, data_in))
1320                                 continue;
1321                         mp = (*filter->sofi_ops->sofop_data_in)(
1322                             (sof_handle_t)filter, filter->sofi_cookie, mp,
1323                             flags, &msg_size);
1324                         ASSERT(msgdsize(mp) == msg_size);
1325                         DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
1326                             (mblk_t *), mp);
1327                         /* Data was consumed/dropped, just do space check */
1328                         if (msg_size == 0) {
1329                                 mutex_enter(&so->so_lock);
1330                                 goto space_check;
1331                         }
1332                 }
1333         }
1334 
1335         if (flags & MSG_OOB) {
1336                 so_queue_oob(so, mp, msg_size);
1337                 mutex_enter(&so->so_lock);
1338                 goto space_check;
1339         }
1340 
1341         if (force_pushp != NULL)
1342                 force_push = *force_pushp;
1343 
1344         mutex_enter(&so->so_lock);
1345         if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
1346                 if (sodp != NULL)
1347                         SOD_DISABLE(sodp);
1348                 mutex_exit(&so->so_lock);
1349                 *errorp = EOPNOTSUPP;
1350                 return (-1);
1351         }
1352         if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
1353                 freemsg(mp);
1354                 if (sodp != NULL)
1355                         SOD_DISABLE(sodp);
1356                 mutex_exit(&so->so_lock);
1357                 return (0);
1358         }
1359 
1360         /* process the mblk via I/OAT if capable */
1361         if (sodp != NULL && sodp->sod_enabled) {
1362                 if (DB_TYPE(mp) == M_DATA) {
1363                         sod_uioa_mblk_init(sodp, mp, msg_size);
1364                 } else {
1365                         SOD_UIOAFINI(sodp);
1366                 }
1367         }
1368 
1369         if (mp->b_next == NULL) {
1370                 so_enqueue_msg(so, mp, msg_size);
1371         } else {
1372                 do {
1373                         mblk_t *nmp;
1374 
1375                         if ((nmp = mp->b_next) != NULL) {
1376                                 mp->b_next = NULL;
1377                         }
1378                         so_enqueue_msg(so, mp, msgdsize(mp));
1379                         mp = nmp;
1380                 } while (mp != NULL);
1381         }
1382 
1383         space_left = so->so_rcvbuf - so->so_rcv_queued;
1384         if (space_left <= 0) {
1385                 so->so_flowctrld = B_TRUE;
1386                 *errorp = ENOSPC;
1387                 space_left = -1;
1388         }
1389 
1390         if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
1391             so->so_rcv_queued >= so->so_rcv_wanted) {
1392                 SOCKET_TIMER_CANCEL(so);
1393                 /*
1394                  * so_notify_data will release the lock
1395                  */
1396                 so_notify_data(so, so->so_rcv_queued);
1397 
1398                 if (force_pushp != NULL)
1399                         *force_pushp = B_TRUE;
1400                 goto done;
1401         } else if (so->so_rcv_timer_tid == 0) {
1402                 /* Make sure the recv push timer is running */
1403                 SOCKET_TIMER_START(so);
1404         }
1405 
1406 done_unlock:
1407         mutex_exit(&so->so_lock);
1408 done:
1409         return (space_left);
1410 
1411 space_check:
1412         space_left = so->so_rcvbuf - so->so_rcv_queued;
1413         if (space_left <= 0) {
1414                 so->so_flowctrld = B_TRUE;
1415                 *errorp = ENOSPC;
1416                 space_left = -1;
1417         }
1418         goto done_unlock;
1419 }
1420 
1421 #pragma inline(so_queue_msg_impl)
1422 
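     /*
      * Receive upcall: the protocol hands inbound data (a single mblk or a
      * b_next chain) to sockfs through this entry point, which is installed
      * in the so_upcalls vector at the bottom of this file.
      *
      * A minimal usage sketch (hypothetical caller state, for illustration
      * only; a real protocol calls through its saved upcall vector):
      *
      *     int error = 0;
      *     boolean_t push = B_FALSE;
      *     ssize_t space;
      *
      *     space = so_queue_msg(upper_handle, mp, msgdsize(mp), 0,
      *         &error, &push);
      *     if (space < 0 && error == ENOSPC)
      *             hold further data until the receive queue drains;
      *
      * The return value is the space remaining in the receive buffer; -1
      * with *errorp set to ENOSPC indicates flow control, while -1 with
      * EOPNOTSUPP means a fallback to TPI is in progress.
      */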
1423 ssize_t
1424 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
1425     size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp)
1426 {
1427         struct sonode *so = (struct sonode *)sock_handle;
1428 
1429         return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
1430             so->so_filter_bottom));
1431 }
1432 
1433 /*
1434  * Set the offset of the out-of-band data relative to the bytes already
1435  * queued, and notify the socket so that SIGURG can be generated.
1436  */
1437 void
1438 so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
1439 {
1440         struct sonode *so;
1441 
1442         ASSERT(offset >= 0);
1443         so = (struct sonode *)sock_handle;
1444         mutex_enter(&so->so_lock);
1445         if (so->so_direct != NULL)
1446                 SOD_UIOAFINI(so->so_direct);
1447 
1448         /*
1449          * New urgent data is on the way, so forget about any old
1450          * urgent data.
1451          */
1452         so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1453 
1454         /*
1455          * Record that urgent data is pending.
1456          */
1457         so->so_state |= SS_OOBPEND;
1458 
1459         if (so->so_oobmsg != NULL) {
1460                 dprintso(so, 1, ("sock: discarding old oob\n"));
1461                 freemsg(so->so_oobmsg);
1462                 so->so_oobmsg = NULL;
1463         }
1464 
1465         /*
1466          * Set the offset at which the urgent byte is located.
1467          */
1468         so->so_oobmark = so->so_rcv_queued + offset;
1469         if (so->so_oobmark == 0)
1470                 so->so_state |= SS_RCVATMARK;
1471         else
1472                 so->so_state &= ~SS_RCVATMARK;
1473 
1474         so_notify_oobsig(so);
1475 }
1476 
1477 /*
1478  * Queue the OOB byte: held aside in so_oobmsg, or inline with normal data.
1479  */
1480 static void
1481 so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
1482 {
1483         mutex_enter(&so->so_lock);
1484         if (so->so_direct != NULL)
1485                 SOD_UIOAFINI(so->so_direct);
1486 
1487         ASSERT(mp != NULL);
1488         if (!IS_SO_OOB_INLINE(so)) {
1489                 so->so_oobmsg = mp;
1490                 so->so_state |= SS_HAVEOOBDATA;
1491         } else {
1492                 so_enqueue_msg(so, mp, len);
1493         }
1494 
1495         so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1496 }
1497 
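     /*
      * Close the socket.  Mark it SS_CLOSING so no new data is enqueued,
      * flush the receive queue (and, for a listener, the accept queue), then
      * ask the protocol to close.  The protocol either closes synchronously,
      * in which case its vnode reference is released here, or returns
      * EINPROGRESS and drops the reference later via the so_closed() upcall.
      */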
1498 int
1499 so_close(struct sonode *so, int flag, struct cred *cr)
1500 {
1501         int error;
1502 
1503         /*
1504          * No new data will be enqueued once the CLOSING flag is set.
1505          */
1506         mutex_enter(&so->so_lock);
1507         so->so_state |= SS_CLOSING;
1508         ASSERT(so_verify_oobstate(so));
1509         so_rcv_flush(so);
1510         mutex_exit(&so->so_lock);
1511 
1512         if (so->so_filter_active > 0)
1513                 sof_sonode_closing(so);
1514 
1515         if (so->so_state & SS_ACCEPTCONN) {
1516                 /*
1517                  * We grab and release the accept lock to ensure that any
1518                  * thread about to insert a socket in so_newconn completes
1519                  * before we flush the queue. Any thread calling so_newconn
1520                  * after we drop the lock will observe the SS_CLOSING flag,
1521                  * which will stop it from inserting the socket in the queue.
1522                  */
1523                 mutex_enter(&so->so_acceptq_lock);
1524                 mutex_exit(&so->so_acceptq_lock);
1525 
1526                 so_acceptq_flush(so, B_TRUE);
1527         }
1528 
1529         error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
1530         switch (error) {
1531         default:
1532                 /* Protocol made a synchronous close; remove proto ref */
1533                 VN_RELE(SOTOV(so));
1534                 break;
1535         case EINPROGRESS:
1536                 /*
1537                  * The protocol is in the process of closing; it will make
1538                  * a 'closed' upcall to remove the reference.
1539                  */
1540                 error = 0;
1541                 break;
1542         }
1543 
1544         return (error);
1545 }
1546 
1547 /*
1548  * Upcall made by the protocol when it's doing an asynchronous close. It
1549  * will drop the protocol's reference on the socket.
1550  */
1551 void
1552 so_closed(sock_upper_handle_t sock_handle)
1553 {
1554         struct sonode *so = (struct sonode *)sock_handle;
1555 
1556         VN_RELE(SOTOV(so));
1557 }
1558 
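     /*
      * Allocate and return an mblk containing a conn_pid_info_t that lists
      * the process IDs tracked in so_pid_tree for this socket; returns NULL
      * if the allocation fails.
      */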
1559 mblk_t *
1560 so_get_sock_pid_mblk(sock_upper_handle_t sock_handle)
1561 {
1562         ulong_t sz, n;
1563         mblk_t *mblk;
1564         pid_node_t *pn;
1565         pid_t *pids;
1566         conn_pid_info_t *cpi;
1567         struct sonode *so = (struct sonode *)sock_handle;
1568 
1569         mutex_enter(&so->so_pid_tree_lock);
1570 
1571         n = avl_numnodes(&so->so_pid_tree);
1572         sz = sizeof (conn_pid_info_t);
1573         sz += (n > 1) ? ((n - 1) * sizeof (pid_t)) : 0;
1574         if ((mblk = allocb(sz, BPRI_HI)) == NULL) {
1575                 mutex_exit(&so->so_pid_tree_lock);
1576                 return (NULL);
1577         }
1578         mblk->b_wptr += sz;
1579         cpi = (conn_pid_info_t *)mblk->b_datap->db_base;
1580 
1581         cpi->cpi_contents = CONN_PID_INFO_SOC;
1582         cpi->cpi_pids_cnt = n;
1583         cpi->cpi_tot_size = sz;
1584         cpi->cpi_pids[0] = 0;
1585 
1586         if (cpi->cpi_pids_cnt > 0) {
1587                 pids = cpi->cpi_pids;
1588                 for (pn = avl_first(&so->so_pid_tree); pn != NULL;
1589                     pids++, pn = AVL_NEXT(&so->so_pid_tree, pn))
1590                         *pids = pn->pn_pid;
1591         }
1592         mutex_exit(&so->so_pid_tree_lock);
1593         return (mblk);
1594 }
1595 
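     /*
      * Upcall from the protocol indicating that a zero-copy operation has
      * completed; set STZCNOTIFY and wake any thread waiting on so_copy_cv.
      */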
1596 void
1597 so_zcopy_notify(sock_upper_handle_t sock_handle)
1598 {
1599         struct sonode *so = (struct sonode *)sock_handle;
1600 
1601         mutex_enter(&so->so_lock);
1602         so->so_copyflag |= STZCNOTIFY;
1603         cv_broadcast(&so->so_copy_cv);
1604         mutex_exit(&so->so_lock);
1605 }
1606 
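     /*
      * Upcall from the protocol to post an asynchronous error on the socket.
      * The so_lock taken here is released by so_notify_error().
      */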
1607 void
1608 so_set_error(sock_upper_handle_t sock_handle, int error)
1609 {
1610         struct sonode *so = (struct sonode *)sock_handle;
1611 
1612         mutex_enter(&so->so_lock);
1613 
1614         soseterror(so, error);
1615 
1616         so_notify_error(so);
1617 }
1618 
1619 /*
1620  * so_recvmsg - read data from the socket
1621  *
1622  * There are two ways of obtaining data: either we ask the protocol to
1623  * copy directly into the supplied buffer, or we copy data from the
1624  * sonode's receive queue. Which one is used depends on whether the
1625  * protocol has a sd_recv_uio down call.
1626  */
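     /*
      * Messages dequeued from the receive queue may carry a TPI control
      * part: T_DATA_IND for connection-oriented data, T_UNITDATA_IND for
      * datagrams (with source address and options), or T_OPTDATA_IND for
      * data accompanied by ancillary options.
      */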
1627 int
1628 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
1629     struct cred *cr)
1630 {
1631         rval_t          rval;
1632         int             flags = 0;
1633         t_uscalar_t     controllen, namelen;
1634         int             error = 0;
1635         int ret;
1636         mblk_t          *mctlp = NULL;
1637         union T_primitives *tpr;
1638         void            *control;
1639         ssize_t         saved_resid;
1640         struct uio      *suiop;
1641 
1642         SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));
1643 
1644         if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
1645             (so->so_mode & SM_CONNREQUIRED)) {
1646                 SO_UNBLOCK_FALLBACK(so);
1647                 return (ENOTCONN);
1648         }
1649 
1650         if (msg->msg_flags & MSG_PEEK)
1651                 msg->msg_flags &= ~MSG_WAITALL;
1652 
1653         if (so->so_mode & SM_ATOMIC)
1654                 msg->msg_flags |= MSG_TRUNC;
1655 
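             /*
              * Urgent (out-of-band) data is handled up front: either the
              * protocol's sd_recv_uio down call retrieves it, or it is read
              * from so_oobmsg via sorecvoob().
              */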
1656         if (msg->msg_flags & MSG_OOB) {
1657                 if ((so->so_mode & SM_EXDATA) == 0) {
1658                         error = EOPNOTSUPP;
1659                 } else if (so->so_downcalls->sd_recv_uio != NULL) {
1660                         error = (*so->so_downcalls->sd_recv_uio)
1661                             (so->so_proto_handle, uiop, msg, cr);
1662                 } else {
1663                         error = sorecvoob(so, msg, uiop, msg->msg_flags,
1664                             IS_SO_OOB_INLINE(so));
1665                 }
1666                 SO_UNBLOCK_FALLBACK(so);
1667                 return (error);
1668         }
1669 
1670         /*
1671          * If the protocol has the recv down call, then pass the request
1672          * down.
1673          */
1674         if (so->so_downcalls->sd_recv_uio != NULL) {
1675                 error = (*so->so_downcalls->sd_recv_uio)
1676                     (so->so_proto_handle, uiop, msg, cr);
1677                 SO_UNBLOCK_FALLBACK(so);
1678                 return (error);
1679         }
1680 
1681         /*
1682          * Reading data from the socket buffer
1683          */
1684         flags = msg->msg_flags;
1685         msg->msg_flags = 0;
1686 
1687         /*
1688          * Set msg_controllen and msg_namelen to zero here to make it
1689          * simpler in the cases where no control or name is returned.
1690          */
1691         controllen = msg->msg_controllen;
1692         namelen = msg->msg_namelen;
1693         msg->msg_controllen = 0;
1694         msg->msg_namelen = 0;
1695 
1696         mutex_enter(&so->so_lock);
1697         /* Set SOREADLOCKED */
1698         error = so_lock_read_intr(so,
1699             uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
1700         mutex_exit(&so->so_lock);
1701         if (error) {
1702                 SO_UNBLOCK_FALLBACK(so);
1703                 return (error);
1704         }
1705 
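             /*
              * Prepare for a sodirect (I/OAT assisted) receive when the
              * socket supports it; sod_rcv_done() below undoes this setup
              * once the receive completes.
              */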
1706         suiop = sod_rcv_init(so, flags, &uiop);
1707 retry:
1708         saved_resid = uiop->uio_resid;
1709         error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
1710         if (error != 0) {
1711                 goto out;
1712         }
1713         /*
1714          * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
1715          * For non-datagrams MOREDATA is used to set MSG_EOR.
1716          */
1717         ASSERT(!(rval.r_val1 & MORECTL));
1718         if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
1719                 msg->msg_flags |= MSG_TRUNC;
1720         if (mctlp == NULL) {
1721                 dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));
1722 
1723                 mutex_enter(&so->so_lock);
1724                 /* Set MSG_EOR based on MOREDATA */
1725                 if (!(rval.r_val1 & MOREDATA)) {
1726                         if (so->so_state & SS_SAVEDEOR) {
1727                                 msg->msg_flags |= MSG_EOR;
1728                                 so->so_state &= ~SS_SAVEDEOR;
1729                         }
1730                 }
1731                 /*
1732                  * If some data was received (i.e. not EOF) and the
1733                  * read/recv* has not been satisfied, wait for some more.
1734                  */
1735                 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1736                     uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1737                         mutex_exit(&so->so_lock);
1738                         flags |= MSG_NOMARK;
1739                         goto retry;
1740                 }
1741 
1742                 goto out_locked;
1743         }
1744         /* so_queue_msg has already verified length and alignment */
1745         tpr = (union T_primitives *)mctlp->b_rptr;
1746         dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
1747         switch (tpr->type) {
1748         case T_DATA_IND: {
1749                 /*
1750                  * Set msg_flags to MSG_EOR based on
1751                  * MORE_flag and MOREDATA.
1752                  */
1753                 mutex_enter(&so->so_lock);
1754                 so->so_state &= ~SS_SAVEDEOR;
1755                 if (!(tpr->data_ind.MORE_flag & 1)) {
1756                         if (!(rval.r_val1 & MOREDATA))
1757                                 msg->msg_flags |= MSG_EOR;
1758                         else
1759                                 so->so_state |= SS_SAVEDEOR;
1760                 }
1761                 freemsg(mctlp);
1762                 /*
1763                  * If some data was received (i.e. not EOF) and the
1764                  * read/recv* has not been satisfied, wait for some more.
1765                  */
1766                 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1767                     uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1768                         mutex_exit(&so->so_lock);
1769                         flags |= MSG_NOMARK;
1770                         goto retry;
1771                 }
1772                 goto out_locked;
1773         }
1774         case T_UNITDATA_IND: {
1775                 void *addr;
1776                 t_uscalar_t addrlen;
1777                 void *abuf;
1778                 t_uscalar_t optlen;
1779                 void *opt;
1780 
1781                 if (namelen != 0) {
1782                         /* Caller wants source address */
1783                         addrlen = tpr->unitdata_ind.SRC_length;
1784                         addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
1785                             addrlen, 1);
1786                         if (addr == NULL) {
1787                                 freemsg(mctlp);
1788                                 error = EPROTO;
1789                                 eprintsoline(so, error);
1790                                 goto out;
1791                         }
1792                         ASSERT(so->so_family != AF_UNIX);
1793                 }
1794                 optlen = tpr->unitdata_ind.OPT_length;
1795                 if (optlen != 0) {
1796                         t_uscalar_t ncontrollen;
1797 
1798                         /*
1799                          * Extract any source address option.
1800                          * Determine how large a cmsg buffer is needed.
1801                          */
1802                         opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
1803                             optlen, __TPI_ALIGN_SIZE);
1804 
1805                         if (opt == NULL) {
1806                                 freemsg(mctlp);
1807                                 error = EPROTO;
1808                                 eprintsoline(so, error);
1809                                 goto out;
1810                         }
1811                         if (so->so_family == AF_UNIX)
1812                                 so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
1813                         ncontrollen = so_cmsglen(mctlp, opt, optlen,
1814                             !(flags & MSG_XPG4_2));
1815                         if (controllen != 0)
1816                                 controllen = ncontrollen;
1817                         else if (ncontrollen != 0)
1818                                 msg->msg_flags |= MSG_CTRUNC;
1819                 } else {
1820                         controllen = 0;
1821                 }
1822 
1823                 if (namelen != 0) {
1824                         /*
1825                          * Return address to caller.
1826                          * Caller handles truncation if length
1827                          * exceeds msg_namelen.
1828                          * NOTE: AF_UNIX NUL termination is ensured by
1829                          * the sender's copyin_name().
1830                          */
1831                         abuf = kmem_alloc(addrlen, KM_SLEEP);
1832 
1833                         bcopy(addr, abuf, addrlen);
1834                         msg->msg_name = abuf;
1835                         msg->msg_namelen = addrlen;
1836                 }
1837 
1838                 if (controllen != 0) {
1839                         /*
1840                          * Return control msg to caller.
1841                          * Caller handles truncation if length
1842                          * exceeds msg_controllen.
1843                          */
1844                         control = kmem_zalloc(controllen, KM_SLEEP);
1845 
1846                         error = so_opt2cmsg(mctlp, opt, optlen,
1847                             !(flags & MSG_XPG4_2), control, controllen);
1848                         if (error) {
1849                                 freemsg(mctlp);
1850                                 if (msg->msg_namelen != 0)
1851                                         kmem_free(msg->msg_name,
1852                                             msg->msg_namelen);
1853                                 kmem_free(control, controllen);
1854                                 eprintsoline(so, error);
1855                                 goto out;
1856                         }
1857                         msg->msg_control = control;
1858                         msg->msg_controllen = controllen;
1859                 }
1860 
1861                 freemsg(mctlp);
1862                 goto out;
1863         }
1864         case T_OPTDATA_IND: {
1865                 struct T_optdata_req *tdr;
1866                 void *opt;
1867                 t_uscalar_t optlen;
1868 
1869                 tdr = (struct T_optdata_req *)mctlp->b_rptr;
1870                 optlen = tdr->OPT_length;
1871                 if (optlen != 0) {
1872                         t_uscalar_t ncontrollen;
1873                         /*
1874                          * Determine how large a cmsg buffer is needed.
1875                          */
1876                         opt = sogetoff(mctlp,
1877                             tpr->optdata_ind.OPT_offset, optlen,
1878                             __TPI_ALIGN_SIZE);
1879 
1880                         if (opt == NULL) {
1881                                 freemsg(mctlp);
1882                                 error = EPROTO;
1883                                 eprintsoline(so, error);
1884                                 goto out;
1885                         }
1886 
1887                         ncontrollen = so_cmsglen(mctlp, opt, optlen,
1888                             !(flags & MSG_XPG4_2));
1889                         if (controllen != 0)
1890                                 controllen = ncontrollen;
1891                         else if (ncontrollen != 0)
1892                                 msg->msg_flags |= MSG_CTRUNC;
1893                 } else {
1894                         controllen = 0;
1895                 }
1896 
1897                 if (controllen != 0) {
1898                         /*
1899                          * Return control msg to caller.
1900                          * Caller handles truncation if length
1901                          * exceeds msg_controllen.
1902                          */
1903                         control = kmem_zalloc(controllen, KM_SLEEP);
1904 
1905                         error = so_opt2cmsg(mctlp, opt, optlen,
1906                             !(flags & MSG_XPG4_2), control, controllen);
1907                         if (error) {
1908                                 freemsg(mctlp);
1909                                 kmem_free(control, controllen);
1910                                 eprintsoline(so, error);
1911                                 goto out;
1912                         }
1913                         msg->msg_control = control;
1914                         msg->msg_controllen = controllen;
1915                 }
1916 
1917                 /*
1918                  * Set msg_flags to MSG_EOR based on DATA_flag (read here
1919                  * via the overlaid data_ind.MORE_flag) and MOREDATA.
1920                  */
1921                 mutex_enter(&so->so_lock);
1922                 so->so_state &= ~SS_SAVEDEOR;
1923                 if (!(tpr->data_ind.MORE_flag & 1)) {
1924                         if (!(rval.r_val1 & MOREDATA))
1925                                 msg->msg_flags |= MSG_EOR;
1926                         else
1927                                 so->so_state |= SS_SAVEDEOR;
1928                 }
1929                 freemsg(mctlp);
1930                 /*
1931                  * If some data was received (i.e. not EOF) and the
1932                  * read/recv* has not been satisfied, wait for some more.
1933                  * Waiting is not possible if control info was received.
1934                  */
1935                 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1936                     controllen == 0 &&
1937                     uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1938                         mutex_exit(&so->so_lock);
1939                         flags |= MSG_NOMARK;
1940                         goto retry;
1941                 }
1942                 goto out_locked;
1943         }
1944         default:
1945                 cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
1946                     tpr->type);
1947                 freemsg(mctlp);
1948                 error = EPROTO;
1949                 ASSERT(0);
1950         }
1951 out:
1952         mutex_enter(&so->so_lock);
1953 out_locked:
1954         ret = sod_rcv_done(so, suiop, uiop);
1955         if (ret != 0 && error == 0)
1956                 error = ret;
1957 
1958         so_unlock_read(so);     /* Clear SOREADLOCKED */
1959         mutex_exit(&so->so_lock);
1960 
1961         SO_UNBLOCK_FALLBACK(so);
1962 
1963         return (error);
1964 }
1965 
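     /*
      * The generic socket operations vector; these entry points implement
      * the socket system calls for sockets served by this common sockfs
      * code.
      */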
1966 sonodeops_t so_sonodeops = {
1967         so_init,                /* sop_init     */
1968         so_accept,              /* sop_accept   */
1969         so_bind,                /* sop_bind     */
1970         so_listen,              /* sop_listen   */
1971         so_connect,             /* sop_connect  */
1972         so_recvmsg,             /* sop_recvmsg  */
1973         so_sendmsg,             /* sop_sendmsg  */
1974         so_sendmblk,            /* sop_sendmblk */
1975         so_getpeername,         /* sop_getpeername */
1976         so_getsockname,         /* sop_getsockname */
1977         so_shutdown,            /* sop_shutdown */
1978         so_getsockopt,          /* sop_getsockopt */
1979         so_setsockopt,          /* sop_setsockopt */
1980         so_ioctl,               /* sop_ioctl    */
1981         so_poll,                /* sop_poll     */
1982         so_close,               /* sop_close */
1983 };
1984 
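     /*
      * Upcalls made available to the protocol; it uses them to deliver
      * received data and to report connection state changes, errors, and
      * other events back to sockfs.
      */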
1985 sock_upcalls_t so_upcalls = {
1986         so_newconn,
1987         so_connected,
1988         so_disconnected,
1989         so_opctl,
1990         so_queue_msg,
1991         so_set_prop,
1992         so_txq_full,
1993         so_signal_oob,
1994         so_zcopy_notify,
1995         so_set_error,
1996         so_closed,
1997         so_get_sock_pid_mblk
1998 };