1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2014, Joyent, Inc.  All rights reserved.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/systm.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/debug.h>
  35 #include <sys/cmn_err.h>
  36 
  37 #include <sys/stropts.h>
  38 #include <sys/socket.h>
  39 #include <sys/socketvar.h>
  40 #include <sys/fcntl.h>
  41 
  42 #define _SUN_TPI_VERSION        2
  43 #include <sys/tihdr.h>
  44 #include <sys/sockio.h>
  45 #include <sys/kmem_impl.h>
  46 
  47 #include <sys/strsubr.h>
  48 #include <sys/strsun.h>
  49 #include <sys/ddi.h>
  50 #include <netinet/in.h>
  51 #include <inet/ip.h>
  52 
  53 #include <fs/sockfs/sockcommon.h>
  54 #include <fs/sockfs/sockfilter_impl.h>
  55 
  56 #include <sys/socket_proto.h>
  57 
  58 #include <fs/sockfs/socktpi_impl.h>
  59 #include <fs/sockfs/sodirect.h>
  60 #include <sys/tihdr.h>
  61 #include <fs/sockfs/nl7c.h>
  62 
  63 extern int xnet_skip_checks;
  64 extern int xnet_check_print;
  65 
  66 static void so_queue_oob(struct sonode *, mblk_t *, size_t);
  67 
  68 
  69 /*ARGSUSED*/
  70 int
  71 so_accept_notsupp(struct sonode *lso, int fflag,
  72     struct cred *cr, struct sonode **nsop)
  73 {
  74         return (EOPNOTSUPP);
  75 }
  76 
  77 /*ARGSUSED*/
  78 int
  79 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
  80 {
  81         return (EOPNOTSUPP);
  82 }
  83 
  84 /*ARGSUSED*/
  85 int
  86 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
  87     socklen_t *len, struct cred *cr)
  88 {
  89         return (EOPNOTSUPP);
  90 }
  91 
  92 /*ARGSUSED*/
  93 int
  94 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
  95     socklen_t *addrlen, boolean_t accept, struct cred *cr)
  96 {
  97         return (EOPNOTSUPP);
  98 }
  99 
 100 /*ARGSUSED*/
 101 int
 102 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
 103 {
 104         return (EOPNOTSUPP);
 105 }
 106 
 107 /*ARGSUSED*/
 108 int
 109 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
 110     struct cred *cr, mblk_t **mpp)
 111 {
 112         return (EOPNOTSUPP);
 113 }
 114 
 115 /*
 116  * Generic Socket Ops
 117  */
 118 
 119 /* ARGSUSED */
 120 int
 121 so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
 122 {
 123         return (socket_init_common(so, pso, flags, cr));
 124 }
 125 
 126 int
 127 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
 128     int flags, struct cred *cr)
 129 {
 130         int error;
 131 
 132         SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
 133 
 134         ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
 135 
 136         /* X/Open requires this check */
 137         if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
 138                 if (xnet_check_print) {
 139                         printf("sockfs: X/Open bind state check "
 140                             "caused EINVAL\n");
 141                 }
 142                 error = EINVAL;
 143                 goto done;
 144         }
 145 
 146         /*
 147          * a bind to a NULL address is interpreted as unbind. So just
 148          * do the downcall.
 149          */
 150         if (name == NULL)
 151                 goto dobind;
 152 
 153         switch (so->so_family) {
 154         case AF_INET:
 155                 if ((size_t)namelen != sizeof (sin_t)) {
 156                         error = name->sa_family != so->so_family ?
 157                             EAFNOSUPPORT : EINVAL;
 158                         eprintsoline(so, error);
 159                         goto done;
 160                 }
 161 
 162                 if ((flags & _SOBIND_XPG4_2) &&
 163                     (name->sa_family != so->so_family)) {
 164                         /*
 165                          * This check has to be made for X/Open
 166                          * sockets however application failures have
 167                          * been observed when it is applied to
 168                          * all sockets.
 169                          */
 170                         error = EAFNOSUPPORT;
 171                         eprintsoline(so, error);
 172                         goto done;
 173                 }
 174                 /*
 175                  * Force a zero sa_family to match so_family.
 176                  *
 177                  * Some programs like inetd(1M) don't set the
 178                  * family field. Other programs leave
 179                  * sin_family set to garbage - SunOS 4.X does
 180                  * not check the family field on a bind.
 181                  * We use the family field that
 182                  * was passed in to the socket() call.
 183                  */
 184                 name->sa_family = so->so_family;
 185                 break;
 186 
 187         case AF_INET6: {
 188 #ifdef DEBUG
 189                 sin6_t *sin6 = (sin6_t *)name;
 190 #endif
 191                 if ((size_t)namelen != sizeof (sin6_t)) {
 192                         error = name->sa_family != so->so_family ?
 193                             EAFNOSUPPORT : EINVAL;
 194                         eprintsoline(so, error);
 195                         goto done;
 196                 }
 197 
 198                 if (name->sa_family != so->so_family) {
 199                         /*
 200                          * With IPv6 we require the family to match
 201                          * unlike in IPv4.
 202                          */
 203                         error = EAFNOSUPPORT;
 204                         eprintsoline(so, error);
 205                         goto done;
 206                 }
 207 #ifdef DEBUG
 208                 /*
 209                  * Verify that apps don't forget to clear
 210                  * sin6_scope_id etc
 211                  */
 212                 if (sin6->sin6_scope_id != 0 &&
 213                     !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
 214                         zcmn_err(getzoneid(), CE_WARN,
 215                             "bind with uninitialized sin6_scope_id "
 216                             "(%d) on socket. Pid = %d\n",
 217                             (int)sin6->sin6_scope_id,
 218                             (int)curproc->p_pid);
 219                 }
 220                 if (sin6->__sin6_src_id != 0) {
 221                         zcmn_err(getzoneid(), CE_WARN,
 222                             "bind with uninitialized __sin6_src_id "
 223                             "(%d) on socket. Pid = %d\n",
 224                             (int)sin6->__sin6_src_id,
 225                             (int)curproc->p_pid);
 226                 }
 227 #endif /* DEBUG */
 228 
 229                 break;
 230         }
 231         default:
 232                 /* Just pass the request to the protocol */
 233                 goto dobind;
 234         }
 235 
 236         /*
 237          * First we check if either NCA or KSSL has been enabled for
 238          * the requested address, and if so, we fall back to TPI.
 239          * If neither of those two services are enabled, then we just
 240          * pass the request to the protocol.
 241          *
 242          * Note that KSSL can only be enabled on a socket if NCA is NOT
 243          * enabled for that socket, hence the else-statement below.
 244          */
 245         if (nl7c_enabled && ((so->so_family == AF_INET ||
 246             so->so_family == AF_INET6) &&
 247             nl7c_lookup_addr(name, namelen) != NULL)) {
 248                 /*
 249                  * NL7C is not supported in non-global zones,
 250                  * we enforce this restriction here.
 251                  */
 252                 if (so->so_zoneid == GLOBAL_ZONEID) {
 253                         /* NCA should be used, so fall back to TPI */
 254                         error = so_tpi_fallback(so, cr);
 255                         SO_UNBLOCK_FALLBACK(so);
 256                         if (error)
 257                                 return (error);
 258                         else
 259                                 return (SOP_BIND(so, name, namelen, flags, cr));
 260                 }
 261         }
 262 
 263 dobind:
 264         if (so->so_filter_active == 0 ||
 265             (error = sof_filter_bind(so, name, &namelen, cr)) < 0) {
 266                 error = (*so->so_downcalls->sd_bind)
 267                     (so->so_proto_handle, name, namelen, cr);
 268         }
 269 done:
 270         SO_UNBLOCK_FALLBACK(so);
 271 
 272         return (error);
 273 }
 274 
 275 int
 276 so_listen(struct sonode *so, int backlog, struct cred *cr)
 277 {
 278         int     error = 0;
 279 
 280         ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 281         SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));
 282 
 283         if ((so)->so_filter_active == 0 ||
 284             (error = sof_filter_listen(so, &backlog, cr)) < 0)
 285                 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle,
 286                     backlog, cr);
 287 
 288         SO_UNBLOCK_FALLBACK(so);
 289 
 290         return (error);
 291 }
 292 
 293 
 294 int
 295 so_connect(struct sonode *so, struct sockaddr *name,
 296     socklen_t namelen, int fflag, int flags, struct cred *cr)
 297 {
 298         int error = 0;
 299         sock_connid_t id;
 300 
 301         ASSERT(MUTEX_NOT_HELD(&so->so_lock));
 302         SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));
 303 
 304         /*
 305          * If there is a pending error, return error
 306          * This can happen if a non blocking operation caused an error.
 307          */
 308 
 309         if (so->so_error != 0) {
 310                 mutex_enter(&so->so_lock);
 311                 error = sogeterr(so, B_TRUE);
 312                 mutex_exit(&so->so_lock);
 313                 if (error != 0)
 314                         goto done;
 315         }
 316 
 317         if (so->so_filter_active == 0 ||
 318             (error = sof_filter_connect(so, (struct sockaddr *)name,
 319             &namelen, cr)) < 0) {
 320                 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
 321                     name, namelen, &id, cr);
 322 
 323                 if (error == EINPROGRESS)
 324                         error = so_wait_connected(so,
 325                             fflag & (FNONBLOCK|FNDELAY), id);
 326         }
 327 done:
 328         SO_UNBLOCK_FALLBACK(so);
 329         return (error);
 330 }
 331 
 332 /*ARGSUSED*/
 333 int
 334 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
 335 {
 336         int error = 0;
 337         struct sonode *nso;
 338 
 339         *nsop = NULL;
 340 
 341         SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
 342         if ((so->so_state & SS_ACCEPTCONN) == 0) {
 343                 SO_UNBLOCK_FALLBACK(so);
 344                 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
 345                     EOPNOTSUPP : EINVAL);
 346         }
 347 
 348         if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
 349             &nso)) == 0) {
 350                 ASSERT(nso != NULL);
 351 
 352                 /* finish the accept */
 353                 if ((so->so_filter_active > 0 &&
 354                     (error = sof_filter_accept(nso, cr)) > 0) ||
 355                     (error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
 356                     nso->so_proto_handle, (sock_upper_handle_t)nso, cr)) != 0) {
 357                         (void) socket_close(nso, 0, cr);
 358                         socket_destroy(nso);
 359                 } else {
 360                         *nsop = nso;
 361                         sonode_insert_pid(nso, curproc);
 362                 }
 363         }
 364 
 365         SO_UNBLOCK_FALLBACK(so);
 366         return (error);
 367 }
 368 
 369 int
 370 so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
 371     struct cred *cr)
 372 {
 373         int error, flags;
 374         boolean_t dontblock;
 375         ssize_t orig_resid;
 376         mblk_t  *mp;
 377 
 378         SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));
 379 
 380         flags = msg->msg_flags;
 381         error = 0;
 382         dontblock = (flags & MSG_DONTWAIT) ||
 383             (uiop->uio_fmode & (FNONBLOCK|FNDELAY));
 384 
 385         if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
 386                 /*
 387                  * Old way of passing fd's is not supported
 388                  */
 389                 SO_UNBLOCK_FALLBACK(so);
 390                 return (EOPNOTSUPP);
 391         }
 392 
 393         if ((so->so_mode & SM_ATOMIC) &&
 394             uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
 395             so->so_proto_props.sopp_maxpsz != -1) {
 396                 SO_UNBLOCK_FALLBACK(so);
 397                 return (EMSGSIZE);
 398         }
 399 
 400         /*
 401          * For atomic sends we will only do one iteration.
 402          */
 403         do {
 404                 if (so->so_state & SS_CANTSENDMORE) {
 405                         error = EPIPE;
 406                         break;
 407                 }
 408 
 409                 if (so->so_error != 0) {
 410                         mutex_enter(&so->so_lock);
 411                         error = sogeterr(so, B_TRUE);
 412                         mutex_exit(&so->so_lock);
 413                         if (error != 0)
 414                                 break;
 415                 }
 416 
 417                 /*
 418                  * Send down OOB messages even if the send path is being
 419                  * flow controlled (assuming the protocol supports OOB data).
 420                  */
 421                 if (flags & MSG_OOB) {
 422                         if ((so->so_mode & SM_EXDATA) == 0) {
 423                                 error = EOPNOTSUPP;
 424                                 break;
 425                         }
 426                 } else if (SO_SND_FLOWCTRLD(so)) {
 427                         /*
 428                          * Need to wait until the protocol is ready to receive
 429                          * more data for transmission.
 430                          */
 431                         if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
 432                                 break;
 433                 }
 434 
 435                 /*
 436                  * Time to send data to the protocol. We either copy the
 437                  * data into mblks or pass the uio directly to the protocol.
 438                  * We decide what to do based on the available down calls.
 439                  */
 440                 if (so->so_downcalls->sd_send_uio != NULL) {
 441                         error = (*so->so_downcalls->sd_send_uio)
 442                             (so->so_proto_handle, uiop, msg, cr);
 443                         if (error != 0)
 444                                 break;
 445                 } else {
 446                         /* save the resid in case of failure */
 447                         orig_resid = uiop->uio_resid;
 448 
 449                         if ((mp = socopyinuio(uiop,
 450                             so->so_proto_props.sopp_maxpsz,
 451                             so->so_proto_props.sopp_wroff,
 452                             so->so_proto_props.sopp_maxblk,
 453                             so->so_proto_props.sopp_tail, &error)) == NULL) {
 454                                 break;
 455                         }
 456                         ASSERT(uiop->uio_resid >= 0);
 457 
 458                         if (so->so_filter_active > 0 &&
 459                             ((mp = SOF_FILTER_DATA_OUT(so, mp, msg, cr,
 460                             &error)) == NULL)) {
 461                                 if (error != 0)
 462                                         break;
 463                                 continue;
 464                         }
 465                         error = (*so->so_downcalls->sd_send)
 466                             (so->so_proto_handle, mp, msg, cr);
 467                         if (error != 0) {
 468                                 /*
 469                                  * The send failed. We do not have to free the
 470                                  * mblks, because that is the protocol's
 471                                  * responsibility. However, uio_resid must
 472                                  * remain accurate, so adjust that here.
 473                                  */
 474                                 uiop->uio_resid = orig_resid;
 475                                         break;
 476                         }
 477                 }
 478         } while (uiop->uio_resid > 0);
 479 
 480         SO_UNBLOCK_FALLBACK(so);
 481 
 482         return (error);
 483 }
 484 
 485 int
 486 so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
 487     struct cred *cr, mblk_t **mpp, sof_instance_t *fil,
 488     boolean_t fil_inject)
 489 {
 490         int error;
 491         boolean_t dontblock;
 492         size_t size;
 493         mblk_t *mp = *mpp;
 494 
 495         if (so->so_downcalls->sd_send == NULL)
 496                 return (EOPNOTSUPP);
 497 
 498         error = 0;
 499         dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
 500             (fflag & (FNONBLOCK|FNDELAY));
 501         size = msgdsize(mp);
 502 
 503         if ((so->so_mode & SM_ATOMIC) &&
 504             size > so->so_proto_props.sopp_maxpsz &&
 505             so->so_proto_props.sopp_maxpsz != -1) {
 506                 SO_UNBLOCK_FALLBACK(so);
 507                 return (EMSGSIZE);
 508         }
 509 
 510         while (mp != NULL) {
 511                 mblk_t *nmp, *last_mblk;
 512                 size_t mlen;
 513 
 514                 if (so->so_state & SS_CANTSENDMORE) {
 515                         error = EPIPE;
 516                         break;
 517                 }
 518                 if (so->so_error != 0) {
 519                         mutex_enter(&so->so_lock);
 520                         error = sogeterr(so, B_TRUE);
 521                         mutex_exit(&so->so_lock);
 522                         if (error != 0)
 523                                 break;
 524                 }
 525                 /* Socket filters are not flow controlled */
 526                 if (SO_SND_FLOWCTRLD(so) && !fil_inject) {
 527                         /*
 528                          * Need to wait until the protocol is ready to receive
 529                          * more data for transmission.
 530                          */
 531                         if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
 532                                 break;
 533                 }
 534 
 535                 /*
 536                  * We only allow so_maxpsz of data to be sent down to
 537                  * the protocol at time.
 538                  */
 539                 mlen = MBLKL(mp);
 540                 nmp = mp->b_cont;
 541                 last_mblk = mp;
 542                 while (nmp != NULL) {
 543                         mlen += MBLKL(nmp);
 544                         if (mlen > so->so_proto_props.sopp_maxpsz) {
 545                                 last_mblk->b_cont = NULL;
 546                                 break;
 547                         }
 548                         last_mblk = nmp;
 549                         nmp = nmp->b_cont;
 550                 }
 551 
 552                 if (so->so_filter_active > 0 &&
 553                     (mp = SOF_FILTER_DATA_OUT_FROM(so, fil, mp, msg,
 554                     cr, &error)) == NULL) {
 555                         *mpp = mp = nmp;
 556                         if (error != 0)
 557                                 break;
 558                         continue;
 559                 }
 560                 error = (*so->so_downcalls->sd_send)
 561                     (so->so_proto_handle, mp, msg, cr);
 562                 if (error != 0) {
 563                         /*
 564                          * The send failed. The protocol will free the mblks
 565                          * that were sent down. Let the caller deal with the
 566                          * rest.
 567                          */
 568                         *mpp = nmp;
 569                         break;
 570                 }
 571 
 572                 *mpp = mp = nmp;
 573         }
 574         /* Let the filter know whether the protocol is flow controlled */
 575         if (fil_inject && error == 0 && SO_SND_FLOWCTRLD(so))
 576                 error = ENOSPC;
 577 
 578         return (error);
 579 }
 580 
 581 #pragma inline(so_sendmblk_impl)
 582 
 583 int
 584 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
 585     struct cred *cr, mblk_t **mpp)
 586 {
 587         int error;
 588 
 589         SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
 590 
 591         if ((so->so_mode & SM_SENDFILESUPP) == 0) {
 592                 SO_UNBLOCK_FALLBACK(so);
 593                 return (EOPNOTSUPP);
 594         }
 595 
 596         error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
 597             B_FALSE);
 598 
 599         SO_UNBLOCK_FALLBACK(so);
 600 
 601         return (error);
 602 }
 603 
 604 int
 605 so_shutdown(struct sonode *so, int how, struct cred *cr)
 606 {
 607         int error;
 608 
 609         SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));
 610 
 611         /*
 612          * SunOS 4.X has no check for datagram sockets.
 613          * 5.X checks that it is connected (ENOTCONN)
 614          * X/Open requires that we check the connected state.
 615          */
 616         if (!(so->so_state & SS_ISCONNECTED)) {
 617                 if (!xnet_skip_checks) {
 618                         error = ENOTCONN;
 619                         if (xnet_check_print) {
 620                                 printf("sockfs: X/Open shutdown check "
 621                                     "caused ENOTCONN\n");
 622                         }
 623                 }
 624                 goto done;
 625         }
 626 
 627         if (so->so_filter_active == 0 ||
 628             (error = sof_filter_shutdown(so, &how, cr)) < 0)
 629                 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
 630                     how, cr));
 631 
 632         /*
 633          * Protocol agreed to shutdown. We need to flush the
 634          * receive buffer if the receive side is being shutdown.
 635          */
 636         if (error == 0 && how != SHUT_WR) {
 637                 mutex_enter(&so->so_lock);
 638                 /* wait for active reader to finish */
 639                 (void) so_lock_read(so, 0);
 640 
 641                 so_rcv_flush(so);
 642 
 643                 so_unlock_read(so);
 644                 mutex_exit(&so->so_lock);
 645         }
 646 
 647 done:
 648         SO_UNBLOCK_FALLBACK(so);
 649         return (error);
 650 }
 651 
 652 int
 653 so_getsockname(struct sonode *so, struct sockaddr *addr,
 654     socklen_t *addrlen, struct cred *cr)
 655 {
 656         int error;
 657 
 658         SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
 659 
 660         if (so->so_filter_active == 0 ||
 661             (error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
 662                 error = (*so->so_downcalls->sd_getsockname)
 663                     (so->so_proto_handle, addr, addrlen, cr);
 664 
 665         SO_UNBLOCK_FALLBACK(so);
 666         return (error);
 667 }
 668 
 669 int
 670 so_getpeername(struct sonode *so, struct sockaddr *addr,
 671     socklen_t *addrlen, boolean_t accept, struct cred *cr)
 672 {
 673         int error;
 674 
 675         SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));
 676 
 677         if (accept) {
 678                 error = (*so->so_downcalls->sd_getpeername)
 679                     (so->so_proto_handle, addr, addrlen, cr);
 680         } else if (!(so->so_state & SS_ISCONNECTED)) {
 681                 error = ENOTCONN;
 682         } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
 683                 /* Added this check for X/Open */
 684                 error = EINVAL;
 685                 if (xnet_check_print) {
 686                         printf("sockfs: X/Open getpeername check => EINVAL\n");
 687                 }
 688         } else if (so->so_filter_active == 0 ||
 689             (error = sof_filter_getpeername(so, addr, addrlen, cr)) < 0) {
 690                 error = (*so->so_downcalls->sd_getpeername)
 691                     (so->so_proto_handle, addr, addrlen, cr);
 692         }
 693 
 694         SO_UNBLOCK_FALLBACK(so);
 695         return (error);
 696 }
 697 
 698 int
 699 so_getsockopt(struct sonode *so, int level, int option_name,
 700     void *optval, socklen_t *optlenp, int flags, struct cred *cr)
 701 {
 702         int error = 0;
 703 
 704         if (level == SOL_FILTER)
 705                 return (sof_getsockopt(so, option_name, optval, optlenp, cr));
 706 
 707         SO_BLOCK_FALLBACK(so,
 708             SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
 709 
 710         if ((so->so_filter_active == 0 ||
 711             (error = sof_filter_getsockopt(so, level, option_name, optval,
 712             optlenp, cr)) < 0) &&
 713             (error = socket_getopt_common(so, level, option_name, optval,
 714             optlenp, flags)) < 0) {
 715                 error = (*so->so_downcalls->sd_getsockopt)
 716                     (so->so_proto_handle, level, option_name, optval, optlenp,
 717                     cr);
 718                 if (error ==  ENOPROTOOPT) {
 719                         if (level == SOL_SOCKET) {
 720                                 /*
 721                                  * If a protocol does not support a particular
 722                                  * socket option, set can fail (not allowed)
 723                                  * but get can not fail. This is the previous
 724                                  * sockfs bahvior.
 725                                  */
 726                                 switch (option_name) {
 727                                 case SO_LINGER:
 728                                         if (*optlenp < (t_uscalar_t)
 729                                             sizeof (struct linger)) {
 730                                                 error = EINVAL;
 731                                                 break;
 732                                         }
 733                                         error = 0;
 734                                         bzero(optval, sizeof (struct linger));
 735                                         *optlenp = sizeof (struct linger);
 736                                         break;
 737                                 case SO_RCVTIMEO:
 738                                 case SO_SNDTIMEO:
 739                                         if (*optlenp < (t_uscalar_t)
 740                                             sizeof (struct timeval)) {
 741                                                 error = EINVAL;
 742                                                 break;
 743                                         }
 744                                         error = 0;
 745                                         bzero(optval, sizeof (struct timeval));
 746                                         *optlenp = sizeof (struct timeval);
 747                                         break;
 748                                 case SO_SND_BUFINFO:
 749                                         if (*optlenp < (t_uscalar_t)
 750                                             sizeof (struct so_snd_bufinfo)) {
 751                                                 error = EINVAL;
 752                                                 break;
 753                                         }
 754                                         error = 0;
 755                                         bzero(optval,
 756                                             sizeof (struct so_snd_bufinfo));
 757                                         *optlenp =
 758                                             sizeof (struct so_snd_bufinfo);
 759                                         break;
 760                                 case SO_DEBUG:
 761                                 case SO_REUSEADDR:
 762                                 case SO_KEEPALIVE:
 763                                 case SO_DONTROUTE:
 764                                 case SO_BROADCAST:
 765                                 case SO_USELOOPBACK:
 766                                 case SO_OOBINLINE:
 767                                 case SO_DGRAM_ERRIND:
 768                                 case SO_SNDBUF:
 769                                 case SO_RCVBUF:
 770                                         error = 0;
 771                                         *((int32_t *)optval) = 0;
 772                                         *optlenp = sizeof (int32_t);
 773                                         break;
 774                                 default:
 775                                         break;
 776                                 }
 777                         }
 778                 }
 779         }
 780 
 781         SO_UNBLOCK_FALLBACK(so);
 782         return (error);
 783 }
 784 
 785 int
 786 so_setsockopt(struct sonode *so, int level, int option_name,
 787     const void *optval, socklen_t optlen, struct cred *cr)
 788 {
 789         int error = 0;
 790         struct timeval tl;
 791         const void *opt = optval;
 792 
 793         if (level == SOL_FILTER)
 794                 return (sof_setsockopt(so, option_name, optval, optlen, cr));
 795 
 796         SO_BLOCK_FALLBACK(so,
 797             SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
 798 
 799         /* X/Open requires this check */
 800         if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
 801                 SO_UNBLOCK_FALLBACK(so);
 802                 if (xnet_check_print)
 803                         printf("sockfs: X/Open setsockopt check => EINVAL\n");
 804                 return (EINVAL);
 805         }
 806 
 807         if (so->so_filter_active > 0 &&
 808             (error = sof_filter_setsockopt(so, level, option_name,
 809             (void *)optval, &optlen, cr)) >= 0)
 810                 goto done;
 811 
 812         if (level == SOL_SOCKET) {
 813                 switch (option_name) {
 814                 case SO_RCVTIMEO:
 815                 case SO_SNDTIMEO: {
 816                         /*
 817                          * We pass down these two options to protocol in order
 818                          * to support some third part protocols which need to
 819                          * know them. For those protocols which don't care
 820                          * these two options, simply return 0.
 821                          */
 822                         clock_t t_usec;
 823 
 824                         if (get_udatamodel() == DATAMODEL_NONE ||
 825                             get_udatamodel() == DATAMODEL_NATIVE) {
 826                                 if (optlen != sizeof (struct timeval)) {
 827                                         error = EINVAL;
 828                                         goto done;
 829                                 }
 830                                 bcopy((struct timeval *)optval, &tl,
 831                                     sizeof (struct timeval));
 832                         } else {
 833                                 if (optlen != sizeof (struct timeval32)) {
 834                                         error = EINVAL;
 835                                         goto done;
 836                                 }
 837                                 TIMEVAL32_TO_TIMEVAL(&tl,
 838                                     (struct timeval32 *)optval);
 839                         }
 840                         opt = &tl;
 841                         optlen = sizeof (tl);
 842                         t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
 843                         mutex_enter(&so->so_lock);
 844                         if (option_name == SO_RCVTIMEO)
 845                                 so->so_rcvtimeo = drv_usectohz(t_usec);
 846                         else
 847                                 so->so_sndtimeo = drv_usectohz(t_usec);
 848                         mutex_exit(&so->so_lock);
 849                         break;
 850                 }
 851                 case SO_RCVBUF:
 852                         /*
 853                          * XXX XPG 4.2 applications retrieve SO_RCVBUF from
 854                          * sockfs since the transport might adjust the value
 855                          * and not return exactly what was set by the
 856                          * application.
 857                          */
 858                         so->so_xpg_rcvbuf = *(int32_t *)optval;
 859                         break;
 860                 }
 861         }
 862         error = (*so->so_downcalls->sd_setsockopt)
 863             (so->so_proto_handle, level, option_name, opt, optlen, cr);
 864 done:
 865         SO_UNBLOCK_FALLBACK(so);
 866         return (error);
 867 }
 868 
 869 int
 870 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
 871     struct cred *cr, int32_t *rvalp)
 872 {
 873         int error = 0;
 874 
 875         SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));
 876 
 877         /*
 878          * If there is a pending error, return error
 879          * This can happen if a non blocking operation caused an error.
 880          */
 881         if (so->so_error != 0) {
 882                 mutex_enter(&so->so_lock);
 883                 error = sogeterr(so, B_TRUE);
 884                 mutex_exit(&so->so_lock);
 885                 if (error != 0)
 886                         goto done;
 887         }
 888 
 889         /*
 890          * calling strioc can result in the socket falling back to TPI,
 891          * if that is supported.
 892          */
 893         if ((so->so_filter_active == 0 ||
 894             (error = sof_filter_ioctl(so, cmd, arg, mode,
 895             rvalp, cr)) < 0) &&
 896             (error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
 897             (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
 898                 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
 899                     cmd, arg, mode, rvalp, cr);
 900         }
 901 
 902 done:
 903         SO_UNBLOCK_FALLBACK(so);
 904 
 905         return (error);
 906 }
 907 
 908 int
 909 so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
 910     struct pollhead **phpp)
 911 {
 912         int state = so->so_state, mask;
 913         *reventsp = 0;
 914 
 915         /*
 916          * In sockets the errors are represented as input/output events
 917          */
 918         if (so->so_error != 0 &&
 919             ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) {
 920                 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
 921                 return (0);
 922         }
 923 
 924         /*
 925          * If the socket is in a state where it can send data
 926          * turn on POLLWRBAND and POLLOUT events.
 927          */
 928         if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
 929                 /*
 930                  * out of band data is allowed even if the connection
 931                  * is flow controlled
 932                  */
 933                 *reventsp |= POLLWRBAND & events;
 934                 if (!SO_SND_FLOWCTRLD(so)) {
 935                         /*
 936                          * As long as there is buffer to send data
 937                          * turn on POLLOUT events
 938                          */
 939                         *reventsp |= POLLOUT & events;
 940                 }
 941         }
 942 
 943         /*
 944          * Turn on POLLIN whenever there is data on the receive queue,
 945          * or the socket is in a state where no more data will be received.
 946          * Also, if the socket is accepting connections, flip the bit if
 947          * there is something on the queue.
 948          *
 949          * We do an initial check for events without holding locks. However,
 950          * if there are no event available, then we redo the check for POLLIN
 951          * events under the lock.
 952          */
 953 
 954         /* Pending connections */
 955         if (!list_is_empty(&so->so_acceptq_list))
 956                 *reventsp |= (POLLIN|POLLRDNORM) & events;
 957 
 958         /* Data */
 959         /* so_downcalls is null for sctp */
 960         if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
 961                 *reventsp |= (*so->so_downcalls->sd_poll)
 962                     (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
 963                     CRED()) & events;
 964                 ASSERT((*reventsp & ~events) == 0);
 965                 /* do not recheck events */
 966                 events &= ~SO_PROTO_POLLEV;
 967         } else {
 968                 if (SO_HAVE_DATA(so))
 969                         *reventsp |= (POLLIN|POLLRDNORM) & events;
 970 
 971                 /* Urgent data */
 972                 if ((state & SS_OOBPEND) != 0) {
 973                         *reventsp |= (POLLRDBAND | POLLPRI) & events;
 974                 }
 975 
 976                 /*
 977                  * If the socket has become disconnected, we set POLLHUP.
 978                  * Note that if we are in this state, we will have set POLLIN
 979                  * (SO_HAVE_DATA() is true on a disconnected socket), but not
 980                  * POLLOUT (SS_ISCONNECTED is false).  This is in keeping with
 981                  * the semantics of POLLHUP, which is defined to be mutually
 982                  * exclusive with respect to POLLOUT but not POLLIN.  We are
 983                  * therefore setting POLLHUP primarily for the benefit of
 984                  * those not polling on POLLIN, as they have no other way of
 985                  * knowing that the socket has been disconnected.
 986                  */
 987                 mask = SS_SENTLASTREADSIG | SS_SENTLASTWRITESIG;
 988 
 989                 if ((state & (mask | SS_ISCONNECTED)) == mask)
 990                         *reventsp |= POLLHUP;
 991         }
 992 
 993         if (!*reventsp && !anyyet) {
 994                 /* Check for read events again, but this time under lock */
 995                 if (events & (POLLIN|POLLRDNORM)) {
 996                         mutex_enter(&so->so_lock);
 997                         if (SO_HAVE_DATA(so) ||
 998                             !list_is_empty(&so->so_acceptq_list)) {
 999                                 mutex_exit(&so->so_lock);
1000                                 *reventsp |= (POLLIN|POLLRDNORM) & events;
1001                                 return (0);
1002                         } else {
1003                                 so->so_pollev |= SO_POLLEV_IN;
1004                                 mutex_exit(&so->so_lock);
1005                         }
1006                 }
1007                 *phpp = &so->so_poll_list;
1008         }
1009         return (0);
1010 }
1011 
1012 /*
1013  * Generic Upcalls
1014  */
1015 void
1016 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
1017     cred_t *peer_cred, pid_t peer_cpid)
1018 {
1019         struct sonode *so = (struct sonode *)sock_handle;
1020 
1021         mutex_enter(&so->so_lock);
1022         ASSERT(so->so_proto_handle != NULL);
1023 
1024         if (peer_cred != NULL) {
1025                 if (so->so_peercred != NULL)
1026                         crfree(so->so_peercred);
1027                 crhold(peer_cred);
1028                 so->so_peercred = peer_cred;
1029                 so->so_cpid = peer_cpid;
1030         }
1031 
1032         so->so_proto_connid = id;
1033         soisconnected(so);
1034         /*
1035          * Wake ones who're waiting for conn to become established.
1036          */
1037         so_notify_connected(so);
1038 }
1039 
1040 int
1041 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
1042 {
1043         struct sonode *so = (struct sonode *)sock_handle;
1044         boolean_t connect_failed;
1045 
1046         mutex_enter(&so->so_lock);
1047 
1048         /*
1049          * If we aren't currently connected, then this isn't a disconnect but
1050          * rather a failure to connect.
1051          */
1052         connect_failed = !(so->so_state & SS_ISCONNECTED);
1053 
1054         so->so_proto_connid = id;
1055         soisdisconnected(so, error);
1056         so_notify_disconnected(so, connect_failed, error);
1057 
1058         return (0);
1059 }
1060 
1061 void
1062 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
1063     uintptr_t arg)
1064 {
1065         struct sonode *so = (struct sonode *)sock_handle;
1066 
1067         switch (action) {
1068         case SOCK_OPCTL_SHUT_SEND:
1069                 mutex_enter(&so->so_lock);
1070                 socantsendmore(so);
1071                 so_notify_disconnecting(so);
1072                 break;
1073         case SOCK_OPCTL_SHUT_RECV: {
1074                 mutex_enter(&so->so_lock);
1075                 socantrcvmore(so);
1076                 so_notify_eof(so);
1077                 break;
1078         }
1079         case SOCK_OPCTL_ENAB_ACCEPT:
1080                 mutex_enter(&so->so_lock);
1081                 so->so_state |= SS_ACCEPTCONN;
1082                 so->so_backlog = (unsigned int)arg;
1083                 /*
1084                  * The protocol can stop generating newconn upcalls when
1085                  * the backlog is full, so to make sure the listener does
1086                  * not end up with a queue full of deferred connections
1087                  * we reduce the backlog by one. Thus the listener will
1088                  * start closing deferred connections before the backlog
1089                  * is full.
1090                  */
1091                 if (so->so_filter_active > 0)
1092                         so->so_backlog = MAX(1, so->so_backlog - 1);
1093                 mutex_exit(&so->so_lock);
1094                 break;
1095         default:
1096                 ASSERT(0);
1097                 break;
1098         }
1099 }
1100 
1101 void
1102 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
1103 {
1104         struct sonode *so = (struct sonode *)sock_handle;
1105 
1106         if (qfull) {
1107                 so_snd_qfull(so);
1108         } else {
1109                 so_snd_qnotfull(so);
1110                 mutex_enter(&so->so_lock);
1111                 /* so_notify_writable drops so_lock */
1112                 so_notify_writable(so);
1113         }
1114 }
1115 
1116 sock_upper_handle_t
1117 so_newconn(sock_upper_handle_t parenthandle,
1118     sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
1119     struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
1120 {
1121         struct sonode   *so = (struct sonode *)parenthandle;
1122         struct sonode   *nso;
1123         int error;
1124 
1125         ASSERT(proto_handle != NULL);
1126 
1127         if ((so->so_state & SS_ACCEPTCONN) == 0 ||
1128             (so->so_acceptq_len >= so->so_backlog &&
1129             (so->so_filter_active == 0 || !sof_sonode_drop_deferred(so)))) {
1130                         return (NULL);
1131         }
1132 
1133         nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
1134             &error);
1135         if (nso == NULL)
1136                 return (NULL);
1137 
1138         if (peer_cred != NULL) {
1139                 crhold(peer_cred);
1140                 nso->so_peercred = peer_cred;
1141                 nso->so_cpid = peer_cpid;
1142         }
1143         nso->so_listener = so;
1144 
1145         /*
1146          * The new socket (nso), proto_handle and sock_upcallsp are all
1147          * valid at this point. But as soon as nso is placed in the accept
1148          * queue that can no longer be assumed (since an accept() thread may
1149          * pull it off the queue and close the socket).
1150          */
1151         *sock_upcallsp = &so_upcalls;
1152 
1153         mutex_enter(&so->so_acceptq_lock);
1154         if (so->so_state & (SS_CLOSING|SS_FALLBACK_PENDING|SS_FALLBACK_COMP)) {
1155                 mutex_exit(&so->so_acceptq_lock);
1156                 ASSERT(nso->so_count == 1);
1157                 nso->so_count--;
1158                 nso->so_listener = NULL;
1159                 /* drop proto ref */
1160                 VN_RELE(SOTOV(nso));
1161                 socket_destroy(nso);
1162                 return (NULL);
1163         } else {
1164                 so->so_acceptq_len++;
1165                 if (nso->so_state & SS_FIL_DEFER) {
1166                         list_insert_tail(&so->so_acceptq_defer, nso);
1167                         mutex_exit(&so->so_acceptq_lock);
1168                 } else {
1169                         list_insert_tail(&so->so_acceptq_list, nso);
1170                         cv_signal(&so->so_acceptq_cv);
1171                         mutex_exit(&so->so_acceptq_lock);
1172                         mutex_enter(&so->so_lock);
1173                         so_notify_newconn(so);
1174                 }
1175 
1176                 return ((sock_upper_handle_t)nso);
1177         }
1178 }
1179 
1180 void
1181 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
1182 {
1183         struct sonode *so;
1184 
1185         so = (struct sonode *)sock_handle;
1186 
1187         mutex_enter(&so->so_lock);
1188 
1189         if (soppp->sopp_flags & SOCKOPT_MAXBLK)
1190                 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
1191         if (soppp->sopp_flags & SOCKOPT_WROFF)
1192                 so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
1193         if (soppp->sopp_flags & SOCKOPT_TAIL)
1194                 so->so_proto_props.sopp_tail = soppp->sopp_tail;
1195         if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
1196                 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
1197         if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
1198                 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
1199         if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
1200                 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
1201         if (soppp->sopp_flags & SOCKOPT_MINPSZ)
1202                 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
1203         if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
1204                 if (soppp->sopp_zcopyflag & ZCVMSAFE) {
1205                         so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
1206                         so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
1207                 } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
1208                         so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
1209                         so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
1210                 }
1211 
1212                 if (soppp->sopp_zcopyflag & COPYCACHED) {
1213                         so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
1214                 }
1215         }
1216         if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
1217                 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
1218         if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
1219                 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
1220         if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
1221                 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
1222         if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
1223                 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
1224         if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
1225                 so->so_proto_props.sopp_loopback = soppp->sopp_loopback;
1226 
1227         mutex_exit(&so->so_lock);
1228 
1229         if (so->so_filter_active > 0) {
1230                 sof_instance_t *inst;
1231                 ssize_t maxblk;
1232                 ushort_t wroff, tail;
1233                 maxblk = so->so_proto_props.sopp_maxblk;
1234                 wroff = so->so_proto_props.sopp_wroff;
1235                 tail = so->so_proto_props.sopp_tail;
1236                 for (inst = so->so_filter_bottom; inst != NULL;
1237                     inst = inst->sofi_prev) {
1238                         if (SOF_INTERESTED(inst, mblk_prop)) {
1239                                 (*inst->sofi_ops->sofop_mblk_prop)(
1240                                     (sof_handle_t)inst, inst->sofi_cookie,
1241                                     &maxblk, &wroff, &tail);
1242                         }
1243                 }
1244                 mutex_enter(&so->so_lock);
1245                 so->so_proto_props.sopp_maxblk = maxblk;
1246                 so->so_proto_props.sopp_wroff = wroff;
1247                 so->so_proto_props.sopp_tail = tail;
1248                 mutex_exit(&so->so_lock);
1249         }
1250 #ifdef DEBUG
1251         soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
1252             SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
1253             SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
1254             SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
1255             SOCKOPT_LOOPBACK);
1256         ASSERT(soppp->sopp_flags == 0);
1257 #endif
1258 }
1259 
1260 /* ARGSUSED */
1261 ssize_t
1262 so_queue_msg_impl(struct sonode *so, mblk_t *mp,
1263     size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp,
1264     sof_instance_t *filter)
1265 {
1266         boolean_t force_push = B_TRUE;
1267         int space_left;
1268         sodirect_t *sodp = so->so_direct;
1269 
1270         ASSERT(errorp != NULL);
1271         *errorp = 0;
1272         if (mp == NULL) {
1273                 if (so->so_downcalls->sd_recv_uio != NULL) {
1274                         mutex_enter(&so->so_lock);
1275                         /* the notify functions will drop the lock */
1276                         if (flags & MSG_OOB)
1277                                 so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1278                         else
1279                                 so_notify_data(so, msg_size);
1280                         return (0);
1281                 }
1282                 ASSERT(msg_size == 0);
1283                 mutex_enter(&so->so_lock);
1284                 goto space_check;
1285         }
1286 
1287         ASSERT(mp->b_next == NULL);
1288         ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
1289         ASSERT(msg_size == msgdsize(mp));
1290 
1291         if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1292                 /* The read pointer is not aligned correctly for TPI */
1293                 zcmn_err(getzoneid(), CE_WARN,
1294                     "sockfs: Unaligned TPI message received. rptr = %p\n",
1295                     (void *)mp->b_rptr);
1296                 freemsg(mp);
1297                 mutex_enter(&so->so_lock);
1298                 if (sodp != NULL)
1299                         SOD_UIOAFINI(sodp);
1300                 goto space_check;
1301         }
1302 
1303         if (so->so_filter_active > 0) {
1304                 for (; filter != NULL; filter = filter->sofi_prev) {
1305                         if (!SOF_INTERESTED(filter, data_in))
1306                                 continue;
1307                         mp = (*filter->sofi_ops->sofop_data_in)(
1308                             (sof_handle_t)filter, filter->sofi_cookie, mp,
1309                             flags, &msg_size);
1310                         ASSERT(msgdsize(mp) == msg_size);
1311                         DTRACE_PROBE2(filter__data, (sof_instance_t), filter,
1312                             (mblk_t *), mp);
1313                         /* Data was consumed/dropped, just do space check */
1314                         if (msg_size == 0) {
1315                                 mutex_enter(&so->so_lock);
1316                                 goto space_check;
1317                         }
1318                 }
1319         }
1320 
1321         if (flags & MSG_OOB) {
1322                 so_queue_oob(so, mp, msg_size);
1323                 mutex_enter(&so->so_lock);
1324                 goto space_check;
1325         }
1326 
1327         if (force_pushp != NULL)
1328                 force_push = *force_pushp;
1329 
1330         mutex_enter(&so->so_lock);
1331         if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
1332                 if (sodp != NULL)
1333                         SOD_DISABLE(sodp);
1334                 mutex_exit(&so->so_lock);
1335                 *errorp = EOPNOTSUPP;
1336                 return (-1);
1337         }
1338         if (so->so_state & (SS_CANTRCVMORE | SS_CLOSING)) {
1339                 freemsg(mp);
1340                 if (sodp != NULL)
1341                         SOD_DISABLE(sodp);
1342                 mutex_exit(&so->so_lock);
1343                 return (0);
1344         }
1345 
1346         /* process the mblk via I/OAT if capable */
1347         if (sodp != NULL && sodp->sod_enabled) {
1348                 if (DB_TYPE(mp) == M_DATA) {
1349                         sod_uioa_mblk_init(sodp, mp, msg_size);
1350                 } else {
1351                         SOD_UIOAFINI(sodp);
1352                 }
1353         }
1354 
1355         if (mp->b_next == NULL) {
1356                 so_enqueue_msg(so, mp, msg_size);
1357         } else {
1358                 do {
1359                         mblk_t *nmp;
1360 
1361                         if ((nmp = mp->b_next) != NULL) {
1362                                 mp->b_next = NULL;
1363                         }
1364                         so_enqueue_msg(so, mp, msgdsize(mp));
1365                         mp = nmp;
1366                 } while (mp != NULL);
1367         }
1368 
1369         space_left = so->so_rcvbuf - so->so_rcv_queued;
1370         if (space_left <= 0) {
1371                 so->so_flowctrld = B_TRUE;
1372                 *errorp = ENOSPC;
1373                 space_left = -1;
1374         }
1375 
1376         if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
1377             so->so_rcv_queued >= so->so_rcv_wanted) {
1378                 SOCKET_TIMER_CANCEL(so);
1379                 /*
1380                  * so_notify_data will release the lock
1381                  */
1382                 so_notify_data(so, so->so_rcv_queued);
1383 
1384                 if (force_pushp != NULL)
1385                         *force_pushp = B_TRUE;
1386                 goto done;
1387         } else if (so->so_rcv_timer_tid == 0) {
1388                 /* Make sure the recv push timer is running */
1389                 SOCKET_TIMER_START(so);
1390         }
1391 
1392 done_unlock:
1393         mutex_exit(&so->so_lock);
1394 done:
1395         return (space_left);
1396 
1397 space_check:
1398         space_left = so->so_rcvbuf - so->so_rcv_queued;
1399         if (space_left <= 0) {
1400                 so->so_flowctrld = B_TRUE;
1401                 *errorp = ENOSPC;
1402                 space_left = -1;
1403         }
1404         goto done_unlock;
1405 }
1406 
1407 #pragma inline(so_queue_msg_impl)
1408 
1409 ssize_t
1410 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
1411     size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp)
1412 {
1413         struct sonode *so = (struct sonode *)sock_handle;
1414 
1415         return (so_queue_msg_impl(so, mp, msg_size, flags, errorp, force_pushp,
1416             so->so_filter_bottom));
1417 }
1418 
1419 /*
1420  * Set the offset of where the oob data is relative to the bytes in
1421  * queued. Also generate SIGURG
1422  */
1423 void
1424 so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
1425 {
1426         struct sonode *so;
1427 
1428         ASSERT(offset >= 0);
1429         so = (struct sonode *)sock_handle;
1430         mutex_enter(&so->so_lock);
1431         if (so->so_direct != NULL)
1432                 SOD_UIOAFINI(so->so_direct);
1433 
1434         /*
1435          * New urgent data on the way so forget about any old
1436          * urgent data.
1437          */
1438         so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1439 
1440         /*
1441          * Record that urgent data is pending.
1442          */
1443         so->so_state |= SS_OOBPEND;
1444 
1445         if (so->so_oobmsg != NULL) {
1446                 dprintso(so, 1, ("sock: discarding old oob\n"));
1447                 freemsg(so->so_oobmsg);
1448                 so->so_oobmsg = NULL;
1449         }
1450 
1451         /*
1452          * set the offset where the urgent byte is
1453          */
1454         so->so_oobmark = so->so_rcv_queued + offset;
1455         if (so->so_oobmark == 0)
1456                 so->so_state |= SS_RCVATMARK;
1457         else
1458                 so->so_state &= ~SS_RCVATMARK;
1459 
1460         so_notify_oobsig(so);
1461 }
1462 
1463 /*
1464  * Queue the OOB byte
1465  */
1466 static void
1467 so_queue_oob(struct sonode *so, mblk_t *mp, size_t len)
1468 {
1469         mutex_enter(&so->so_lock);
1470         if (so->so_direct != NULL)
1471                 SOD_UIOAFINI(so->so_direct);
1472 
1473         ASSERT(mp != NULL);
1474         if (!IS_SO_OOB_INLINE(so)) {
1475                 so->so_oobmsg = mp;
1476                 so->so_state |= SS_HAVEOOBDATA;
1477         } else {
1478                 so_enqueue_msg(so, mp, len);
1479         }
1480 
1481         so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
1482 }
1483 
1484 int
1485 so_close(struct sonode *so, int flag, struct cred *cr)
1486 {
1487         int error;
1488 
1489         /*
1490          * No new data will be enqueued once the CLOSING flag is set.
1491          */
1492         mutex_enter(&so->so_lock);
1493         so->so_state |= SS_CLOSING;
1494         ASSERT(so_verify_oobstate(so));
1495         so_rcv_flush(so);
1496         mutex_exit(&so->so_lock);
1497 
1498         if (so->so_filter_active > 0)
1499                 sof_sonode_closing(so);
1500 
1501         if (so->so_state & SS_ACCEPTCONN) {
1502                 /*
1503                  * We grab and release the accept lock to ensure that any
1504                  * thread about to insert a socket in so_newconn completes
1505                  * before we flush the queue. Any thread calling so_newconn
1506                  * after we drop the lock will observe the SS_CLOSING flag,
1507                  * which will stop it from inserting the socket in the queue.
1508                  */
1509                 mutex_enter(&so->so_acceptq_lock);
1510                 mutex_exit(&so->so_acceptq_lock);
1511 
1512                 so_acceptq_flush(so, B_TRUE);
1513         }
1514 
1515         error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);
1516         switch (error) {
1517         default:
1518                 /* Protocol made a synchronous close; remove proto ref */
1519                 VN_RELE(SOTOV(so));
1520                 break;
1521         case EINPROGRESS:
1522                 /*
1523                  * Protocol is in the process of closing, it will make a
1524                  * 'closed' upcall to remove the reference.
1525                  */
1526                 error = 0;
1527                 break;
1528         }
1529 
1530         return (error);
1531 }
1532 
1533 /*
1534  * Upcall made by the protocol when it's doing an asynchronous close. It
1535  * will drop the protocol's reference on the socket.
1536  */
1537 void
1538 so_closed(sock_upper_handle_t sock_handle)
1539 {
1540         struct sonode *so = (struct sonode *)sock_handle;
1541 
1542         VN_RELE(SOTOV(so));
1543 }
1544 
1545 conn_pid_node_list_hdr_t *
1546 so_get_sock_pid_list(sock_upper_handle_t sock_handle)
1547 {
1548         int sz, n = 0;
1549         pid_node_t                      *pn;
1550         conn_pid_node_t                 *cpn;
1551         conn_pid_node_list_hdr_t        *cph;
1552         struct sonode                   *so = (struct sonode *)sock_handle;
1553 
1554         mutex_enter(&so->so_pid_list_lock);
1555 
1556         n = list_size(&so->so_pid_list);
1557         sz = sizeof (conn_pid_node_list_hdr_t);
1558         sz += (n > 1)?((n - 1) * sizeof (conn_pid_node_t)):0;
1559         cph = kmem_zalloc(sz, KM_SLEEP);
1560 
1561         cph->cph_magic = CONN_PID_NODE_LIST_HDR_MAGIC;
1562         cph->cph_contents = CONN_PID_NODE_LIST_HDR_SOC;
1563         cph->cph_pn_cnt = n;
1564         cph->cph_tot_size = sz;
1565         cph->cph_flags = 0;
1566         cph->cph_optional1 = 0;
1567         cph->cph_optional2 = 0;
1568 
1569         if (cph->cph_pn_cnt > 0) {
1570                 cpn = cph->cph_cpns;
1571                 pn = list_head(&so->so_pid_list);
1572                 while (pn != NULL) {
1573                         PIDNODE2CONNPIDNODE(pn, cpn);
1574                         pn = list_next(&so->so_pid_list, pn);
1575                         cpn++;
1576                 }
1577         }
1578 
1579         mutex_exit(&so->so_pid_list_lock);
1580 
1581         return (cph);
1582 }
1583 
1584 void
1585 so_zcopy_notify(sock_upper_handle_t sock_handle)
1586 {
1587         struct sonode *so = (struct sonode *)sock_handle;
1588 
1589         mutex_enter(&so->so_lock);
1590         so->so_copyflag |= STZCNOTIFY;
1591         cv_broadcast(&so->so_copy_cv);
1592         mutex_exit(&so->so_lock);
1593 }
1594 
1595 void
1596 so_set_error(sock_upper_handle_t sock_handle, int error)
1597 {
1598         struct sonode *so = (struct sonode *)sock_handle;
1599 
1600         mutex_enter(&so->so_lock);
1601 
1602         soseterror(so, error);
1603 
1604         so_notify_error(so);
1605 }
1606 
1607 /*
1608  * so_recvmsg - read data from the socket
1609  *
1610  * There are two ways of obtaining data; either we ask the protocol to
1611  * copy directly into the supplied buffer, or we copy data from the
1612  * sonode's receive queue. The decision which one to use depends on
1613  * whether the protocol has a sd_recv_uio down call.
      *
      * MSG_OOB requests are resolved first: EOPNOTSUPP if the socket lacks
      * SM_EXDATA, otherwise the sd_recv_uio down call when present, else
      * sorecvoob().
      *
      * On the receive-queue path the incoming msg_flags are moved into a
      * local and msg_flags, msg_namelen and msg_controllen are zeroed. The
      * caller's original lengths are only used to decide whether a name or
      * control data is wanted at all. Anything returned is placed in a
      * freshly kmem-allocated buffer hung off msg_name / msg_control, with
      * msg_namelen / msg_controllen set to the allocated size. Presumably
      * the caller copies out and frees these buffers -- confirm against
      * the callers.
      *
      * Returns 0 on success or an errno value: ENOTCONN for an
      * SM_CONNREQUIRED socket that is neither SS_ISCONNECTED nor
      * SS_CANTRCVMORE, EOPNOTSUPP as described above, EPROTO for a
      * malformed or unexpected TPI control message, or whatever the down
      * call, sorecvoob(), so_lock_read_intr(), so_dequeue_msg(),
      * so_opt2cmsg() or sod_rcv_done() reports.
1614  */
1615 int
1616 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
1617     struct cred *cr)
1618 {
1619         rval_t          rval;
1620         int             flags = 0;
1621         t_uscalar_t     controllen, namelen;
1622         int             error = 0;
1623         int ret;
1624         mblk_t          *mctlp = NULL;
1625         union T_primitives *tpr;
1626         void            *control;
1627         ssize_t         saved_resid;
1628         struct uio      *suiop;
1629 
             /* Every return below is preceded by SO_UNBLOCK_FALLBACK(). */
1630         SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr));
1631 
1632         if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 &&
1633             (so->so_mode & SM_CONNREQUIRED)) {
1634                 SO_UNBLOCK_FALLBACK(so);
1635                 return (ENOTCONN);
1636         }
1637 
             /* MSG_WAITALL is dropped when peeking. */
1638         if (msg->msg_flags & MSG_PEEK)
1639                 msg->msg_flags &= ~MSG_WAITALL;
1640 
             /*
              * SM_ATOMIC sockets always carry MSG_TRUNC in the request
              * flags handed to the paths below.
              */
1641         if (so->so_mode & SM_ATOMIC)
1642                 msg->msg_flags |= MSG_TRUNC;
1643 
             /*
              * Out-of-band receive is resolved here and never reaches the
              * receive-queue code further down.
              */
1644         if (msg->msg_flags & MSG_OOB) {
1645                 if ((so->so_mode & SM_EXDATA) == 0) {
1646                         error = EOPNOTSUPP;
1647                 } else if (so->so_downcalls->sd_recv_uio != NULL) {
1648                         error = (*so->so_downcalls->sd_recv_uio)
1649                             (so->so_proto_handle, uiop, msg, cr);
1650                 } else {
1651                         error = sorecvoob(so, msg, uiop, msg->msg_flags,
1652                             IS_SO_OOB_INLINE(so));
1653                 }
1654                 SO_UNBLOCK_FALLBACK(so);
1655                 return (error);
1656         }
1657 
1658         /*
1659          * If the protocol has the recv down call, then pass the request
1660          * down.
1661          */
1662         if (so->so_downcalls->sd_recv_uio != NULL) {
1663                 error = (*so->so_downcalls->sd_recv_uio)
1664                     (so->so_proto_handle, uiop, msg, cr);
1665                 SO_UNBLOCK_FALLBACK(so);
1666                 return (error);
1667         }
1668 
1669         /*
1670          * Reading data from the socket buffer
              *
              * The request flags move to the local `flags'; from here on
              * msg_flags is rebuilt from zero as an output field.
1671          */
1672         flags = msg->msg_flags;
1673         msg->msg_flags = 0;
1674 
1675         /*
1676          * Set msg_controllen and msg_namelen to zero here to make it
1677          * simpler in the cases that no control or name is returned.
1678          */
1679         controllen = msg->msg_controllen;
1680         namelen = msg->msg_namelen;
1681         msg->msg_controllen = 0;
1682         msg->msg_namelen = 0;
1683 
1684         mutex_enter(&so->so_lock);
1685         /* Set SOREADLOCKED */
1686         error = so_lock_read_intr(so,
1687             uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0));
1688         mutex_exit(&so->so_lock);
1689         if (error) {
1690                 SO_UNBLOCK_FALLBACK(so);
1691                 return (error);
1692         }
1693 
             /*
              * sod_rcv_init() receives uiop by address, so it may substitute
              * a different uio; its return value is handed back to
              * sod_rcv_done() at out_locked.
              */
1694         suiop = sod_rcv_init(so, flags, &uiop);
     /*
      * Each pass starts without so_lock held and records uio_resid so the
      * MSG_WAITALL checks below can tell whether this pass moved any data.
      */
1695 retry:
1696         saved_resid = uiop->uio_resid;
1697         error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags);
1698         if (error != 0) {
1699                 goto out;
1700         }
1701         /*
1702          * For datagrams the MOREDATA flag is used to set MSG_TRUNC.
1703          * For non-datagrams MOREDATA is used to set MSG_EOR.
1704          */
1705         ASSERT(!(rval.r_val1 & MORECTL));
1706         if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC))
1707                 msg->msg_flags |= MSG_TRUNC;
             /* No control mblk came back: only data was dequeued. */
1708         if (mctlp == NULL) {
1709                 dprintso(so, 1, ("so_recvmsg: got M_DATA\n"));
1710 
1711                 mutex_enter(&so->so_lock);
1712                 /* Set MSG_EOR based on MOREDATA */
                     /*
                      * SS_SAVEDEOR is set by the T_DATA_IND/T_OPTDATA_IND
                      * cases below when the record ended but MOREDATA was
                      * still set; it is reported and cleared here once
                      * MOREDATA is clear.
                      */
1713                 if (!(rval.r_val1 & MOREDATA)) {
1714                         if (so->so_state & SS_SAVEDEOR) {
1715                                 msg->msg_flags |= MSG_EOR;
1716                                 so->so_state &= ~SS_SAVEDEOR;
1717                         }
1718                 }
1719                 /*
1720                  * If some data was received (i.e. not EOF) and the
1721                  * read/recv* has not been satisfied wait for some more.
1722                  */
1723                 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1724                     uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
                             /* retry expects so_lock to be released. */
1725                         mutex_exit(&so->so_lock);
1726                         flags |= MSG_NOMARK;
1727                         goto retry;
1728                 }
1729 
1730                 goto out_locked;
1731         }
1732         /* so_queue_msg has already verified length and alignment */
1733         tpr = (union T_primitives *)mctlp->b_rptr;
1734         dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type));
1735         switch (tpr->type) {
1736         case T_DATA_IND: {
1737                 /*
1738                  * Set msg_flags to MSG_EOR based on
1739                  * MORE_flag and MOREDATA.
                      *
                      * If the record ended (MORE_flag bit clear) but data is
                      * still queued, the end-of-record is remembered in
                      * SS_SAVEDEOR instead of being reported now.
1740                  */
1741                 mutex_enter(&so->so_lock);
1742                 so->so_state &= ~SS_SAVEDEOR;
1743                 if (!(tpr->data_ind.MORE_flag & 1)) {
1744                         if (!(rval.r_val1 & MOREDATA))
1745                                 msg->msg_flags |= MSG_EOR;
1746                         else
1747                                 so->so_state |= SS_SAVEDEOR;
1748                 }
1749                 freemsg(mctlp);
1750                 /*
1751                  * If some data was received (i.e. not EOF) and the
1752                  * read/recv* has not been satisfied wait for some more.
1753                  */
1754                 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1755                     uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1756                         mutex_exit(&so->so_lock);
1757                         flags |= MSG_NOMARK;
1758                         goto retry;
1759                 }
1760                 goto out_locked;
1761         }
1762         case T_UNITDATA_IND: {
1763                 void *addr;
1764                 t_uscalar_t addrlen;
1765                 void *abuf;
1766                 t_uscalar_t optlen;
1767                 void *opt;
1768 
1769                 if (namelen != 0) {
1770                         /* Caller wants source address */
1771                         addrlen = tpr->unitdata_ind.SRC_length;
1772                         addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset,
1773                             addrlen, 1);
1774                         if (addr == NULL) {
1775                                 freemsg(mctlp);
1776                                 error = EPROTO;
1777                                 eprintsoline(so, error);
1778                                 goto out;
1779                         }
1780                         ASSERT(so->so_family != AF_UNIX);
1781                 }
1782                 optlen = tpr->unitdata_ind.OPT_length;
1783                 if (optlen != 0) {
1784                         t_uscalar_t ncontrollen;
1785 
1786                         /*
1787                          * Extract any source address option.
1788                          * Determine how large cmsg buffer is needed.
1789                          */
1790                         opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset,
1791                             optlen, __TPI_ALIGN_SIZE);
1792 
1793                         if (opt == NULL) {
1794                                 freemsg(mctlp);
1795                                 error = EPROTO;
1796                                 eprintsoline(so, error);
1797                                 goto out;
1798                         }
                             /*
                              * addr/addrlen are passed by address, so this
                              * call may replace the values taken from the
                              * SRC fields above. NOTE(review): the ASSERT
                              * above says AF_UNIX is not expected here when
                              * a name was requested -- confirm whether this
                              * branch is still reachable.
                              */
1799                         if (so->so_family == AF_UNIX)
1800                                 so_getopt_srcaddr(opt, optlen, &addr, &addrlen);
1801                         ncontrollen = so_cmsglen(mctlp, opt, optlen,
1802                             !(flags & MSG_XPG4_2));
                             /*
                              * A caller that asked for control data gets the
                              * computed size; one that did not only gets
                              * MSG_CTRUNC when there was something to return.
                              */
1803                         if (controllen != 0)
1804                                 controllen = ncontrollen;
1805                         else if (ncontrollen != 0)
1806                                 msg->msg_flags |= MSG_CTRUNC;
1807                 } else {
                             /*
                              * Forcing controllen to 0 keeps the block below
                              * from reading `opt', which is unset here.
                              */
1808                         controllen = 0;
1809                 }
1810 
1811                 if (namelen != 0) {
1812                         /*
1813                          * Return address to caller.
1814                          * Caller handles truncation if length
1815                          * exceeds msg_namelen.
1816                          * NOTE: AF_UNIX NUL termination is ensured by
1817                          * the sender's copyin_name().
1818                          */
1819                         abuf = kmem_alloc(addrlen, KM_SLEEP);
1820 
1821                         bcopy(addr, abuf, addrlen);
1822                         msg->msg_name = abuf;
1823                         msg->msg_namelen = addrlen;
1824                 }
1825 
1826                 if (controllen != 0) {
1827                         /*
1828                          * Return control msg to caller.
1829                          * Caller handles truncation if length
1830                          * exceeds msg_controllen.
1831                          */
1832                         control = kmem_zalloc(controllen, KM_SLEEP);
1833 
1834                         error = so_opt2cmsg(mctlp, opt, optlen,
1835                             !(flags & MSG_XPG4_2), control, controllen);
1836                         if (error) {
1837                                 freemsg(mctlp);
                                     /*
                                      * NOTE(review): msg_name and
                                      * msg_namelen are not reset after
                                      * this free; presumably callers
                                      * ignore them on error -- confirm.
                                      */
1838                                 if (msg->msg_namelen != 0)
1839                                         kmem_free(msg->msg_name,
1840                                             msg->msg_namelen);
1841                                 kmem_free(control, controllen);
1842                                 eprintsoline(so, error);
1843                                 goto out;
1844                         }
1845                         msg->msg_control = control;
1846                         msg->msg_controllen = controllen;
1847                 }
1848 
                     /* Unit data never loops for MSG_WAITALL. */
1849                 freemsg(mctlp);
1850                 goto out;
1851         }
1852         case T_OPTDATA_IND: {
1853                 struct T_optdata_req *tdr;
1854                 void *opt;
1855                 t_uscalar_t optlen;
1856 
                     /*
                      * NOTE(review): the indication is read through the
                      * T_optdata_req layout here; presumably OPT_length
                      * sits at the same offset in both -- confirm against
                      * <sys/tihdr.h>.
                      */
1857                 tdr = (struct T_optdata_req *)mctlp->b_rptr;
1858                 optlen = tdr->OPT_length;
1859                 if (optlen != 0) {
1860                         t_uscalar_t ncontrollen;
1861                         /*
1862                          * Determine how large cmsg buffer is needed.
1863                          */
1864                         opt = sogetoff(mctlp,
1865                             tpr->optdata_ind.OPT_offset, optlen,
1866                             __TPI_ALIGN_SIZE);
1867 
1868                         if (opt == NULL) {
1869                                 freemsg(mctlp);
1870                                 error = EPROTO;
1871                                 eprintsoline(so, error);
1872                                 goto out;
1873                         }
1874 
1875                         ncontrollen = so_cmsglen(mctlp, opt, optlen,
1876                             !(flags & MSG_XPG4_2));
1877                         if (controllen != 0)
1878                                 controllen = ncontrollen;
1879                         else if (ncontrollen != 0)
1880                                 msg->msg_flags |= MSG_CTRUNC;
1881                 } else {
                             /* As above: keeps `opt' from being read unset. */
1882                         controllen = 0;
1883                 }
1884 
1885                 if (controllen != 0) {
1886                         /*
1887                          * Return control msg to caller.
1888                          * Caller handles truncation if length
1889                          * exceeds msg_controllen.
1890                          */
1891                         control = kmem_zalloc(controllen, KM_SLEEP);
1892 
1893                         error = so_opt2cmsg(mctlp, opt, optlen,
1894                             !(flags & MSG_XPG4_2), control, controllen);
1895                         if (error) {
1896                                 freemsg(mctlp);
1897                                 kmem_free(control, controllen);
1898                                 eprintsoline(so, error);
1899                                 goto out;
1900                         }
1901                         msg->msg_control = control;
1902                         msg->msg_controllen = controllen;
1903                 }
1904 
1905                 /*
1906                  * Set msg_flags to MSG_EOR based on
1907                  * DATA_flag and MOREDATA.
                      *
                      * NOTE(review): the code reads data_ind.MORE_flag
                      * rather than optdata_ind.DATA_flag; presumably the
                      * two union members share an offset -- confirm
                      * against <sys/tihdr.h>.
1908                  */
1909                 mutex_enter(&so->so_lock);
1910                 so->so_state &= ~SS_SAVEDEOR;
1911                 if (!(tpr->data_ind.MORE_flag & 1)) {
1912                         if (!(rval.r_val1 & MOREDATA))
1913                                 msg->msg_flags |= MSG_EOR;
1914                         else
1915                                 so->so_state |= SS_SAVEDEOR;
1916                 }
1917                 freemsg(mctlp);
1918                 /*
1919                  * If some data was received (i.e. not EOF) and the
1920                  * read/recv* has not been satisfied wait for some more.
1921                  * Not possible to wait if control info was received.
1922                  */
1923                 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) &&
1924                     controllen == 0 &&
1925                     uiop->uio_resid != saved_resid && uiop->uio_resid > 0) {
1926                         mutex_exit(&so->so_lock);
1927                         flags |= MSG_NOMARK;
1928                         goto retry;
1929                 }
1930                 goto out_locked;
1931         }
1932         default:
                     /*
                      * Unknown primitive: log it, drop the message and fall
                      * out of the switch into `out' with EPROTO.
                      */
1933                 cmn_err(CE_CONT, "so_recvmsg bad type %x \n",
1934                     tpr->type);
1935                 freemsg(mctlp);
1936                 error = EPROTO;
1937                 ASSERT(0);
1938         }
     /*
      * `out' is reached without so_lock and takes it; `out_locked' is
      * reached with so_lock already held. A sod_rcv_done() failure is only
      * reported when no earlier error was recorded.
      */
1939 out:
1940         mutex_enter(&so->so_lock);
1941 out_locked:
1942         ret = sod_rcv_done(so, suiop, uiop);
1943         if (ret != 0 && error == 0)
1944                 error = ret;
1945 
1946         so_unlock_read(so);     /* Clear SOREADLOCKED */
1947         mutex_exit(&so->so_lock);
1948 
1949         SO_UNBLOCK_FALLBACK(so);
1950 
1951         return (error);
1952 }
1953 
1954 sonodeops_t so_sonodeops = {
             /*
              * Positional initializer: entries must stay in the member
              * order of sonodeops_t (declared elsewhere). The trailing
              * comment on each line names the slot the handler fills.
              */
1955         so_init,                /* sop_init     */
1956         so_accept,              /* sop_accept   */
1957         so_bind,                /* sop_bind     */
1958         so_listen,              /* sop_listen   */
1959         so_connect,             /* sop_connect  */
1960         so_recvmsg,             /* sop_recvmsg  */
1961         so_sendmsg,             /* sop_sendmsg  */
1962         so_sendmblk,            /* sop_sendmblk */
1963         so_getpeername,         /* sop_getpeername */
1964         so_getsockname,         /* sop_getsockname */
1965         so_shutdown,            /* sop_shutdown */
1966         so_getsockopt,          /* sop_getsockopt */
1967         so_setsockopt,          /* sop_setsockopt */
1968         so_ioctl,               /* sop_ioctl    */
1969         so_poll,                /* sop_poll     */
1970         so_close,               /* sop_close */
1971 };
1972 
1973 sock_upcalls_t so_upcalls = {
             /*
              * Positional initializer: entries must stay in the member
              * order of sock_upcalls_t (declared elsewhere; member names
              * are not visible here). Presumably this is the callback
              * table protocols use to call up into sockfs -- confirm
              * against <sys/socket_proto.h>.
              */
1974         so_newconn,
1975         so_connected,
1976         so_disconnected,
1977         so_opctl,
1978         so_queue_msg,
1979         so_set_prop,
1980         so_txq_full,
1981         so_signal_oob,
1982         so_zcopy_notify,
1983         so_set_error,
1984         so_closed,
1985         so_get_sock_pid_list
1986 };