1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  27  * Copyright (c) 2012 by Delphix. All rights reserved.
  28  * Copyright 2020 Joyent, Inc.
  29  */
  30 
  31 /*
  32  * Multithreaded STREAMS Local Transport Provider.
  33  *
  34  * OVERVIEW
  35  * ========
  36  *
  37  * This driver provides TLI as well as socket semantics.  It provides
  38  * connectionless, connection oriented, and connection oriented with orderly
  39  * release transports for TLI and sockets. Each transport type has separate name
  40  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
  41  * this removes any name space conflicts when binding to socket style transport
  42  * addresses.
  43  *
  44  * NOTE: There is one exception: Socket ticots and ticotsord transports share
  45  * the same namespace. In fact, sockets always use ticotsord type transport.
  46  *
  47  * The driver mode is specified during open() by the minor number used for
  48  * open.
  49  *
  50  *  The sockets in addition have the following semantic differences:
  51  *  No support for passing up credentials (TL_SET[U]CRED).
  52  *
 *      Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 *      from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
 *      T_OPTDATA_IND.
  56  *
  57  *      The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
  58  *      a T_CONN_RES is received from the acceptor. This means that a socket
  59  *      connect will complete before the peer has called accept.
  60  *
  61  *
  62  * MULTITHREADING
  63  * ==============
  64  *
  65  * The driver does not use STREAMS protection mechanisms. Instead it uses a
  66  * generic "serializer" abstraction. Most of the operations are executed behind
  67  * the serializer and are, essentially single-threaded. All functions executed
 * behind the same serializer are strictly serialized. So if one thread calls
 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
 * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2);
 * or bar(mp2, arg2); foo(mp1, arg1); but foo() and bar() will never run at the
 * same time.
  74  *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
  78  *
  79  * All COTS-type endpoints start their life with private serializers. During
  80  * connection request processing the endpoint serializer is switched to the
  81  * listener's serializer and the rest of T_CONN_REQ processing is done on the
  82  * listener serializer. During T_CONN_RES processing the eager serializer is
  83  * switched from listener to acceptor serializer and after that point all
  84  * processing for eager and acceptor happens on this serializer. To avoid races
  85  * with endpoint closes while its serializer may be changing closes are blocked
  86  * while serializers are manipulated.
  87  *
  88  * References accounting
  89  * ---------------------
  90  *
  91  * Endpoints are reference counted and freed when the last reference is
  92  * dropped. Functions within the serializer may access an endpoint state even
  93  * after an endpoint closed. The te_closing being set on the endpoint indicates
  94  * that the endpoint entered its close routine.
  95  *
  96  * One reference is held for each opened endpoint instance. The reference
  97  * counter is incremented when the endpoint is linked to another endpoint and
  98  * decremented when the link disappears. It is also incremented when the
  99  * endpoint is found by the hash table lookup. This increment is atomic with the
 100  * lookup itself and happens while the hash table read lock is held.
 101  *
 102  * Close synchronization
 103  * ---------------------
 104  *
 * During close the endpoint is marked as closing using te_closing flag. It is
 106  * usually enough to check for te_closing flag since all other state changes
 107  * happen after this flag is set and the close entered serializer. Immediately
 108  * after setting te_closing flag tl_close() enters serializer and waits until
 109  * the callback finishes. This allows all functions called within serializer to
 110  * simply check te_closing without any locks.
 111  *
 112  * Serializer management.
 113  * ---------------------
 114  *
 115  * For COTS transports serializers are created when the endpoint is constructed
 116  * and destroyed when the endpoint is destructed. CLTS transports use global
 117  * serializers - one for sockets and one for TLI.
 118  *
 119  * COTS serializers have separate reference counts to deal with several
 120  * endpoints sharing the same serializer. There is a subtle problem related to
 121  * the serializer destruction. The serializer should never be destroyed by any
 122  * function executed inside serializer. This means that close has to wait till
 123  * all serializer activity for this endpoint is finished before it can drop the
 124  * last reference on the endpoint (which may as well free the serializer).  This
 125  * is only relevant for COTS transports which manage serializers
 126  * dynamically. For CLTS transports close may complete without waiting for all
 127  * serializer activity to finish since serializer is only destroyed at driver
 128  * detach time.
 129  *
 130  * COTS endpoints keep track of the number of outstanding requests on the
 131  * serializer for the endpoint. The code handling accept() avoids changing
 132  * client serializer if it has any pending messages on the serializer and
 133  * instead moves acceptor to listener's serializer.
 134  *
 135  *
 136  * Use of hash tables
 137  * ------------------
 138  *
 139  * The driver uses modhash hash table implementation. Each transport uses two
 140  * hash tables - one for finding endpoints by acceptor ID and another one for
 141  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
 142  * pair of hash tables since sockets only use TICOTSORD.
 143  *
 144  * All hash tables lookups increment a reference count for returned endpoints,
 145  * so we may safely check the endpoint state even when the endpoint is removed
 146  * from the hash by another thread immediately after it is found.
 147  *
 148  *
 149  * CLOSE processing
 150  * ================
 151  *
 * The driver enters serializer twice on close(). The close sequence is the
 * following:
 *
 * 1) Wait until closing is safe (te_closewait becomes zero).
 *      This step is needed to prevent close during serializer switches. In most
 *      cases (close happening after connection establishment) te_closewait is
 *      zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within serializer and wait for it to complete.
 *
 *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 *      It also needs to clear write-side q_next pointers - this should be done
 *      before qprocsoff().
 *
 *    This synchronous serializer entry during close is needed to ensure that
 *    the queue is valid everywhere inside the serializer.
 *
 *    Note that in many cases close will execute tl_close_ser() synchronously,
 *    so it will not wait at all.
 *
 * 4) Call qprocsoff().
 * 5) Call tl_close_finish_ser() within the serializer and wait for it to
 *      complete (for COTS transports). For CLTS transports there is no wait.
 *
 *      tl_close_finish_ser() finishes the close process and wakes up waiting
 *      close if there is any.
 *
 *    Note that in most cases close will enter tl_close_finish_ser()
 *    synchronously and will not wait at all.
 181  *
 182  *
 183  * Flow Control
 184  * ============
 185  *
 186  * The driver implements both read and write side service routines. No one calls
 187  * putq() on the read queue. The read side service routine tl_rsrv() is called
 188  * when the read side stream is back-enabled. It enters serializer synchronously
 189  * (waits till serializer processing is complete). Within serializer it
 190  * back-enables all endpoints blocked by the queue for connection-less
 191  * transports and enables write side service processing for the peer for
 192  * connection-oriented transports.
 193  *
 194  * Read and write side service routines use special mblk_sized space in the
 195  * endpoint structure to enter perimeter.
 196  *
 197  * Write-side flow control
 198  * -----------------------
 199  *
 200  * Write side flow control is a bit tricky. The driver needs to deal with two
 201  * message queues - the explicit STREAMS message queue maintained by
 202  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
 203  * queues should be synchronized to preserve message ordering and should
 204  * maintain a single order determined by the order in which messages enter
 205  * tl_wput(). In order to maintain the ordering between these two queues the
 206  * STREAMS queue is only manipulated within the serializer, so the ordering is
 207  * provided by the serializer.
 208  *
 209  * Functions called from the tl_wsrv() sometimes may call putbq(). To
 210  * immediately stop any further processing of the STREAMS message queues the
 211  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
 212  * side service processing stops when the flag is set.
 213  *
 * The tl_wsrv() function enters serializer synchronously and waits for it to
 * complete. The serializer call-back tl_wsrv_ser() either drains all messages
 * on the STREAMS queue or terminates when it notices the te_nowsrv flag
 * set. Note that the maximum amount of messages processed by tl_wsrv_ser() is
 * always bounded by the amount of messages on the STREAMS queue at the time
 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
 * queue from another serialized entry which can't happen in parallel. This
 * guarantees that tl_wsrv_ser() completes in bounded time (there is no risk
 * of it draining forever while writer places new messages on the STREAMS
 * queue).
 224  *
 225  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
 226  *
 227  *
 228  * Unix Domain Sockets
 229  * ===================
 230  *
 231  * The driver knows the structure of Unix Domain sockets addresses and treats
 232  * them differently from generic TLI addresses. For sockets implicit binds are
 233  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
 234  * instead of using address length of zero. Explicit binds specify
 235  * SOU_MAGIC_EXPLICIT as magic.
 236  *
 237  * For implicit binds we always use minor number as soua_vp part of the address
 238  * and avoid any hash table lookups. This saves two hash tables lookups per
 239  * anonymous bind.
 240  *
 * For explicit addresses we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
 244  *
 245  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 246  * tep structure, so it should be never freed.
 247  *
 248  * Also for sockets the driver always uses minor number as acceptor id.
 249  *
 250  * TPI VIOLATIONS
 251  * --------------
 252  *
 253  * This driver violates TPI in several respects for Unix Domain Sockets:
 254  *
 255  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
 256  *      is requested and the endpoint is already in use. There is no point in
 257  *      generating an unused address since this address will be rejected by
 258  *      sockfs anyway. For implicit binds it always generates a new address
 259  *      (sets soua_vp to its minor number).
 260  *
 261  * 2) It always uses minor number as acceptor ID and never uses queue
 262  *      pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
 263  *      message and they do not use the queue pointer.
 264  *
 265  * 3) For Listener sockets the usual sequence is to issue bind() zero backlog
 266  *      followed by listen(). The listen() should be issued with non-zero
 267  *      backlog, so sotpi_listen() issues unbind request followed by bind
 268  *      request to the same address but with a non-zero qlen value. Both
 269  *      tl_bind() and tl_unbind() require write lock on the hash table to
 270  *      insert/remove the address. The driver does not remove the address from
 271  *      the hash for endpoints that are bound to the explicit address and have
 272  *      backlog of zero. During T_BIND_REQ processing if the address requested
 273  *      is equal to the address the endpoint already has it updates the backlog
 274  *      without reinserting the address in the hash table. This optimization
 275  *      avoids two hash table updates for each listener created. It always
 276  *      avoids the problem of a "stolen" address when another listener may use
 277  *      the same address between the unbind and bind and suddenly listen() fails
 278  *      because address is in use even though the bind() succeeded.
 279  *
 280  *
 281  * CONNECTIONLESS TRANSPORTS
 282  * =========================
 283  *
 284  * Connectionless transports all share the same serializer (one for TLI and one
 285  * for Sockets). Functions executing behind serializer can check or modify state
 286  * of any endpoint.
 287  *
 288  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
 289  * te_lastep field. The next time X talks to some address A it checks whether A
 290  * is the same as Y's address and if it is there is no need to lookup Y. If the
 291  * address is different or the state of Y is not appropriate (e.g. closed or not
 292  * idle) X does a lookup using tl_find_peer() and caches the new address.
 293  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
 294  * on the endpoint found.
 295  *
 296  * During close of endpoint Y it doesn't try to remove itself from other
 297  * endpoints caches. They will detect that Y is gone and will search the peer
 298  * endpoint again.
 299  *
 300  * Flow Control Handling.
 301  * ----------------------
 302  *
 303  * Each connectionless endpoint keeps a list of endpoints which are
 304  * flow-controlled by its queue. It also keeps a pointer to the queue which
 305  * flow-controls itself.  Whenever flow control releases for endpoint X it
 306  * enables all queues from the list. During close it also back-enables everyone
 307  * in the list. If X is flow-controlled when it is closing it removes it from
 308  * the peers list.
 309  *
 310  * DATA STRUCTURES
 311  * ===============
 312  *
 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
 * of pending connections (tl_icon_t). For connectionless transports it keeps a
 * list of endpoints flow controlled by this one.
 317  *
 318  * Each transport type is represented by a per-transport data structure
 319  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
 320  * endpoint address hash tables for each transport. It also contains pointer to
 321  * transport serializer for connectionless transports.
 322  *
 323  * Each endpoint keeps a link to its transport structure, so the code can find
 324  * all per-transport information quickly.
 325  */
 326 
 327 #include        <sys/types.h>
 328 #include        <sys/inttypes.h>
 329 #include        <sys/stream.h>
 330 #include        <sys/stropts.h>
 331 #define _SUN_TPI_VERSION 2
 332 #include        <sys/tihdr.h>
 333 #include        <sys/strlog.h>
 334 #include        <sys/debug.h>
 335 #include        <sys/cred.h>
 336 #include        <sys/errno.h>
 337 #include        <sys/kmem.h>
 338 #include        <sys/id_space.h>
 339 #include        <sys/modhash.h>
 340 #include        <sys/mkdev.h>
 341 #include        <sys/tl.h>
 342 #include        <sys/stat.h>
 343 #include        <sys/conf.h>
 344 #include        <sys/modctl.h>
 345 #include        <sys/strsun.h>
 346 #include        <sys/socket.h>
 347 #include        <sys/socketvar.h>
 348 #include        <sys/sysmacros.h>
 349 #include        <sys/xti_xtiopt.h>
 350 #include        <sys/ddi.h>
 351 #include        <sys/sunddi.h>
 352 #include        <sys/zone.h>
 353 #include        <inet/common.h>   /* typedef int (*pfi_t)() for inet/optcom.h */
 354 #include        <inet/optcom.h>
 355 #include        <sys/strsubr.h>
 356 #include        <sys/ucred.h>
 357 #include        <sys/suntpi.h>
 358 #include        <sys/list.h>
 359 #include        <sys/serializer.h>
 360 
 361 /*
 362  * TBD List
 363  * 14 Eliminate state changes through table
 364  * 16. AF_UNIX socket options
 365  * 17. connect() for ticlts
 366  * 18. support for "netstat" to show AF_UNIX plus TLI local
 367  *      transport connections
 368  * 21. sanity check to flushing on sending M_ERROR
 369  */
 370 
 371 /*
 372  * CONSTANT DECLARATIONS
 373  * --------------------
 374  */
 375 
 376 /*
 377  * Local declarations
 378  */
 379 #define BADSEQNUM       (-1)    /* initial seq number used by T_DISCON_IND */
 380 #define TL_BUFWAIT      (10000) /* usecs to wait for allocb buffer timeout */
 381 #define TL_TIDUSZ (64*1024)     /* tidu size when "strmsgz" is unlimited (0) */
 382 /*
 383  * Hash tables size.
 384  */
 385 #define TL_HASH_SIZE 311
 386 
 387 /*
 388  * Definitions for module_info
 389  */
 390 #define         TL_ID           (104)           /* module ID number */
 391 #define         TL_NAME         "tl"            /* module name */
 392 #define         TL_MINPSZ       (0)             /* min packet size */
 393 #define         TL_MAXPSZ       INFPSZ          /* max packet size ZZZ */
 394 #define         TL_HIWAT        (16*1024)       /* hi water mark */
 395 #define         TL_LOWAT        (256)           /* lo water mark */
 396 /*
 397  * Definition of minor numbers/modes for new transport provider modes.
 398  * We view the socket use as a separate mode to get a separate name space.
 399  */
 400 #define         TL_TICOTS       0       /* connection oriented transport */
 401 #define         TL_TICOTSORD    1       /* COTS w/ orderly release */
 402 #define         TL_TICLTS       2       /* connectionless transport */
 403 #define         TL_UNUSED       3
 404 #define         TL_SOCKET       4       /* Socket */
 405 #define         TL_SOCK_COTS    (TL_SOCKET | TL_TICOTS)
 406 #define         TL_SOCK_COTSORD (TL_SOCKET | TL_TICOTSORD)
 407 #define         TL_SOCK_CLTS    (TL_SOCKET | TL_TICLTS)
 408 
 409 #define         TL_MINOR_MASK   0x7
 410 #define         TL_MINOR_START  (TL_TICLTS + 1)
 411 
 412 /*
 413  * LOCAL MACROS
 414  */
 415 #define T_ALIGN(p)      P2ROUNDUP((p), sizeof (t_scalar_t))
 416 
 417 /*
 418  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
 419  */
 420 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
 421 static int tl_close(queue_t *, int, cred_t *);
 422 static int tl_wput(queue_t *, mblk_t *);
 423 static int tl_wsrv(queue_t *);
 424 static int tl_rsrv(queue_t *);
 425 
 426 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
 427 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
 428 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 429 
 430 
 431 /*
 432  * GLOBAL DATA STRUCTURES AND VARIABLES
 433  * -----------------------------------
 434  */
 435 
 436 /*
 437  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
 438  * For now, we only manage the SO_RECVUCRED option but we also have
 439  * harmless dummy options to make things work with some common code we access.
 440  */
opdes_t tl_opt_arr[] = {
	/* The SO_TYPE is needed for the hack below */
	{
		SO_TYPE,		/* reports the socket type */
		SOL_SOCKET,
		OA_R,			/* read-only for unprivileged callers */
		OA_R,			/* read-only for privileged callers */
		OP_NP,			/* no privilege needed */
		0,
		sizeof (t_scalar_t),
		0
	},
	{
		/* the one option actually managed here (see comment above) */
		SO_RECVUCRED,
		SOL_SOCKET,
		OA_RW,			/* read-write for unprivileged callers */
		OA_RW,			/* read-write for privileged callers */
		OP_NP,
		0,
		sizeof (int),
		0
	}
};
 464 
 465 /*
 466  * Table of all supported levels
 467  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 468  * any supported options so we need this info separately.
 469  *
 470  * This is needed only for topmost tpi providers.
 471  */
optlevel_t	tl_valid_levels_arr[] = {
	XTI_GENERIC,		/* generic XTI-level options */
	SOL_SOCKET,		/* socket level (SO_TYPE, SO_RECVUCRED above) */
	TL_PROT_LEVEL		/* this driver's own protocol level */
};
 477 
 478 #define TL_VALID_LEVELS_CNT     A_CNT(tl_valid_levels_arr)
 479 /*
 480  * Current upper bound on the amount of space needed to return all options.
 481  * Additional options with data size of sizeof(long) are handled automatically.
 * Others need special handling.
 483  */
 484 #define TL_MAX_OPT_BUF_LEN                                              \
 485                 ((A_CNT(tl_opt_arr) << 2) +                               \
 486                 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +          \
 487                 + 64 + sizeof (struct T_optmgmt_ack))
 488 
 489 #define TL_OPT_ARR_CNT  A_CNT(tl_opt_arr)
 490 
 491 /*
 492  *      transport addr structure
 493  */
typedef struct tl_addr {
	zoneid_t	ta_zoneid;		/* Zone scope of address */
	t_scalar_t	ta_alen;		/* length of abuf, in bytes */
	void		*ta_abuf;		/* the addr itself */
} tl_addr_t;
 499 
 500 /*
 501  * Refcounted version of serializer.
 502  */
typedef struct tl_serializer {
	/*
	 * Number of endpoints sharing this serializer; the serializer may
	 * only be destroyed when the last reference is dropped (see the
	 * "Serializer management" section of the file header comment).
	 */
	uint_t		ts_refcnt;
	serializer_t	*ts_serializer;	/* the underlying serializer */
} tl_serializer_t;
 507 
 508 /*
 509  * Each transport type has a separate state.
 510  * Per-transport state.
 511  */
typedef struct tl_transport_state {
	char		*tr_name;	/* transport name (e.g. "ticots") */
	minor_t		tr_minor;	/* minor number / transport mode */
	uint32_t	tr_defaddr;	/* default address seed (TL_DFADDR) */
	mod_hash_t	*tr_ai_hash;	/* endpoints hashed by acceptor ID */
	mod_hash_t	*tr_addr_hash;	/* endpoints hashed by address */
	/* shared serializer; used by connectionless transports only */
	tl_serializer_t	*tr_serializer;
} tl_transport_state_t;
 520 
 521 #define TL_DFADDR 0x1000
 522 
 523 static tl_transport_state_t tl_transports[] = {
 524         { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
 525         { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
 526         { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
 527         { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
 528         { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
 529         { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL },
 530         { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
 531 };
 532 
 533 #define TL_MAXTRANSPORT A_CNT(tl_transports)
 534 
 535 struct tl_endpt;
 536 typedef struct tl_endpt tl_endpt_t;
 537 
 538 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
 539 
 540 /*
 541  * Data structure used to represent pending connects.
 542  * Records enough information so that the connecting peer can close
 543  * before the connection gets accepted.
 544  */
typedef struct tl_icon {
	list_node_t	ti_node;	/* linkage on listener's pending list */
	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
	t_scalar_t	ti_seqno;	/* Sequence number */
} tl_icon_t;
 551 
/* Shorthand for the Unix domain socket address form used by sockfs. */
typedef struct so_ux_addr soux_addr_t;
#define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)

/*
 * Maximum number of unaccepted connection indications allowed per listener.
 */
#define	TL_MAXQLEN	4096
int tl_maxqlen = TL_MAXQLEN;	/* global (non-static): site-tunable limit */
 560 
 561 /*
 562  *      transport endpoint structure
 563  */
struct tl_endpt {
	queue_t		*te_rq;		/* stream read queue */
	queue_t		*te_wq;		/* stream write queue */
	uint32_t	te_refcnt;	/* see "References accounting" above */
	int32_t		te_state;	/* TPI state of endpoint */
	minor_t		te_minor;	/* minor number */
#define	te_seqno	te_minor
	uint_t		te_flag;	/* flag field */
	boolean_t	te_nowsrv;	/* stop wsrv draining (set by TL_PUTBQ) */
	tl_serializer_t *te_ser;	/* Serializer to use */
#define	te_serializer	te_ser->ts_serializer

	soux_addr_t	te_uxaddr;	/* Socket address */
#define	te_magic	te_uxaddr.soua_magic
#define	te_vp		te_uxaddr.soua_vp
	tl_addr_t	te_ap;		/* addr bound to this endpt */
#define	te_zoneid te_ap.ta_zoneid
#define	te_alen	te_ap.ta_alen
#define	te_abuf	te_ap.ta_abuf

	tl_transport_state_t *te_transport;	/* per-transport state */
#define	te_addrhash	te_transport->tr_addr_hash
#define	te_aihash	te_transport->tr_ai_hash
#define	te_defaddr	te_transport->tr_defaddr
	cred_t		*te_credp;	/* endpoint user credentials */
	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */

	/*
	 * State specific for connection-oriented and connectionless transports.
	 */
	union {
		/* Connection-oriented state. */
		struct {
			t_uscalar_t _te_nicon;	/* count of conn requests */
			t_uscalar_t _te_qlen;	/* max conn requests */
			tl_endpt_t  *_te_oconp;	/* conn request pending */
			tl_endpt_t  *_te_conp;	/* connected endpt */
#ifndef _ILP32
			/* presumably keeps union layout consistent on LP64 */
			void	    *_te_pad;
#endif
			list_t	_te_iconp;	/* list of conn ind. pending */
		} _te_cots_state;
		/* Connection-less state. */
		struct {
			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
			list_node_t _te_flows;	/* lists of connections */
			list_t	_te_flowlist;	/* Who flowcontrols on me */
		} _te_clts_state;
	} _te_transport_state;
#define	te_nicon	_te_transport_state._te_cots_state._te_nicon
#define	te_qlen		_te_transport_state._te_cots_state._te_qlen
#define	te_oconp	_te_transport_state._te_cots_state._te_oconp
#define	te_conp		_te_transport_state._te_cots_state._te_conp
#define	te_iconp	_te_transport_state._te_cots_state._te_iconp
#define	te_lastep	_te_transport_state._te_clts_state._te_lastep
#define	te_flowq	_te_transport_state._te_clts_state._te_flowq
#define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
#define	te_flows	_te_transport_state._te_clts_state._te_flows

	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
	timeout_id_t	te_timoutid;	/* outstanding timeout id */
	pid_t		te_cpid;	/* cached pid of endpoint */
	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
	/*
	 * Pieces of the endpoint state needed for closing.
	 * (See the "CLOSE processing" section of the file header comment.)
	 */
	kmutex_t	te_closelock;	/* close synchronization lock */
	kcondvar_t	te_closecv;	/* close waits on this cv */
	uint8_t		te_closing;	/* The endpoint started closing */
	uint8_t		te_closewait;	/* Wait in close until zero */
	mblk_t		te_closemp;	/* for entering serializer on close */
	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
	kmutex_t	te_srv_lock;	/* with te_srv_cv, tracks srv activity */
	kcondvar_t	te_srv_cv;
	uint8_t		te_rsrv_active;	/* Running in tl_rsrv() */
	uint8_t		te_wsrv_active;	/* Running in tl_wsrv() */
	/*
	 * Pieces of the endpoint state needed for serializer transitions.
	 */
	kmutex_t	te_ser_lock;	/* Protects the count below */
	uint_t		te_ser_count;	/* Number of messages on serializer */
};
 648 
 649 /*
 650  * Flag values. Lower 4 bits specify that transport used.
 651  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
 652  * they allow to identify the endpoint more easily.
 653  */
 654 #define TL_LISTENER     0x00010 /* the listener endpoint */
 655 #define TL_ACCEPTOR     0x00020 /* the accepting endpoint */
 656 #define TL_EAGER        0x00040 /* connecting endpoint */
 657 #define TL_ACCEPTED     0x00080 /* accepted connection */
 658 #define TL_SETCRED      0x00100 /* flag to indicate sending of credentials */
 659 #define TL_SETUCRED     0x00200 /* flag to indicate sending of ucred */
 660 #define TL_SOCKUCRED    0x00400 /* flag to indicate sending of SCM_UCRED */
 661 #define TL_ADDRHASHED   0x01000 /* Endpoint address is stored in te_addrhash */
 662 #define TL_CLOSE_SER    0x10000 /* Endpoint close has entered the serializer */
 663 /*
 664  * Boolean checks for the endpoint type.
 665  */
 666 #define         IS_CLTS(x)      (((x)->te_flag & TL_TICLTS) != 0)
 667 #define         IS_COTS(x)      (((x)->te_flag & TL_TICLTS) == 0)
 668 #define         IS_COTSORD(x)   (((x)->te_flag & TL_TICOTSORD) != 0)
 669 #define         IS_SOCKET(x)    (((x)->te_flag & TL_SOCKET) != 0)
 670 
 671 /*
 672  * Certain operations are always used together. These macros reduce the chance
 673  * of missing a part of a combination.
 674  */
 675 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
 676 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
 677 
 678 #define TL_PUTBQ(x, mp) {               \
 679         ASSERT(!((x)->te_flag & TL_CLOSE_SER));  \
 680         (x)->te_nowsrv = B_TRUE;     \
 681         (void) putbq((x)->te_wq, mp);        \
 682 }
 683 
 684 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
 685 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
 686 
 687 /*
 688  * STREAMS driver glue data structures.
 689  */
 690 static  struct  module_info     tl_minfo = {
 691         TL_ID,                  /* mi_idnum */
 692         TL_NAME,                /* mi_idname */
 693         TL_MINPSZ,              /* mi_minpsz */
 694         TL_MAXPSZ,              /* mi_maxpsz */
 695         TL_HIWAT,               /* mi_hiwat */
 696         TL_LOWAT                /* mi_lowat */
 697 };
 698 
 699 static  struct  qinit   tl_rinit = {
 700         NULL,                   /* qi_putp */
 701         tl_rsrv,                /* qi_srvp */
 702         tl_open,                /* qi_qopen */
 703         tl_close,               /* qi_qclose */
 704         NULL,                   /* qi_qadmin */
 705         &tl_minfo,          /* qi_minfo */
 706         NULL                    /* qi_mstat */
 707 };
 708 
 709 static  struct  qinit   tl_winit = {
 710         tl_wput,                /* qi_putp */
 711         tl_wsrv,                /* qi_srvp */
 712         NULL,                   /* qi_qopen */
 713         NULL,                   /* qi_qclose */
 714         NULL,                   /* qi_qadmin */
 715         &tl_minfo,          /* qi_minfo */
 716         NULL                    /* qi_mstat */
 717 };
 718 
 719 static  struct streamtab        tlinfo = {
 720         &tl_rinit,          /* st_rdinit */
 721         &tl_winit,          /* st_wrinit */
 722         NULL,                   /* st_muxrinit */
 723         NULL                    /* st_muxwrinit */
 724 };
 725 
 726 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
 727     nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
 728 
 729 static struct modldrv modldrv = {
 730         &mod_driverops,             /* Type of module -- pseudo driver here */
 731         "TPI Local Transport (tl)",
 732         &tl_devops,         /* driver ops */
 733 };
 734 
 735 /*
 736  * Module linkage information for the kernel.
 737  */
 738 static struct modlinkage modlinkage = {
 739         MODREV_1,
 740         &modldrv,
 741         NULL
 742 };
 743 
 744 /*
 745  * Templates for response to info request
 746  * Check sanity of unlimited connect data etc.
 747  */
 748 
 749 #define         TL_CLTS_PROVIDER_FLAG   (XPG4_1 | SENDZERO)
 750 #define         TL_COTS_PROVIDER_FLAG   (XPG4_1 | SENDZERO)
 751 
 752 static struct T_info_ack tl_cots_info_ack =
 753         {
 754                 T_INFO_ACK,     /* PRIM_type -always T_INFO_ACK */
 755                 T_INFINITE,     /* TSDU size */
 756                 T_INFINITE,     /* ETSDU size */
 757                 T_INFINITE,     /* CDATA_size */
 758                 T_INFINITE,     /* DDATA_size */
 759                 T_INFINITE,     /* ADDR_size  */
 760                 T_INFINITE,     /* OPT_size */
 761                 0,              /* TIDU_size - fill at run time */
 762                 T_COTS,         /* SERV_type */
 763                 -1,             /* CURRENT_state */
 764                 TL_COTS_PROVIDER_FLAG   /* PROVIDER_flag */
 765         };
 766 
 767 static struct T_info_ack tl_clts_info_ack =
 768         {
 769                 T_INFO_ACK,     /* PRIM_type - always T_INFO_ACK */
 770                 0,              /* TSDU_size - fill at run time */
 771                 -2,             /* ETSDU_size -2 => not supported */
 772                 -2,             /* CDATA_size -2 => not supported */
 773                 -2,             /* DDATA_size  -2 => not supported */
 774                 -1,             /* ADDR_size -1 => infinite */
 775                 -1,             /* OPT_size */
 776                 0,              /* TIDU_size - fill at run time */
 777                 T_CLTS,         /* SERV_type */
 778                 -1,             /* CURRENT_state */
 779                 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
 780         };
 781 
 782 /*
 783  * private copy of devinfo pointer used in tl_info
 784  */
 785 static dev_info_t *tl_dip;
 786 
 787 /*
 788  * Endpoints cache.
 789  */
 790 static kmem_cache_t *tl_cache;
 791 /*
 792  * Minor number space.
 793  */
 794 static id_space_t *tl_minors;
 795 
 796 /*
 797  * Default Data Unit size.
 798  */
 799 static t_scalar_t tl_tidusz;
 800 
 801 /*
 802  * Size of hash tables.
 803  */
 804 static size_t tl_hash_size = TL_HASH_SIZE;
 805 
 806 /*
 807  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 808  * for sockets.
 809  */
 810 static int tl_disable_early_connect = 0;
 811 static int tl_client_closing_when_accepting;
 812 
 813 static int tl_serializer_noswitch;
 814 
#define nr      127             /* not reachable */

#define TE_NOEVENTS     28

/*
 * TPI state transition table: nextstate[event][current-state] yields the
 * next TPI state, or "nr" (127) when the event is illegal in that state.
 * Rows are indexed by the TE_* event constants defined inline below;
 * columns are the TS_* states 0..16 (TS_NOSTATES wide).
 */
static char nextstate[TE_NOEVENTS][TS_NOSTATES] = {
                                /* STATES */
        /* 0  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16 */

/* Initialization events */

#define TE_BIND_REQ     0       /* bind request                         */
        { 1, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_UNBIND_REQ   1       /* unbind request                       */
        {nr, nr, nr,  2, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_OPTMGMT_REQ  2       /* manage options req                   */
        {nr, nr, nr,  4, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_BIND_ACK     3       /* bind acknowledment                   */
        {nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_OPTMGMT_ACK  4       /* manage options ack                   */
        {nr, nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_ERROR_ACK    5       /* error acknowledgment                 */
        {nr,  0,  3, nr,  3,  3, nr, nr,  7, nr, nr, nr,  6,  7,  9, 10, 11},
#define TE_OK_ACK1      6       /* ok ack  seqcnt == 0                  */
        {nr, nr,  0, nr, nr,  6, nr, nr, nr, nr, nr, nr,  3, nr,  3,  3,  3},
#define TE_OK_ACK2      7       /* ok ack  seqcnt == 1, q == resq       */
        {nr, nr, nr, nr, nr, nr, nr, nr,  9, nr, nr, nr, nr,  3, nr, nr, nr},
#define TE_OK_ACK3      8       /* ok ack  seqcnt == 1, q != resq       */
        {nr, nr, nr, nr, nr, nr, nr, nr,  3, nr, nr, nr, nr,  3, nr, nr, nr},
#define TE_OK_ACK4      9       /* ok ack  seqcnt > 1                        */
        {nr, nr, nr, nr, nr, nr, nr, nr,  7, nr, nr, nr, nr,  7, nr, nr, nr},

/* Connection oriented events */
#define TE_CONN_REQ     10      /* connection request                   */
        {nr, nr, nr,  5, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_CONN_RES     11      /* connection response                  */
        {nr, nr, nr, nr, nr, nr, nr,  8, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_DISCON_REQ   12      /* disconnect request                   */
        {nr, nr, nr, nr, nr, nr, 12, 13, nr, 14, 15, 16, nr, nr, nr, nr, nr},
#define TE_DATA_REQ     13      /* data request                         */
        {nr, nr, nr, nr, nr, nr, nr, nr, nr,  9, nr, 11, nr, nr, nr, nr, nr},
#define TE_EXDATA_REQ   14      /* expedited data request               */
        {nr, nr, nr, nr, nr, nr, nr, nr, nr,  9, nr, 11, nr, nr, nr, nr, nr},
#define TE_ORDREL_REQ   15      /* orderly release req                  */
        {nr, nr, nr, nr, nr, nr, nr, nr, nr, 10, nr,  3, nr, nr, nr, nr, nr},
#define TE_CONN_IND     16      /* connection indication                */
        {nr, nr, nr,  7, nr, nr, nr,  7, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_CONN_CON     17      /* connection confirmation              */
        {nr, nr, nr, nr, nr, nr,  9, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_DATA_IND     18      /* data indication                      */
        {nr, nr, nr, nr, nr, nr, nr, nr, nr,  9, 10, nr, nr, nr, nr, nr, nr},
#define TE_EXDATA_IND   19      /* expedited data indication            */
        {nr, nr, nr, nr, nr, nr, nr, nr, nr,  9, 10, nr, nr, nr, nr, nr, nr},
#define TE_ORDREL_IND   20      /* orderly release ind                  */
        {nr, nr, nr, nr, nr, nr, nr, nr, nr, 11,  3, nr, nr, nr, nr, nr, nr},
#define TE_DISCON_IND1  21      /* disconnect indication seq == 0       */
        {nr, nr, nr, nr, nr, nr,  3, nr, nr,  3,  3,  3, nr, nr, nr, nr, nr},
#define TE_DISCON_IND2  22      /* disconnect indication seq == 1       */
        {nr, nr, nr, nr, nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_DISCON_IND3  23      /* disconnect indication seq > 1     */
        {nr, nr, nr, nr, nr, nr, nr,  7, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_PASS_CONN    24      /* pass connection                      */
        {nr, nr, nr,  9, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},


/* Unit data events */

#define TE_UNITDATA_REQ 25      /* unitdata request                     */
        {nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_UNITDATA_IND 26      /* unitdata indication                  */
        {nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
#define TE_UDERROR_IND  27      /* unitdata error indication            */
        {nr, nr, nr,  3, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr, nr},
};
 888 
 889 
 890 
 891 /*
 892  * LOCAL FUNCTION PROTOTYPES
 893  * -------------------------
 894  */
 895 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
 896 static void tl_do_proto(mblk_t *, tl_endpt_t *);
 897 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
 898 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
 899 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
 900     t_scalar_t);
 901 static void tl_bind(mblk_t *, tl_endpt_t *);
 902 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
 903 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
 904 static void tl_unbind(mblk_t *, tl_endpt_t *);
 905 static void tl_optmgmt(queue_t *, mblk_t *);
 906 static void tl_conn_req(queue_t *, mblk_t *);
 907 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
 908 static void tl_conn_res(mblk_t *, tl_endpt_t *);
 909 static void tl_discon_req(mblk_t *, tl_endpt_t *);
 910 static void tl_capability_req(mblk_t *, tl_endpt_t *);
 911 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
 912 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
 913 static void tl_info_req(mblk_t *, tl_endpt_t *);
 914 static void tl_addr_req(mblk_t *, tl_endpt_t *);
 915 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
 916 static void tl_data(mblk_t  *, tl_endpt_t *);
 917 static void tl_exdata(mblk_t *, tl_endpt_t *);
 918 static void tl_ordrel(mblk_t *, tl_endpt_t *);
 919 static void tl_unitdata(mblk_t *, tl_endpt_t *);
 920 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
 921 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
 922 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
 923 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
 924 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
 925 static void tl_cl_backenable(tl_endpt_t *);
 926 static void tl_co_unconnect(tl_endpt_t *);
 927 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
 928 static void tl_discon_ind(tl_endpt_t *, uint32_t);
 929 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
 930 static mblk_t *tl_ordrel_ind_alloc(void);
 931 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
 932 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
 933 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
 934 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
 935 static void tl_icon_freemsgs(mblk_t **);
 936 static void tl_merror(queue_t *, mblk_t *, int);
 937 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
 938 static int tl_default_opt(queue_t *, int, int, uchar_t *);
 939 static int tl_get_opt(queue_t *, int, int, uchar_t *);
 940 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
 941     uchar_t *, void *, cred_t *);
 942 static void tl_memrecover(queue_t *, mblk_t *, size_t);
 943 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
 944 static void tl_free(tl_endpt_t *);
 945 static int  tl_constructor(void *, void *, int);
 946 static void tl_destructor(void *, void *);
 947 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
 948 static tl_serializer_t *tl_serializer_alloc(int);
 949 static void tl_serializer_refhold(tl_serializer_t *);
 950 static void tl_serializer_refrele(tl_serializer_t *);
 951 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
 952 static void tl_serializer_exit(tl_endpt_t *);
 953 static boolean_t tl_noclose(tl_endpt_t *);
 954 static void tl_closeok(tl_endpt_t *);
 955 static void tl_refhold(tl_endpt_t *);
 956 static void tl_refrele(tl_endpt_t *);
 957 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
 958 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
 959 static void tl_close_ser(mblk_t *, tl_endpt_t *);
 960 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
 961 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
 962 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
 963 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
 964 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
 965 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
 966 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
 967 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
 968 static void tl_addr_unbind(tl_endpt_t *);
 969 
 970 /*
 971  * Intialize option database object for TL
 972  */
 973 
 974 optdb_obj_t tl_opt_obj = {
 975         tl_default_opt,         /* TL default value function pointer */
 976         tl_get_opt,             /* TL get function pointer */
 977         tl_set_opt,             /* TL set function pointer */
 978         TL_OPT_ARR_CNT,         /* TL option database count of entries */
 979         tl_opt_arr,             /* TL option database */
 980         TL_VALID_LEVELS_CNT,    /* TL valid level count of entries */
 981         tl_valid_levels_arr     /* TL valid level array */
 982 };
 983 
 984 /*
 985  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
 986  * ---------------------------------------
 987  */
 988 
 989 /*
 990  * Loadable module routines
 991  */
 992 int
 993 _init(void)
 994 {
 995         return (mod_install(&modlinkage));
 996 }
 997 
 998 int
 999 _fini(void)
1000 {
1001         return (mod_remove(&modlinkage));
1002 }
1003 
1004 int
1005 _info(struct modinfo *modinfop)
1006 {
1007         return (mod_info(&modlinkage, modinfop));
1008 }
1009 
1010 /*
1011  * Driver Entry Points and Other routines
1012  */
/*
 * Attach entry point: set up all global driver state.
 *
 * Creates one minor node per transport type, the endpoint kmem cache,
 * the minor-number id space, and per-transport acceptor-id and address
 * hash tables.  Socket COTSORD shares the socket COTS hash tables
 * (shared namespace), and each connectionless transport gets a shared
 * serializer; COTS endpoints allocate private serializers at open time.
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        int i;
        char name[32];

        /*
         * Resume from a checkpoint state.
         */
        if (cmd == DDI_RESUME)
                return (DDI_SUCCESS);

        if (cmd != DDI_ATTACH)
                return (DDI_FAILURE);

        /*
         * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
         * streams message sizes can be unlimited. We use a defined constant
         * instead.
         */
        tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

        /*
         * Create subdevices for each transport.
         */
        for (i = 0; i < TL_UNUSED; i++) {
                if (ddi_create_minor_node(devi,
                    tl_transports[i].tr_name,
                    S_IFCHR, tl_transports[i].tr_minor,
                    DDI_PSEUDO, 0) == DDI_FAILURE) {
                        /* Undo any nodes created on earlier iterations. */
                        ddi_remove_minor_node(devi, NULL);
                        return (DDI_FAILURE);
                }
        }

        tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
            0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

        if (tl_cache == NULL) {
                ddi_remove_minor_node(devi, NULL);
                return (DDI_FAILURE);
        }

        tl_minors = id_space_create("tl_minor_space",
            TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

        /*
         * Create hash tables and serializers for each transport.
         */
        for (i = 0; i < TL_MAXTRANSPORT; i++) {
                tl_transport_state_t *t = &tl_transports[i];

                if (i == TL_UNUSED)
                        continue;

                /* Socket COTSORD shares namespace with COTS */
                if (i == TL_SOCK_COTSORD) {
                        t->tr_ai_hash =
                            tl_transports[TL_SOCK_COTS].tr_ai_hash;
                        ASSERT(t->tr_ai_hash != NULL);
                        t->tr_addr_hash =
                            tl_transports[TL_SOCK_COTS].tr_addr_hash;
                        ASSERT(t->tr_addr_hash != NULL);
                        continue;
                }

                /*
                 * Create hash tables.
                 */
                (void) snprintf(name, sizeof (name), "%s_ai_hash",
                    t->tr_name);
#ifdef _ILP32
                /*
                 * NOTE(review): the ptrhash with sizeof (queue_t) suggests
                 * that in ILP32 non-socket acceptor ids are queue pointers
                 * while sockets use minor numbers — confirm in tl_open.
                 */
                if (i & TL_SOCKET)
                        t->tr_ai_hash =
                            mod_hash_create_idhash(name, tl_hash_size - 1,
                            mod_hash_null_valdtor);
                else
                        t->tr_ai_hash =
                            mod_hash_create_ptrhash(name, tl_hash_size,
                            mod_hash_null_valdtor, sizeof (queue_t));
#else
                t->tr_ai_hash =
                    mod_hash_create_idhash(name, tl_hash_size - 1,
                    mod_hash_null_valdtor);
#endif /* _ILP32 */

                if (i & TL_SOCKET) {
                        /* Socket addresses are fixed-size (hash by pointer). */
                        (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
                            t->tr_name);
                        t->tr_addr_hash = mod_hash_create_ptrhash(name,
                            tl_hash_size, mod_hash_null_valdtor,
                            sizeof (uintptr_t));
                } else {
                        /* TLI addresses need custom hash/compare functions. */
                        (void) snprintf(name, sizeof (name), "%s_addr_hash",
                            t->tr_name);
                        t->tr_addr_hash = mod_hash_create_extended(name,
                            tl_hash_size, mod_hash_null_keydtor,
                            mod_hash_null_valdtor,
                            tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
                }

                /* Create serializer for connectionless transports. */
                if (i & TL_TICLTS)
                        t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
        }

        tl_dip = devi;

        return (DDI_SUCCESS);
}
1123 
/*
 * Detach entry point: tear down everything tl_attach() created —
 * serializers, hash tables, the endpoint cache, the minor id space and
 * the minor nodes.  TL_UNUSED and TL_SOCK_COTSORD are skipped; the
 * latter shares the socket COTS hash tables, destroyed under that slot.
 */
static int
tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
        int i;

        if (cmd == DDI_SUSPEND)
                return (DDI_SUCCESS);

        if (cmd != DDI_DETACH)
                return (DDI_FAILURE);

        /*
         * Destroy arenas and hash tables.
         */
        for (i = 0; i < TL_MAXTRANSPORT; i++) {
                tl_transport_state_t *t = &tl_transports[i];

                if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
                        continue;

                /* Only CLTS transports ever own a shared serializer. */
                EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
                if (t->tr_serializer != NULL) {
                        tl_serializer_refrele(t->tr_serializer);
                        t->tr_serializer = NULL;
                }

#ifdef _ILP32
                /* Mirror the ILP32 split used at creation time in tl_attach. */
                if (i & TL_SOCKET)
                        mod_hash_destroy_idhash(t->tr_ai_hash);
                else
                        mod_hash_destroy_ptrhash(t->tr_ai_hash);
#else
                mod_hash_destroy_idhash(t->tr_ai_hash);
#endif /* _ILP32 */
                t->tr_ai_hash = NULL;
                if (i & TL_SOCKET)
                        mod_hash_destroy_ptrhash(t->tr_addr_hash);
                else
                        mod_hash_destroy_hash(t->tr_addr_hash);
                t->tr_addr_hash = NULL;
        }

        kmem_cache_destroy(tl_cache);
        tl_cache = NULL;
        id_space_destroy(tl_minors);
        tl_minors = NULL;
        ddi_remove_minor_node(devi, NULL);
        return (DDI_SUCCESS);
}
1173 
1174 /* ARGSUSED */
1175 static int
1176 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1177 {
1178 
1179         int retcode = DDI_FAILURE;
1180 
1181         switch (infocmd) {
1182 
1183         case DDI_INFO_DEVT2DEVINFO:
1184                 if (tl_dip != NULL) {
1185                         *result = (void *)tl_dip;
1186                         retcode = DDI_SUCCESS;
1187                 }
1188                 break;
1189 
1190         case DDI_INFO_DEVT2INSTANCE:
1191                 *result = NULL;
1192                 retcode = DDI_SUCCESS;
1193                 break;
1194 
1195         default:
1196                 break;
1197         }
1198         return (retcode);
1199 }
1200 
1201 /*
1202  * Endpoint reference management.
1203  */
1204 static void
1205 tl_refhold(tl_endpt_t *tep)
1206 {
1207         atomic_inc_32(&tep->te_refcnt);
1208 }
1209 
/*
 * Drop a reference on an endpoint; the endpoint is destroyed when the
 * last reference goes away.
 */
static void
tl_refrele(tl_endpt_t *tep)
{
        ASSERT(tep->te_refcnt != 0);

        /* atomic_dec_32_nv() returns the new value: 0 means last ref. */
        if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
                tl_free(tep);
}
1218 
/*
 * kmem cache constructor for tl_endpt_t: zero the endpoint and
 * initialize its mutexes and condition variables.  Undone by
 * tl_destructor().
 */
/*ARGSUSED*/
static int
tl_constructor(void *buf, void *cdrarg, int kmflags)
{
        tl_endpt_t *tep = buf;

        bzero(tep, sizeof (tl_endpt_t));
        mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
        mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
        mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);

        return (0);
}
1234 
/*
 * kmem cache destructor for tl_endpt_t: destroy the locks and condition
 * variables set up by tl_constructor().
 */
/*ARGSUSED*/
static void
tl_destructor(void *buf, void *cdrarg)
{
        tl_endpt_t *tep = buf;

        mutex_destroy(&tep->te_closelock);
        cv_destroy(&tep->te_closecv);
        mutex_destroy(&tep->te_srv_lock);
        cv_destroy(&tep->te_srv_cv);
        mutex_destroy(&tep->te_ser_lock);
}
1247 
/*
 * Free an endpoint whose reference count has dropped to zero: release
 * the address buffer, clone minor, serializer reference and any peer
 * references, then return the (partially re-zeroed) endpoint to the
 * kmem cache.  The ASSERTs document the state an endpoint must be in
 * by the time its last reference is released.
 */
static void
tl_free(tl_endpt_t *tep)
{
        ASSERT(tep->te_refcnt == 0);
        ASSERT(tep->te_transport != NULL);
        ASSERT(tep->te_rq == NULL);
        ASSERT(tep->te_wq == NULL);
        ASSERT(tep->te_ser != NULL);
        ASSERT(tep->te_ser_count == 0);
        ASSERT(!(tep->te_flag & TL_ADDRHASHED));

        if (IS_SOCKET(tep)) {
                /* Socket addresses live inside the endpoint; nothing to free */
                ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
                ASSERT(tep->te_abuf == &tep->te_uxaddr);
                ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
                ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
        } else if (tep->te_abuf != NULL) {
                kmem_free(tep->te_abuf, tep->te_alen);
                tep->te_alen = -1; /* uninitialized */
                tep->te_abuf = NULL;
        } else {
                ASSERT(tep->te_alen == -1);
        }

        /* Return the clone minor number to the id space. */
        id_free(tl_minors, tep->te_minor);
        ASSERT(tep->te_credp == NULL);

        /* Release a hash handle reserved at open time but never consumed. */
        if (tep->te_hash_hndl != NULL)
                mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

        if (IS_COTS(tep)) {
                /* COTS endpoints own a private serializer; drop it here. */
                TL_REMOVE_PEER(tep->te_conp);
                TL_REMOVE_PEER(tep->te_oconp);
                tl_serializer_refrele(tep->te_ser);
                tep->te_ser = NULL;
                ASSERT(tep->te_nicon == 0);
                ASSERT(list_head(&tep->te_iconp) == NULL);
        } else {
                ASSERT(tep->te_lastep == NULL);
                ASSERT(list_head(&tep->te_flowlist) == NULL);
                ASSERT(tep->te_flowq == NULL);
        }

        ASSERT(tep->te_bufcid == 0);
        ASSERT(tep->te_timoutid == 0);
        bzero(&tep->te_ap, sizeof (tep->te_ap));
        tep->te_acceptor_id = 0;

        ASSERT(tep->te_closewait == 0);
        ASSERT(!tep->te_rsrv_active);
        ASSERT(!tep->te_wsrv_active);
        tep->te_closing = 0;
        tep->te_nowsrv = B_FALSE;
        tep->te_flag = 0;

        kmem_cache_free(tl_cache, tep);
}
1305 
1306 /*
1307  * Allocate/free reference-counted wrappers for serializers.
1308  */
1309 static tl_serializer_t *
1310 tl_serializer_alloc(int flags)
1311 {
1312         tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1313         serializer_t *ser;
1314 
1315         if (s == NULL)
1316                 return (NULL);
1317 
1318         ser = serializer_create(flags);
1319 
1320         if (ser == NULL) {
1321                 kmem_free(s, sizeof (tl_serializer_t));
1322                 return (NULL);
1323         }
1324 
1325         s->ts_refcnt = 1;
1326         s->ts_serializer = ser;
1327         return (s);
1328 }
1329 
/* Take an additional reference on a serializer wrapper. */
static void
tl_serializer_refhold(tl_serializer_t *s)
{
        atomic_inc_32(&s->ts_refcnt);
}
1335 
/*
 * Drop a reference on a serializer wrapper; the underlying serializer
 * and the wrapper are destroyed when the count reaches zero.
 */
static void
tl_serializer_refrele(tl_serializer_t *s)
{
        if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
                serializer_destroy(s->ts_serializer);
                kmem_free(s, sizeof (tl_serializer_t));
        }
}
1344 
1345 /*
1346  * Post a request on the endpoint serializer. For COTS transports keep track of
1347  * the number of pending requests.
1348  */
1349 static void
1350 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1351 {
1352         if (IS_COTS(tep)) {
1353                 mutex_enter(&tep->te_ser_lock);
1354                 tep->te_ser_count++;
1355                 mutex_exit(&tep->te_ser_lock);
1356         }
1357         serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1358 }
1359 
1360 /*
1361  * Complete processing the request on the serializer. Decrement the counter for
1362  * pending requests for COTS transports.
1363  */
1364 static void
1365 tl_serializer_exit(tl_endpt_t *tep)
1366 {
1367         if (IS_COTS(tep)) {
1368                 mutex_enter(&tep->te_ser_lock);
1369                 ASSERT(tep->te_ser_count != 0);
1370                 tep->te_ser_count--;
1371                 mutex_exit(&tep->te_ser_lock);
1372         }
1373 }
1374 
1375 /*
1376  * Hash management functions.
1377  */
1378 
1379 /*
1380  * Return TRUE if two addresses are equal, false otherwise.
1381  */
1382 static boolean_t
1383 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1384 {
1385         return ((ap1->ta_alen > 0) &&
1386             (ap1->ta_alen == ap2->ta_alen) &&
1387             (ap1->ta_zoneid == ap2->ta_zoneid) &&
1388             (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1389 }
1390 
1391 /*
1392  * This function is called whenever an endpoint is found in the hash table.
1393  */
1394 /* ARGSUSED0 */
1395 static void
1396 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1397 {
1398         tl_refhold((tl_endpt_t *)val);
1399 }
1400 
1401 /*
1402  * Address hash function.
1403  */
1404 /* ARGSUSED */
1405 static uint_t
1406 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1407 {
1408         tl_addr_t *ap = (tl_addr_t *)key;
1409         size_t  len = ap->ta_alen;
1410         uchar_t *p = ap->ta_abuf;
1411         uint_t i, g;
1412 
1413         ASSERT((len > 0) && (p != NULL));
1414 
1415         for (i = ap->ta_zoneid; len -- != 0; p++) {
1416                 i = (i << 4) + (*p);
1417                 if ((g = (i & 0xf0000000U)) != 0) {
1418                         i ^= (g >> 24);
1419                         i ^= g;
1420                 }
1421         }
1422         return (i);
1423 }
1424 
1425 /*
1426  * This function is used by hash lookups. It compares two generic addresses.
1427  */
1428 static int
1429 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1430 {
1431 #ifdef  DEBUG
1432         tl_addr_t *ap1 = (tl_addr_t *)key1;
1433         tl_addr_t *ap2 = (tl_addr_t *)key2;
1434 
1435         ASSERT(key1 != NULL);
1436         ASSERT(key2 != NULL);
1437 
1438         ASSERT(ap1->ta_abuf != NULL);
1439         ASSERT(ap2->ta_abuf != NULL);
1440         ASSERT(ap1->ta_alen > 0);
1441         ASSERT(ap2->ta_alen > 0);
1442 #endif
1443 
1444         return (!tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1445 }
1446 
1447 /*
1448  * Prevent endpoint from closing if possible.
1449  * Return B_TRUE on success, B_FALSE on failure.
1450  */
1451 static boolean_t
1452 tl_noclose(tl_endpt_t *tep)
1453 {
1454         boolean_t rc = B_FALSE;
1455 
1456         mutex_enter(&tep->te_closelock);
1457         if (!tep->te_closing) {
1458                 ASSERT(tep->te_closewait == 0);
1459                 tep->te_closewait++;
1460                 rc = B_TRUE;
1461         }
1462         mutex_exit(&tep->te_closelock);
1463         return (rc);
1464 }
1465 
1466 /*
1467  * Allow endpoint to close if needed.
1468  */
1469 static void
1470 tl_closeok(tl_endpt_t *tep)
1471 {
1472         ASSERT(tep->te_closewait > 0);
1473         mutex_enter(&tep->te_closelock);
1474         ASSERT(tep->te_closewait == 1);
1475         tep->te_closewait--;
1476         cv_signal(&tep->te_closecv);
1477         mutex_exit(&tep->te_closelock);
1478 }
1479 
/*
 * STREAMS open entry point.
 *
 * The minor number of the opened device selects the transport mode; the
 * driver then behaves as a self-cloning driver, allocating a fresh unique
 * minor for the new endpoint and rewriting *devp accordingly.
 *
 * Returns 0 on success, ENXIO for illegal open styles or minor numbers.
 */
/* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t	*credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	/* Stream is already open - nothing more to do. */
	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	/* Socket opens are marked by SO_SOCKSTR in oflag. */
	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;

	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		/* TLI endpoints start with no address (alen of -1). */
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

#ifdef	_ILP32
	/*
	 * On 32-bit kernels a non-socket acceptor ID is the read queue
	 * pointer itself; sockets always use the minor number.
	 */
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}
1590 
/*
 * STREAMS close entry point.
 *
 * Close is a multi-phase protocol: remove the endpoint from the acceptor
 * hash, wait until any in-flight operation allows close (te_closewait),
 * run the first close phase behind the serializer (tl_close_ser), turn the
 * queues off, then run the final phase (tl_close_finish_ser) and drop the
 * open reference.
 */
/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag, cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t *wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	/* Cancel any outstanding bufcall/timeout callbacks. */
	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close processing
	 * with serializer protection. This processing may happen asynchronously
	 * with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}
1696 
/*
 * First phase of close processing done behind the serializer.
 *
 * Drains queued messages where semantics require it, unbinds the address,
 * and detaches the queues from any peer so qprocsoff() in tl_close() is
 * safe.  Finishes by waking tl_close() via tl_closeok().
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		/* ser_mp == NULL tells tl_wsrv_ser we are called from close. */
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}
1742 
/*
 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
 * the reference for CLTS.
 *
 * Called from serializer. Should drop reference count for CLTS only.
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	/* The caller set te_closewait per transport type - see tl_close(). */
	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
	IMPLY(IS_COTS(tep), tep->te_closewait == 1);

	tep->te_state = -1;  /* Uninitialized */
	if (IS_COTS(tep)) {
		/* Tear down any connection/pending connections. */
		tl_co_unconnect(tep);
	} else {
		/* Connectionless specific cleanup */
		TL_REMOVE_PEER(tep->te_lastep);
		/*
		 * Backenable anybody that is flow controlled waiting for
		 * this endpoint.
		 */
		tl_cl_backenable(tep);
		if (tep->te_flowq != NULL) {
			list_remove(&(tep->te_flowq->te_flowlist), tep);
			tep->te_flowq = NULL;
		}
	}

	tl_serializer_exit(tep);
	if (IS_COTS(tep))
		tl_closeok(tep);
	else
		tl_refrele(tep);
}
1780 
1781 /*
1782  * STREAMS write-side put procedure.
1783  * Enter serializer for most of the processing.
1784  *
1785  * The T_CONN_REQ is processed outside of serializer.
1786  */
1787 static int
1788 tl_wput(queue_t *wq, mblk_t *mp)
1789 {
1790         tl_endpt_t              *tep = (tl_endpt_t *)wq->q_ptr;
1791         ssize_t                 msz = MBLKL(mp);
1792         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
1793         tlproc_t                *tl_proc = NULL;
1794 
1795         switch (DB_TYPE(mp)) {
1796         case M_DATA:
1797                 /* Only valid for connection-oriented transports */
1798                 if (IS_CLTS(tep)) {
1799                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1800                             SL_TRACE | SL_ERROR,
1801                             "tl_wput:M_DATA invalid for ticlts driver"));
1802                         tl_merror(wq, mp, EPROTO);
1803                         return (0);
1804                 }
1805                 tl_proc = tl_wput_data_ser;
1806                 break;
1807 
1808         case M_IOCTL:
1809                 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1810                 case TL_IOC_CREDOPT:
1811                         /* FALLTHROUGH */
1812                 case TL_IOC_UCREDOPT:
1813                         /*
1814                          * Serialize endpoint state change.
1815                          */
1816                         tl_proc = tl_do_ioctl_ser;
1817                         break;
1818 
1819                 default:
1820                         miocnak(wq, mp, 0, EINVAL);
1821                         return (0);
1822                 }
1823                 break;
1824 
1825         case M_FLUSH:
1826                 /*
1827                  * do canonical M_FLUSH processing
1828                  */
1829                 if (*mp->b_rptr & FLUSHW) {
1830                         flushq(wq, FLUSHALL);
1831                         *mp->b_rptr &= ~FLUSHW;
1832                 }
1833                 if (*mp->b_rptr & FLUSHR) {
1834                         flushq(RD(wq), FLUSHALL);
1835                         qreply(wq, mp);
1836                 } else {
1837                         freemsg(mp);
1838                 }
1839                 return (0);
1840 
1841         case M_PROTO:
1842                 if (msz < sizeof (prim->type)) {
1843                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1844                             SL_TRACE | SL_ERROR,
1845                             "tl_wput:M_PROTO data too short"));
1846                         tl_merror(wq, mp, EPROTO);
1847                         return (0);
1848                 }
1849                 switch (prim->type) {
1850                 case T_OPTMGMT_REQ:
1851                 case T_SVR4_OPTMGMT_REQ:
1852                         /*
1853                          * Process TPI option management requests immediately
1854                          * in put procedure regardless of in-order processing
1855                          * of already queued messages.
1856                          * (Note: This driver supports AF_UNIX socket
1857                          * implementation.  Unless we implement this processing,
1858                          * setsockopt() on socket endpoint will block on flow
1859                          * controlled endpoints which it should not. That is
1860                          * required for successful execution of VSU socket tests
1861                          * and is consistent with BSD socket behavior).
1862                          */
1863                         tl_optmgmt(wq, mp);
1864                         return (0);
1865                 case O_T_BIND_REQ:
1866                 case T_BIND_REQ:
1867                         tl_proc = tl_bind_ser;
1868                         break;
1869                 case T_CONN_REQ:
1870                         if (IS_CLTS(tep)) {
1871                                 tl_merror(wq, mp, EPROTO);
1872                                 return (0);
1873                         }
1874                         tl_conn_req(wq, mp);
1875                         return (0);
1876                 case T_DATA_REQ:
1877                 case T_OPTDATA_REQ:
1878                 case T_EXDATA_REQ:
1879                 case T_ORDREL_REQ:
1880                         tl_proc = tl_putq_ser;
1881                         break;
1882                 case T_UNITDATA_REQ:
1883                         if (IS_COTS(tep) ||
1884                             (msz < sizeof (struct T_unitdata_req))) {
1885                                 tl_merror(wq, mp, EPROTO);
1886                                 return (0);
1887                         }
1888                         if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1889                                 tl_proc = tl_unitdata_ser;
1890                         } else {
1891                                 tl_proc = tl_putq_ser;
1892                         }
1893                         break;
1894                 default:
1895                         /*
1896                          * process in service procedure if message already
1897                          * queued (maintain in-order processing)
1898                          */
1899                         if (wq->q_first != NULL) {
1900                                 tl_proc = tl_putq_ser;
1901                         } else {
1902                                 tl_proc = tl_wput_ser;
1903                         }
1904                         break;
1905                 }
1906                 break;
1907 
1908         case M_PCPROTO:
1909                 /*
1910                  * Check that the message has enough data to figure out TPI
1911                  * primitive.
1912                  */
1913                 if (msz < sizeof (prim->type)) {
1914                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1915                             SL_TRACE | SL_ERROR,
1916                             "tl_wput:M_PCROTO data too short"));
1917                         tl_merror(wq, mp, EPROTO);
1918                         return (0);
1919                 }
1920                 switch (prim->type) {
1921                 case T_CAPABILITY_REQ:
1922                         tl_capability_req(mp, tep);
1923                         return (0);
1924                 case T_INFO_REQ:
1925                         tl_proc = tl_info_req_ser;
1926                         break;
1927                 case T_ADDR_REQ:
1928                         tl_proc = tl_addr_req_ser;
1929                         break;
1930 
1931                 default:
1932                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1933                             SL_TRACE | SL_ERROR,
1934                             "tl_wput:unknown TPI msg primitive"));
1935                         tl_merror(wq, mp, EPROTO);
1936                         return (0);
1937                 }
1938                 break;
1939         default:
1940                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
1941                     "tl_wput:default:unexpected Streams message"));
1942                 freemsg(mp);
1943                 return (0);
1944         }
1945 
1946         /*
1947          * Continue processing via serializer.
1948          */
1949         ASSERT(tl_proc != NULL);
1950         tl_refhold(tep);
1951         tl_serializer_enter(tep, tl_proc, mp);
1952         return (0);
1953 }
1954 
1955 /*
1956  * Place message on the queue while preserving order.
1957  */
1958 static void
1959 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1960 {
1961         if (tep->te_closing) {
1962                 tl_wput_ser(mp, tep);
1963         } else {
1964                 TL_PUTQ(tep, mp);
1965                 tl_serializer_exit(tep);
1966                 tl_refrele(tep);
1967         }
1968 
1969 }
1970 
1971 static void
1972 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1973 {
1974         ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1975 
1976         switch (DB_TYPE(mp)) {
1977         case M_DATA:
1978                 tl_data(mp, tep);
1979                 break;
1980         case M_PROTO:
1981                 tl_do_proto(mp, tep);
1982                 break;
1983         default:
1984                 freemsg(mp);
1985                 break;
1986         }
1987 }
1988 
/*
 * Write side put procedure called from serializer.
 *
 * Processes one message via tl_wput_common_ser(), then drops the
 * serializer and the reference taken by tl_wput().
 */
static void
tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_wput_common_ser(mp, tep);
	tl_serializer_exit(tep);
	tl_refrele(tep);
}
1999 
/*
 * M_DATA processing. Called from serializer.
 *
 * Implements the data fastpath: when both endpoints are in a data-transfer
 * state, nothing is queued ahead of this message, and the peer can accept
 * it (or we are closing), the message is putnext()ed directly to the peer's
 * read queue.  Otherwise the message is queued for the service routine,
 * except on close where out-of-state data is dropped.
 */
static void
tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_endpt_t	*peer_tep = tep->te_conp;
	queue_t		*peer_rq;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(IS_COTS(tep));

	IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);

	/*
	 * fastpath for data. Ignore flow control if tep is closing.
	 */
	if ((peer_tep != NULL) &&
	    !peer_tep->te_closing &&
	    ((tep->te_state == TS_DATA_XFER) ||
	    (tep->te_state == TS_WREQ_ORDREL)) &&
	    (tep->te_wq != NULL) &&
	    (tep->te_wq->q_first == NULL) &&
	    (peer_tep->te_state == TS_DATA_XFER ||
	    peer_tep->te_state == TS_WIND_ORDREL ||
	    peer_tep->te_state == TS_WREQ_ORDREL) &&
	    ((peer_rq = peer_tep->te_rq) != NULL) &&
	    (canputnext(peer_rq) || tep->te_closing)) {
		putnext(peer_rq, mp);
	} else if (tep->te_closing) {
		/*
		 * It is possible that by the time we got here tep started to
		 * close. If the write queue is not empty, and the state is
		 * TS_DATA_XFER the data should be delivered in order, so we
		 * call putq() instead of freeing the data.
		 */
		if ((tep->te_wq != NULL) &&
		    ((tep->te_state == TS_DATA_XFER) ||
		    (tep->te_state == TS_WREQ_ORDREL))) {
			TL_PUTQ(tep, mp);
		} else {
			freemsg(mp);
		}
	} else {
		/* Not deliverable right now - queue for tl_wsrv(). */
		TL_PUTQ(tep, mp);
	}

	tl_serializer_exit(tep);
	tl_refrele(tep);
}
2050 
/*
 * Write side service routine.
 *
 * All actual processing happens within serializer which is entered
 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
 * messages that need processing may have arrived, so tl_wsrv repeats until
 * queue is empty or te_nowsrv is set.
 */
static int
tl_wsrv(queue_t *wq)
{
	tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;

	while ((wq->q_first != NULL) && !tep->te_nowsrv) {
		/* Mark the serializer job active before dispatching it. */
		mutex_enter(&tep->te_srv_lock);
		ASSERT(tep->te_wsrv_active == B_FALSE);
		tep->te_wsrv_active = B_TRUE;
		mutex_exit(&tep->te_srv_lock);

		tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);

		/*
		 * Wait for serializer job to complete.
		 * tl_wsrv_ser() clears te_wsrv_active and signals te_srv_cv.
		 */
		mutex_enter(&tep->te_srv_lock);
		while (tep->te_wsrv_active) {
			cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
		}
		cv_signal(&tep->te_srv_cv);
		mutex_exit(&tep->te_srv_lock);
	}
	return (0);
}
2084 
/*
 * Serialized write side processing of the STREAMS queue.
 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
 * is NULL.
 *
 * Drains the write queue through tl_wput_common_ser() until it is empty or
 * te_nowsrv is set.
 */
static void
tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
{
	mblk_t *mp;
	queue_t *wq = tep->te_wq;

	ASSERT(wq != NULL);
	while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
		tl_wput_common_ser(mp, tep);
	}

	/*
	 * Wakeup service routine unless called from close.
	 * If ser_mp is specified, the caller is tl_wsrv().
	 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
	 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
	 * be no matching tl_serializer_exit() in this case.
	 * Also, there is no need to wakeup anyone since tl_close_ser() is not
	 * waiting on te_srv_cv.
	 */
	if (ser_mp != NULL) {
		/*
		 * We are called from tl_wsrv.
		 */
		mutex_enter(&tep->te_srv_lock);
		ASSERT(tep->te_wsrv_active);
		tep->te_wsrv_active = B_FALSE;
		cv_signal(&tep->te_srv_cv);
		mutex_exit(&tep->te_srv_lock);
		tl_serializer_exit(tep);
	}
}
2122 
/*
 * Called when the stream is backenabled. Enter serializer and qenable everyone
 * flow controlled by tep.
 *
 * NOTE: The service routine should enter serializer synchronously. Otherwise it
 * is possible that two instances of tl_rsrv will be running reusing the same
 * rsrv mblk.
 */
static int
tl_rsrv(queue_t *rq)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;

	ASSERT(rq->q_first == NULL);
	ASSERT(tep->te_rsrv_active == 0);

	tep->te_rsrv_active = B_TRUE;
	tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
	/*
	 * Wait for serializer job to complete.
	 * tl_rsrv_ser() clears te_rsrv_active and signals te_srv_cv.
	 */
	mutex_enter(&tep->te_srv_lock);
	while (tep->te_rsrv_active) {
		cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
	}
	cv_signal(&tep->te_srv_cv);
	mutex_exit(&tep->te_srv_lock);
	return (0);
}
2152 
/*
 * Serializer part of tl_rsrv(): backenable whoever is flow controlled by
 * this endpoint (all waiters for CLTS, the connected peer for COTS in a
 * data-transfer state), then wake up tl_rsrv().
 */
/* ARGSUSED */
static void
tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
{
	tl_endpt_t *peer_tep;

	if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
		/* Connectionless: qenable everyone on our flow list. */
		tl_cl_backenable(tep);
	} else if (
	    IS_COTS(tep) &&
	    ((peer_tep = tep->te_conp) != NULL) &&
	    !peer_tep->te_closing &&
	    ((tep->te_state == TS_DATA_XFER) ||
	    (tep->te_state == TS_WIND_ORDREL)||
	    (tep->te_state == TS_WREQ_ORDREL))) {
		/* Connection-oriented: enable the connected peer only. */
		TL_QENABLE(peer_tep);
	}

	/*
	 * Wakeup read side service routine.
	 */
	mutex_enter(&tep->te_srv_lock);
	ASSERT(tep->te_rsrv_active);
	tep->te_rsrv_active = B_FALSE;
	cv_signal(&tep->te_srv_cv);
	mutex_exit(&tep->te_srv_lock);
	tl_serializer_exit(tep);
}
2181 
2182 /*
2183  * process M_PROTO messages. Always called from serializer.
2184  */
2185 static void
2186 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2187 {
2188         ssize_t                 msz = MBLKL(mp);
2189         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
2190 
2191         /* Message size was validated by tl_wput(). */
2192         ASSERT(msz >= sizeof (prim->type));
2193 
2194         switch (prim->type) {
2195         case T_UNBIND_REQ:
2196                 tl_unbind(mp, tep);
2197                 break;
2198 
2199         case T_ADDR_REQ:
2200                 tl_addr_req(mp, tep);
2201                 break;
2202 
2203         case O_T_CONN_RES:
2204         case T_CONN_RES:
2205                 if (IS_CLTS(tep)) {
2206                         tl_merror(tep->te_wq, mp, EPROTO);
2207                         break;
2208                 }
2209                 tl_conn_res(mp, tep);
2210                 break;
2211 
2212         case T_DISCON_REQ:
2213                 if (IS_CLTS(tep)) {
2214                         tl_merror(tep->te_wq, mp, EPROTO);
2215                         break;
2216                 }
2217                 tl_discon_req(mp, tep);
2218                 break;
2219 
2220         case T_DATA_REQ:
2221                 if (IS_CLTS(tep)) {
2222                         tl_merror(tep->te_wq, mp, EPROTO);
2223                         break;
2224                 }
2225                 tl_data(mp, tep);
2226                 break;
2227 
2228         case T_OPTDATA_REQ:
2229                 if (IS_CLTS(tep)) {
2230                         tl_merror(tep->te_wq, mp, EPROTO);
2231                         break;
2232                 }
2233                 tl_data(mp, tep);
2234                 break;
2235 
2236         case T_EXDATA_REQ:
2237                 if (IS_CLTS(tep)) {
2238                         tl_merror(tep->te_wq, mp, EPROTO);
2239                         break;
2240                 }
2241                 tl_exdata(mp, tep);
2242                 break;
2243 
2244         case T_ORDREL_REQ:
2245                 if (!IS_COTSORD(tep)) {
2246                         tl_merror(tep->te_wq, mp, EPROTO);
2247                         break;
2248                 }
2249                 tl_ordrel(mp, tep);
2250                 break;
2251 
2252         case T_UNITDATA_REQ:
2253                 if (IS_COTS(tep)) {
2254                         tl_merror(tep->te_wq, mp, EPROTO);
2255                         break;
2256                 }
2257                 tl_unitdata(mp, tep);
2258                 break;
2259 
2260         default:
2261                 tl_merror(tep->te_wq, mp, EPROTO);
2262                 break;
2263         }
2264 }
2265 
2266 /*
2267  * Process ioctl from serializer.
2268  * This is a wrapper around tl_do_ioctl().
2269  */
2270 static void
2271 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2272 {
2273         if (!tep->te_closing)
2274                 tl_do_ioctl(mp, tep);
2275         else
2276                 freemsg(mp);
2277 
2278         tl_serializer_exit(tep);
2279         tl_refrele(tep);
2280 }
2281 
2282 static void
2283 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2284 {
2285         struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2286         int cmd = iocbp->ioc_cmd;
2287         queue_t *wq = tep->te_wq;
2288         int error;
2289         int thisopt, otheropt;
2290 
2291         ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2292 
2293         switch (cmd) {
2294         case TL_IOC_CREDOPT:
2295                 if (cmd == TL_IOC_CREDOPT) {
2296                         thisopt = TL_SETCRED;
2297                         otheropt = TL_SETUCRED;
2298                 } else {
2299                         /* FALLTHROUGH */
2300         case TL_IOC_UCREDOPT:
2301                         thisopt = TL_SETUCRED;
2302                         otheropt = TL_SETCRED;
2303                 }
2304                 /*
2305                  * The credentials passing does not apply to sockets.
2306                  * Only one of the cred options can be set at a given time.
2307                  */
2308                 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2309                         miocnak(wq, mp, 0, EINVAL);
2310                         return;
2311                 }
2312 
2313                 /*
2314                  * Turn on generation of credential options for
2315                  * T_conn_req, T_conn_con, T_unidata_ind.
2316                  */
2317                 error = miocpullup(mp, sizeof (uint32_t));
2318                 if (error != 0) {
2319                         miocnak(wq, mp, 0, error);
2320                         return;
2321                 }
2322                 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2323                         miocnak(wq, mp, 0, EINVAL);
2324                         return;
2325                 }
2326 
2327                 if (*(uint32_t *)mp->b_cont->b_rptr)
2328                         tep->te_flag |= thisopt;
2329                 else
2330                         tep->te_flag &= ~thisopt;
2331 
2332                 miocack(wq, mp, 0, 0);
2333                 break;
2334 
2335         default:
2336                 /* Should not be here */
2337                 miocnak(wq, mp, 0, EINVAL);
2338                 break;
2339         }
2340 }
2341 
2342 
/*
 * send T_ERROR_ACK
 * Note: assumes enough memory or caller passed big enough mp
 *	- no recovery from allocb failures
 *
 * Builds a T_ERROR_ACK (reusing mp via tpi_ack_alloc) carrying the failed
 * primitive `type' and the TLI/UNIX error codes, and sends it upstream with
 * qreply().  On allocation failure the stream is put into error state with
 * ENOSR via tl_merror().
 */

static void
tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
    t_scalar_t unix_err, t_scalar_t type)
{
	struct T_error_ack *err_ack;
	mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
	    M_PCPROTO, T_ERROR_ACK);

	if (ackmp == NULL) {
		(void) (STRLOG(TL_ID, 0, 1, SL_TRACE | SL_ERROR,
		    "tl_error_ack:out of mblk memory"));
		tl_merror(wq, NULL, ENOSR);
		return;
	}
	err_ack = (struct T_error_ack *)ackmp->b_rptr;
	err_ack->ERROR_prim = type;
	err_ack->TLI_error = tli_err;
	err_ack->UNIX_error = unix_err;

	/*
	 * send error ack message
	 */
	qreply(wq, ackmp);
}
2373 
2374 
2375 
2376 /*
2377  * send T_OK_ACK
2378  * Note: assumes enough memory or caller passed big enough mp
2379  *      - no recovery from allocb failures
2380  */
2381 static void
2382 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2383 {
2384         struct T_ok_ack *ok_ack;
2385         mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2386             M_PCPROTO, T_OK_ACK);
2387 
2388         if (ackmp == NULL) {
2389                 tl_merror(wq, NULL, ENOMEM);
2390                 return;
2391         }
2392 
2393         ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2394         ok_ack->CORRECT_prim = type;
2395 
2396         (void) qreply(wq, ackmp);
2397 }
2398 
2399 /*
2400  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2401  * This is a wrapper around tl_bind().
2402  */
2403 static void
2404 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2405 {
2406         if (!tep->te_closing)
2407                 tl_bind(mp, tep);
2408         else
2409                 freemsg(mp);
2410 
2411         tl_serializer_exit(tep);
2412         tl_refrele(tep);
2413 }
2414 
2415 /*
2416  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
 * Assumes that the endpoint is in the unbound state.
2418  */
static void
tl_bind(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	struct T_bind_ack	*b_ack;
	struct T_bind_req	*bind = (struct T_bind_req *)mp->b_rptr;
	mblk_t			*ackmp, *bamp;
	soux_addr_t		ux_addr;
	t_uscalar_t		qlen = 0;
	t_scalar_t		alen, aoff;
	tl_addr_t		addr_req;
	void			*addr_startp;
	ssize_t			msz = MBLKL(mp), basize;
	t_scalar_t		tli_err = 0, unix_err = 0;
	/*
	 * Saved so that a T_ERROR_ACK can name the original primitive and
	 * so that the endpoint state can be rolled back on allocb failure.
	 */
	t_scalar_t		save_prim_type = bind->PRIM_type;
	t_scalar_t		save_state = tep->te_state;

	/* A bind request is only legal on an unbound endpoint. */
	if (tep->te_state != TS_UNBND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:bind_request:out of state, state=%d",
		    tep->te_state));
		tli_err = TOUTSTATE;
		goto error;
	}

	/* Validate length before dereferencing any other request fields. */
	if (msz < sizeof (struct T_bind_req)) {
		tli_err = TSYSERR;
		unix_err = EINVAL;
		goto error;
	}

	tep->te_state = nextstate[TE_BIND_REQ][tep->te_state];

	ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
	    (bind->PRIM_type == T_BIND_REQ));

	alen = bind->ADDR_length;
	aoff = bind->ADDR_offset;

	/* negotiate max conn req pending; clamp to the tl_maxqlen tunable */
	if (IS_COTS(tep)) {
		qlen = bind->CONIND_number;
		if (qlen > tl_maxqlen)
			qlen = tl_maxqlen;
	}

	/*
	 * Reserve hash handle. It can only be NULL if the endpoint is unbound
	 * and bound again.
	 */
	if ((tep->te_hash_hndl == NULL) &&
	    ((tep->te_flag & TL_ADDRHASHED) == 0) &&
	    mod_hash_reserve_nosleep(tep->te_addrhash,
	    &tep->te_hash_hndl) != 0) {
		tli_err = TSYSERR;
		unix_err = ENOSR;
		goto error;
	}

	/*
	 * Verify address correctness.
	 */
	if (IS_SOCKET(tep)) {
		/* Sockets always bind via O_T_BIND_REQ. */
		ASSERT(bind->PRIM_type == O_T_BIND_REQ);

		if ((alen != TL_SOUX_ADDRLEN) ||
		    (aoff < 0) ||
		    (aoff + alen > msz)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE | SL_ERROR,
			    "tl_bind: invalid socket addr"));
			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
			tli_err = TSYSERR;
			unix_err = EINVAL;
			goto error;
		}
		/* Copy address from message to local buffer. */
		bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
		/*
		 * Check that we got a correct address from sockets:
		 * the magic must identify an explicit or implicit bind.
		 */
		if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
		    (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE | SL_ERROR,
			    "tl_bind: invalid socket magic"));
			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
			tli_err = TSYSERR;
			unix_err = EINVAL;
			goto error;
		}
		/* An implicit (autobind) request must not carry a vnode. */
		if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
		    (ux_addr.soua_vp != NULL)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE | SL_ERROR,
			    "tl_bind: implicit addr non-empty"));
			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
			tli_err = TSYSERR;
			unix_err = EINVAL;
			goto error;
		}
		/* An explicit request must supply a vnode address. */
		if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
		    (ux_addr.soua_vp == NULL)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE | SL_ERROR,
			    "tl_bind: explicit addr empty"));
			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
			tli_err = TSYSERR;
			unix_err = EINVAL;
			goto error;
		}
	} else {
		/* TLI endpoint: range-check the address against the mblk. */
		if ((alen > 0) && ((aoff < 0) ||
		    ((ssize_t)(aoff + alen) > msz) ||
		    ((aoff + alen) < 0))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE | SL_ERROR,
			    "tl_bind: invalid message"));
			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
			tli_err = TSYSERR;
			unix_err = EINVAL;
			goto error;
		}
		if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE | SL_ERROR,
			    "tl_bind: bad addr in  message"));
			tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
			tli_err = TBADADDR;
			goto error;
		}
#ifdef DEBUG
		/*
		 * Mild form of ASSERT()ion to detect broken TPI apps.
		 * if (!assertion)
		 *	log warning;
		 */
		if (!((alen == 0 && aoff == 0) ||
			(aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
				    3, SL_TRACE | SL_ERROR,
				    "tl_bind: addr overlaps TPI message"));
		}
#endif
	}

	/*
	 * Bind the address provided or allocate one if requested.
	 * Allow rebinds with a new qlen value.
	 */
	if (IS_SOCKET(tep)) {
		/*
		 * For anonymous requests the te_ap is already set up properly
		 * so use minor number as an address.
		 * For explicit requests need to check whether the address is
		 * already in use.
		 */
		if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
			int rc;

			if (tep->te_flag & TL_ADDRHASHED) {
				ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
				if (tep->te_vp == ux_addr.soua_vp)
					goto skip_addr_bind;
				else /* Rebind to a new address. */
					tl_addr_unbind(tep);
			}
			/*
			 * Insert address in the hash if it is not already
			 * there.  Since we use preallocated handle, the insert
			 * can fail only if the key is already present.
			 */
			rc = mod_hash_insert_reserve(tep->te_addrhash,
			    (mod_hash_key_t)ux_addr.soua_vp,
			    (mod_hash_val_t)tep, tep->te_hash_hndl);

			if (rc != 0) {
				ASSERT(rc == MH_ERR_DUPLICATE);
				/*
				 * Violate O_T_BIND_REQ semantics and fail with
				 * TADDRBUSY - sockets will not use any address
				 * other than supplied one for explicit binds.
				 */
				(void) (STRLOG(TL_ID, tep->te_minor, 1,
				    SL_TRACE | SL_ERROR,
				    "tl_bind:requested addr %p is busy",
				    ux_addr.soua_vp));
				tli_err = TADDRBUSY;
				unix_err = 0;
				goto error;
			}
			tep->te_uxaddr = ux_addr;
			tep->te_flag |= TL_ADDRHASHED;
			/* Handle is consumed by the hash on success. */
			tep->te_hash_hndl = NULL;
		}
	} else if (alen == 0) {
		/*
		 * No address supplied: assign any free address.
		 */
		if (!tl_get_any_addr(tep, NULL)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE | SL_ERROR,
			    "tl_bind:failed to get buffer for any "
			    "address"));
			tli_err = TSYSERR;
			unix_err = ENOSR;
			goto error;
		}
	} else {
		addr_req.ta_alen = alen;
		addr_req.ta_abuf = (mp->b_rptr + aoff);
		addr_req.ta_zoneid = tep->te_zoneid;

		/* Keep a private copy of the requested address. */
		tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
		if (tep->te_abuf == NULL) {
			tli_err = TSYSERR;
			unix_err = ENOSR;
			goto error;
		}
		bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
		tep->te_alen = alen;

		if (mod_hash_insert_reserve(tep->te_addrhash,
		    (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
		    tep->te_hash_hndl) != 0) {
			if (save_prim_type == T_BIND_REQ) {
				/*
				 * The bind semantics for this primitive
				 * require a failure if the exact address
				 * requested is busy
				 */
				(void) (STRLOG(TL_ID, tep->te_minor, 1,
				    SL_TRACE | SL_ERROR,
				    "tl_bind:requested addr is busy"));
				tli_err = TADDRBUSY;
				unix_err = 0;
				goto error;
			}

			/*
			 * O_T_BIND_REQ semantics say if the requested
			 * address is busy, bind to any available free address
			 */
			if (!tl_get_any_addr(tep, &addr_req)) {
				(void) (STRLOG(TL_ID, tep->te_minor, 1,
				    SL_TRACE | SL_ERROR,
				    "tl_bind:unable to get any addr buf"));
				tli_err = TSYSERR;
				unix_err = ENOMEM;
				goto error;
			}
		} else {
			tep->te_flag |= TL_ADDRHASHED;
			/* Handle is consumed by the hash on success. */
			tep->te_hash_hndl = NULL;
		}
	}

	ASSERT(tep->te_alen >= 0);

skip_addr_bind:
	/*
	 * prepare T_BIND_ACK TPI message: fixed header followed by the
	 * bound address.
	 */
	basize = sizeof (struct T_bind_ack) + tep->te_alen;
	bamp = reallocb(mp, basize, 0);
	if (bamp == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_wput:tl_bind: allocb failed"));
		/*
		 * roll back state changes (address bind and state table)
		 */
		tl_addr_unbind(tep);
		tep->te_state = TS_UNBND;
		tl_memrecover(wq, mp, basize);
		return;
	}

	DB_TYPE(bamp) = M_PCPROTO;
	bamp->b_wptr = bamp->b_rptr + basize;
	b_ack = (struct T_bind_ack *)bamp->b_rptr;
	b_ack->PRIM_type = T_BIND_ACK;
	b_ack->CONIND_number = qlen;
	b_ack->ADDR_length = tep->te_alen;
	b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
	/* The bound address immediately follows the fixed-size header. */
	addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
	bcopy(tep->te_abuf, addr_startp, tep->te_alen);

	if (IS_COTS(tep)) {
		/* Remember the negotiated backlog; qlen > 0 is a listener. */
		tep->te_qlen = qlen;
		if (qlen > 0)
			tep->te_flag |= TL_LISTENER;
	}

	tep->te_state = nextstate[TE_BIND_ACK][tep->te_state];
	/*
	 * send T_BIND_ACK message
	 */
	(void) qreply(wq, bamp);
	return;

error:
	ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
	if (ackmp == NULL) {
		/*
		 * roll back state changes
		 */
		tep->te_state = save_state;
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
	tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
}
2733 
2734 /*
2735  * Process T_UNBIND_REQ.
2736  * Called from serializer.
2737  */
static void
tl_unbind(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq;
	mblk_t *ackmp;

	/* Endpoint is closing: nothing to ack, just drop the request. */
	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
	 * ==> allocate for T_ERROR_ACK (known max)
	 */
	if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory resources committed
	 * Note: no message validation. T_UNBIND_REQ message is
	 * same size as PRIM_type field so already verified earlier.
	 */

	/*
	 * validate state: unbind is only legal on a bound, idle endpoint
	 */
	if (tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:T_UNBIND_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
		return;
	}
	tep->te_state = nextstate[TE_UNBIND_REQ][tep->te_state];

	/*
	 * TPI says on T_UNBIND_REQ:
	 *    send up a M_FLUSH to flush both
	 *    read and write queues
	 */
	(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);

	if (!IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
	    tep->te_magic != SOU_MAGIC_EXPLICIT) {

		/*
		 * Sockets use bind with qlen==0 followed by bind() to
		 * the same address with qlen > 0 for listeners.
		 * We allow rebind with a new qlen value.
		 * In that specific case the address is deliberately kept
		 * bound here; in every other case it is released now.
		 */
		tl_addr_unbind(tep);
	}

	tep->te_state = nextstate[TE_OK_ACK1][tep->te_state];
	/*
	 * send T_OK_ACK for the unbind
	 */
	tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
}
2802 
2803 
2804 /*
2805  * Option management code from drv/ip is used here
2806  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2807  *      database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2808  *      However, that is what we want as that option is 'unorthodox'
2809  *      and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2810  *      and not in T_SVR4_OPTMGMT_REQ/ACK
2811  * Note2: use of optcom_req means this routine is an exception to
2812  *       recovery from allocb() failures.
2813  */
2814 
static void
tl_optmgmt(queue_t *wq, mblk_t *mp)
{
	tl_endpt_t *tep;
	mblk_t *ackmp;
	union T_primitives *prim;
	cred_t *cr;

	tep = (tl_endpt_t *)wq->q_ptr;
	prim = (union T_primitives *)mp->b_rptr;

	/*
	 * All Solaris components should pass a db_credp
	 * for this TPI message, hence we ASSERT.
	 * But in case there is some other M_PROTO that looks
	 * like a TPI message sent by some other kernel
	 * component, we check and return an error.
	 */
	cr = msg_getcred(mp, NULL);
	ASSERT(cr != NULL);
	if (cr == NULL) {
		/* No credentials attached: reject rather than panic. */
		tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
		return;
	}

	/*  all states OK for AF_UNIX options ? */
	if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
	    prim->type == T_SVR4_OPTMGMT_REQ) {
		/*
		 * Broken TLI semantics that options can only be managed
		 * in TS_IDLE state. Needed for Sparc ABI test suite that
		 * tests this TLI (mis)feature using this device driver.
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
		    tep->te_state));
		/*
		 * preallocate memory for T_ERROR_ACK
		 */
		ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
		if (ackmp == NULL) {
			tl_memrecover(wq, mp, sizeof (struct T_error_ack));
			return;
		}

		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * call common option management routine from drv/ip;
	 * it takes ownership of mp and generates the reply itself.
	 */
	if (prim->type == T_SVR4_OPTMGMT_REQ) {
		svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
	} else {
		ASSERT(prim->type == T_OPTMGMT_REQ);
		tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
	}
}
2876 
2877 /*
 * Handle T_conn_req - the driver part of connect() (accept is T_CONN_RES).
2879  * If TL_SET[U]CRED generate the credentials options.
2880  * If this is a socket pass through options unmodified.
2881  * For sockets generate the T_CONN_CON here instead of
2882  * waiting for the T_CONN_RES.
2883  */
static void
tl_conn_req(queue_t *wq, mblk_t *mp)
{
	tl_endpt_t		*tep = (tl_endpt_t *)wq->q_ptr;
	struct T_conn_req	*creq = (struct T_conn_req *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	t_scalar_t		alen, aoff, olen, ooff, err = 0;
	tl_endpt_t		*peer_tep = NULL;
	mblk_t			*ackmp;
	mblk_t			*dimp;
	struct T_discon_ind	*di;
	soux_addr_t		ux_addr;
	tl_addr_t		dst;

	/* Only connection-oriented endpoints may issue T_CONN_REQ. */
	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (ackmp == NULL) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_IND later
	 */

	/* A connect request is only valid on an idle (bound) endpoint. */
	if (tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:T_CONN_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_req:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
		freemsg(mp);
		return;
	}
	alen = creq->DEST_length;
	aoff = creq->DEST_offset;
	olen = creq->OPT_length;
	ooff = creq->OPT_offset;
	/* Normalize: a zero-length option block has no meaningful offset. */
	if (olen == 0)
		ooff = 0;

	if (IS_SOCKET(tep)) {
		/* Socket destination must be exactly a soux_addr_t. */
		if ((alen != TL_SOUX_ADDRLEN) ||
		    (aoff < 0) ||
		    (aoff + alen > msz) ||
		    (alen > msz - sizeof (struct T_conn_req))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
				    1, SL_TRACE | SL_ERROR,
				    "tl_conn_req: invalid socket addr"));
			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
			freemsg(mp);
			return;
		}
		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE | SL_ERROR,
			    "tl_conn_req: invalid socket magic"));
			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
			freemsg(mp);
			return;
		}
	} else {
		/* TLI endpoint: range-check address and options. */
		if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
		    (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
		    ooff + olen < 0)) ||
		    olen < 0 || ooff < 0) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE | SL_ERROR,
			    "tl_conn_req:invalid message"));
			tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
			freemsg(mp);
			return;
		}

		if (alen <= 0 || aoff < 0 ||
		    (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
				    SL_TRACE | SL_ERROR,
				    "tl_conn_req:bad addr in message, "
				    "alen=%d, msz=%ld",
				    alen, msz));
			tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
			freemsg(mp);
			return;
		}
#ifdef DEBUG
		/*
		 * Mild form of ASSERT()ion to detect broken TPI apps.
		 * if (!assertion)
		 *	log warning;
		 */
		if (!(aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE | SL_ERROR,
			    "tl_conn_req: addr overlaps TPI message"));
		}
#endif
		if (olen) {
			/*
			 * no opts in connect req
			 * supported in this provider except for sockets.
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE | SL_ERROR,
			    "tl_conn_req:options not supported "
			    "in message"));
			tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
			freemsg(mp);
			return;
		}
	}

	/*
	 * Prevent tep from closing on us.
	 */
	if (!tl_noclose(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_req:endpoint is closing"));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
		freemsg(mp);
		return;
	}

	tep->te_state = nextstate[TE_CONN_REQ][tep->te_state];
	/*
	 * get endpoint to connect to
	 * check that peer with DEST addr is bound to addr
	 * and has CONIND_number > 0
	 */
	dst.ta_alen = alen;
	dst.ta_abuf = mp->b_rptr + aoff;
	dst.ta_zoneid = tep->te_zoneid;

	/*
	 * Verify if remote addr is in use (lookup returns a held peer)
	 */
	peer_tep = (IS_SOCKET(tep) ?
	    tl_sock_find_peer(tep, &ux_addr) :
	    tl_find_peer(tep, &dst));

	if (peer_tep == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_req:no one at connect address"));
		err = ECONNREFUSED;
	} else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
		/*
		 * validate that number of incoming connections is
		 * not at capacity on destination endpoint
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
		    "tl_conn_req: qlen overflow connection refused"));
		err = ECONNREFUSED;
	}

	/*
	 * Send T_DISCON_IND in case of error
	 */
	if (err != 0) {
		if (peer_tep != NULL)
			tl_refrele(peer_tep);
		/* We are still expected to send T_OK_ACK */
		tep->te_state = nextstate[TE_OK_ACK1][tep->te_state];
		tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
		tl_closeok(tep);
		dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
		    M_PROTO, T_DISCON_IND);
		if (dimp == NULL) {
			tl_merror(wq, NULL, ENOSR);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->DISCON_reason = err;
		/* No connection indication exists yet for this request. */
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(tep->te_rq, dimp);
		return;
	}

	ASSERT(IS_COTS(peer_tep));

	/*
	 * Found the listener. At this point processing will continue on
	 * listener serializer. Close of the endpoint should be blocked while we
	 * switch serializers.
	 */
	tl_serializer_refhold(peer_tep->te_ser);
	tl_serializer_refrele(tep->te_ser);
	tep->te_ser = peer_tep->te_ser;
	ASSERT(tep->te_oconp == NULL);
	/* Record the listener; peer reference is transferred to te_oconp. */
	tep->te_oconp = peer_tep;

	/*
	 * It is safe to close now. Close may continue on listener serializer.
	 */
	tl_closeok(tep);

	/*
	 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
	 * data, so we link mp to ackmp.
	 */
	ackmp->b_cont = mp;
	mp = ackmp;

	tl_refhold(tep);
	tl_serializer_enter(tep, tl_conn_req_ser, mp);
}
3121 
3122 /*
3123  * Finish T_CONN_REQ processing on listener serializer.
3124  */
3125 static void
3126 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3127 {
3128         queue_t         *wq;
3129         tl_endpt_t      *peer_tep = tep->te_oconp;
3130         mblk_t          *confmp, *cimp, *indmp;
3131         void            *opts = NULL;
3132         mblk_t          *ackmp = mp;
3133         struct T_conn_req       *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3134         struct T_conn_ind       *ci;
3135         tl_icon_t       *tip;
3136         void            *addr_startp;
3137         t_scalar_t      olen = creq->OPT_length;
3138         t_scalar_t      ooff = creq->OPT_offset;
3139         size_t          ci_msz;
3140         size_t          size;
3141         cred_t          *cr = NULL;
3142         pid_t           cpid;
3143 
3144         if (tep->te_closing) {
3145                 TL_UNCONNECT(tep->te_oconp);
3146                 tl_serializer_exit(tep);
3147                 tl_refrele(tep);
3148                 freemsg(mp);
3149                 return;
3150         }
3151 
3152         wq = tep->te_wq;
3153         tep->te_flag |= TL_EAGER;
3154 
3155         /*
3156          * Extract preallocated ackmp from mp.
3157          */
3158         mp = mp->b_cont;
3159         ackmp->b_cont = NULL;
3160 
3161         if (olen == 0)
3162                 ooff = 0;
3163 
3164         if (peer_tep->te_closing ||
3165             !((peer_tep->te_state == TS_IDLE) ||
3166             (peer_tep->te_state == TS_WRES_CIND))) {
3167                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3168                     "tl_conn_req:peer in bad state (%d)",
3169                     peer_tep->te_state));
3170                 TL_UNCONNECT(tep->te_oconp);
3171                 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3172                 freemsg(ackmp);
3173                 tl_serializer_exit(tep);
3174                 tl_refrele(tep);
3175                 return;
3176         }
3177 
3178         /*
3179          * preallocate now for T_DISCON_IND or T_CONN_IND
3180          */
3181         /*
3182          * calculate length of T_CONN_IND message
3183          */
3184         if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
3185                 cr = msg_getcred(mp, &cpid);
3186                 ASSERT(cr != NULL);
3187                 if (peer_tep->te_flag & TL_SETCRED) {
3188                         ooff = 0;
3189                         olen = (t_scalar_t) sizeof (struct opthdr) +
3190                             OPTLEN(sizeof (tl_credopt_t));
3191                         /* 1 option only */
3192                 } else {
3193                         ooff = 0;
3194                         olen = (t_scalar_t)sizeof (struct opthdr) +
3195                             OPTLEN(ucredminsize(cr));
3196                         /* 1 option only */
3197                 }
3198         }
3199         ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3200         ci_msz = T_ALIGN(ci_msz) + olen;
3201         size = max(ci_msz, sizeof (struct T_discon_ind));
3202 
3203         /*
3204          * Save options from mp - we'll need them for T_CONN_IND.
3205          */
3206         if (ooff != 0) {
3207                 opts = kmem_alloc(olen, KM_NOSLEEP);
3208                 if (opts == NULL) {
3209                         /*
3210                          * roll back state changes
3211                          */
3212                         tep->te_state = TS_IDLE;
3213                         tl_memrecover(wq, mp, size);
3214                         freemsg(ackmp);
3215                         TL_UNCONNECT(tep->te_oconp);
3216                         tl_serializer_exit(tep);
3217                         tl_refrele(tep);
3218                         return;
3219                 }
3220                 /* Copy options to a temp buffer */
3221                 bcopy(mp->b_rptr + ooff, opts, olen);
3222         }
3223 
3224         if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3225                 /*
3226                  * Generate a T_CONN_CON that has the identical address
3227                  * (and options) as the T_CONN_REQ.
3228                  * NOTE: assumes that the T_conn_req and T_conn_con structures
3229                  * are isomorphic.
3230                  */
3231                 confmp = copyb(mp);
3232                 if (confmp == NULL) {
3233                         /*
3234                          * roll back state changes
3235                          */
3236                         tep->te_state = TS_IDLE;
3237                         tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3238                         freemsg(ackmp);
3239                         if (opts != NULL)
3240                                 kmem_free(opts, olen);
3241                         TL_UNCONNECT(tep->te_oconp);
3242                         tl_serializer_exit(tep);
3243                         tl_refrele(tep);
3244                         return;
3245                 }
3246                 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3247                     T_CONN_CON;
3248         } else {
3249                 confmp = NULL;
3250         }
3251         if ((indmp = reallocb(mp, size, 0)) == NULL) {
3252                 /*
3253                  * roll back state changes
3254                  */
3255                 tep->te_state = TS_IDLE;
3256                 tl_memrecover(wq, mp, size);
3257                 freemsg(ackmp);
3258                 if (opts != NULL)
3259                         kmem_free(opts, olen);
3260                 freemsg(confmp);
3261                 TL_UNCONNECT(tep->te_oconp);
3262                 tl_serializer_exit(tep);
3263                 tl_refrele(tep);
3264                 return;
3265         }
3266 
3267         tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3268         if (tip == NULL) {
3269                 /*
3270                  * roll back state changes
3271                  */
3272                 tep->te_state = TS_IDLE;
3273                 tl_memrecover(wq, indmp, sizeof (*tip));
3274                 freemsg(ackmp);
3275                 if (opts != NULL)
3276                         kmem_free(opts, olen);
3277                 freemsg(confmp);
3278                 TL_UNCONNECT(tep->te_oconp);
3279                 tl_serializer_exit(tep);
3280                 tl_refrele(tep);
3281                 return;
3282         }
3283         tip->ti_mp = NULL;
3284 
3285         /*
3286          * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3287          * and tl_icon_t cell.
3288          */
3289 
3290         /*
3291          * ack validity of request and send the peer credential in the ACK.
3292          */
3293         tep->te_state = nextstate[TE_OK_ACK1][tep->te_state];
3294 
3295         if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3296             confmp != NULL) {
3297                 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3298         }
3299 
3300         tl_ok_ack(wq, ackmp, T_CONN_REQ);
3301 
3302         /*
3303          * prepare message to send T_CONN_IND
3304          */
3305         /*
3306          * allocate the message - original data blocks retained
3307          * in the returned mblk
3308          */
3309         cimp = tl_resizemp(indmp, size);
3310         if (cimp == NULL) {
3311                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3312                     "tl_conn_req:con_ind:allocb failure"));
3313                 tl_merror(wq, indmp, ENOMEM);
3314                 TL_UNCONNECT(tep->te_oconp);
3315                 tl_serializer_exit(tep);
3316                 tl_refrele(tep);
3317                 if (opts != NULL)
3318                         kmem_free(opts, olen);
3319                 freemsg(confmp);
3320                 ASSERT(tip->ti_mp == NULL);
3321                 kmem_free(tip, sizeof (*tip));
3322                 return;
3323         }
3324 
3325         DB_TYPE(cimp) = M_PROTO;
3326         ci = (struct T_conn_ind *)cimp->b_rptr;
3327         ci->PRIM_type  = T_CONN_IND;
3328         ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3329         ci->SRC_length = tep->te_alen;
3330         ci->SEQ_number = tep->te_seqno;
3331 
3332         addr_startp = cimp->b_rptr + ci->SRC_offset;
3333         bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3334         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3335 
3336                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3337                     ci->SRC_length);
3338                 ci->OPT_length = olen; /* because only 1 option */
3339                 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3340                     cr, cpid,
3341                     peer_tep->te_flag, peer_tep->te_credp);
3342         } else if (ooff != 0) {
3343                 /* Copy option from T_CONN_REQ */
3344                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3345                     ci->SRC_length);
3346                 ci->OPT_length = olen;
3347                 ASSERT(opts != NULL);
3348                 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3349         } else {
3350                 ci->OPT_offset = 0;
3351                 ci->OPT_length = 0;
3352         }
3353         if (opts != NULL)
3354                 kmem_free(opts, olen);
3355 
3356         /*
3357          * register connection request with server peer
3358          * append to list of incoming connections
3359          * increment references for both peer_tep and tep: peer_tep is placed on
3360          * te_oconp and tep is placed on listeners queue.
3361          */
3362         tip->ti_tep = tep;
3363         tip->ti_seqno = tep->te_seqno;
3364         list_insert_tail(&peer_tep->te_iconp, tip);
3365         peer_tep->te_nicon++;
3366 
3367         peer_tep->te_state = nextstate[TE_CONN_IND][peer_tep->te_state];
3368         /*
3369          * send the T_CONN_IND message
3370          */
3371         putnext(peer_tep->te_rq, cimp);
3372 
3373         /*
3374          * Send a T_CONN_CON message for sockets.
3375          * Disable the queues until we have reached the correct state!
3376          */
3377         if (confmp != NULL) {
3378                 tep->te_state = nextstate[TE_CONN_CON][tep->te_state];
3379                 noenable(wq);
3380                 putnext(tep->te_rq, confmp);
3381         }
3382         /*
3383          * Now we need to increment tep reference because tep is referenced by
3384          * server list of pending connections. We also need to decrement
3385          * reference before exiting serializer. Two operations void each other
3386          * so we don't modify reference at all.
3387          */
3388         ASSERT(tep->te_refcnt >= 2);
3389         ASSERT(peer_tep->te_refcnt >= 2);
3390         tl_serializer_exit(tep);
3391 }
3392 
3393 
3394 
3395 /*
3396  * Handle T_conn_res on listener stream. Called on listener serializer.
3397  * tl_conn_req has already generated the T_CONN_CON.
3398  * tl_conn_res is called on listener serializer.
3399  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3400  * Switch eager serializer to acceptor's.
3401  *
3402  * If TL_SET[U]CRED generate the credentials options.
3403  * For sockets tl_conn_req has already generated the T_CONN_CON.
3404  */
3405 static void
3406 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3407 {
3408         queue_t                 *wq;
3409         struct T_conn_res       *cres = (struct T_conn_res *)mp->b_rptr;
3410         ssize_t                 msz = MBLKL(mp);
3411         t_scalar_t              olen, ooff, err = 0;
3412         t_scalar_t              prim = cres->PRIM_type;
3413         uchar_t                 *addr_startp;
3414         tl_endpt_t              *acc_ep = NULL, *cl_ep = NULL;
3415         tl_icon_t               *tip;
3416         size_t                  size;
3417         mblk_t                  *ackmp, *respmp;
3418         mblk_t                  *dimp, *ccmp = NULL;
3419         struct T_discon_ind     *di;
3420         struct T_conn_con       *cc;
3421         boolean_t               client_noclose_set = B_FALSE;
3422         boolean_t               switch_client_serializer = B_TRUE;
3423 
3424         ASSERT(IS_COTS(tep));
3425 
3426         if (tep->te_closing) {
3427                 freemsg(mp);
3428                 return;
3429         }
3430 
3431         wq = tep->te_wq;
3432 
3433         /*
3434          * preallocate memory for:
3435          * 1. max of T_ERROR_ACK and T_OK_ACK
3436          *      ==> known max T_ERROR_ACK
3437          * 2. max of T_DISCON_IND and T_CONN_CON
3438          */
3439         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3440         if (ackmp == NULL) {
3441                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3442                 return;
3443         }
3444         /*
3445          * memory committed for T_OK_ACK/T_ERROR_ACK now
3446          * will be committed for T_DISCON_IND/T_CONN_CON later
3447          */
3448 
3449 
3450         ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3451 
3452         /*
3453          * validate state
3454          */
3455         if (tep->te_state != TS_WRES_CIND) {
3456                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3457                     SL_TRACE | SL_ERROR,
3458                     "tl_wput:T_CONN_RES:out of state, state=%d",
3459                     tep->te_state));
3460                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3461                 freemsg(mp);
3462                 return;
3463         }
3464 
3465         /*
3466          * validate the message
3467          * Note: dereference fields in struct inside message only
3468          * after validating the message length.
3469          */
3470         if (msz < sizeof (struct T_conn_res)) {
3471                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3472                     "tl_conn_res:invalid message length"));
3473                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3474                 freemsg(mp);
3475                 return;
3476         }
3477         olen = cres->OPT_length;
3478         ooff = cres->OPT_offset;
3479         if (((olen > 0) && ((ooff + olen) > msz))) {
3480                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3481                     "tl_conn_res:invalid message"));
3482                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3483                 freemsg(mp);
3484                 return;
3485         }
3486         if (olen) {
3487                 /*
3488                  * no opts in connect res
3489                  * supported in this provider
3490                  */
3491                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3492                     "tl_conn_res:options not supported in message"));
3493                 tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3494                 freemsg(mp);
3495                 return;
3496         }
3497 
3498         tep->te_state = nextstate[TE_CONN_RES][tep->te_state];
3499         ASSERT(tep->te_state == TS_WACK_CRES);
3500 
3501         if (cres->SEQ_number < TL_MINOR_START &&
3502             cres->SEQ_number >= BADSEQNUM) {
3503                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3504                     "tl_conn_res:remote endpoint sequence number bad"));
3505                 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3506                 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3507                 freemsg(mp);
3508                 return;
3509         }
3510 
3511         /*
3512          * find accepting endpoint. Will have extra reference if found.
3513          */
3514         if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3515             (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3516             (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3517                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3518                     "tl_conn_res:bad accepting endpoint"));
3519                 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3520                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3521                 freemsg(mp);
3522                 return;
3523         }
3524 
3525         /*
3526          * Prevent acceptor from closing.
3527          */
3528         if (!tl_noclose(acc_ep)) {
3529                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3530                     "tl_conn_res:bad accepting endpoint"));
3531                 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3532                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3533                 tl_refrele(acc_ep);
3534                 freemsg(mp);
3535                 return;
3536         }
3537 
3538         acc_ep->te_flag |= TL_ACCEPTOR;
3539 
3540         /*
3541          * validate that accepting endpoint, if different from listening
3542          * has address bound => state is TS_IDLE
3543          * TROUBLE in XPG4 !!?
3544          */
3545         if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3546                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3547                     "tl_conn_res:accepting endpoint has no address bound,"
3548                     "state=%d", acc_ep->te_state));
3549                 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3550                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3551                 freemsg(mp);
3552                 tl_closeok(acc_ep);
3553                 tl_refrele(acc_ep);
3554                 return;
3555         }
3556 
3557         /*
3558          * validate if accepting endpt same as listening, then
3559          * no other incoming connection should be on the queue
3560          */
3561 
3562         if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3563                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3564                     "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3565                 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3566                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3567                 freemsg(mp);
3568                 tl_closeok(acc_ep);
3569                 tl_refrele(acc_ep);
3570                 return;
3571         }
3572 
3573         /*
3574          * Mark for deletion, the entry corresponding to client
3575          * on list of pending connections made by the listener
3576          *  search list to see if client is one of the
3577          * recorded as a listener.
3578          */
3579         tip = tl_icon_find(tep, cres->SEQ_number);
3580         if (tip == NULL) {
3581                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3582                     "tl_conn_res:no client in listener list"));
3583                 tep->te_state = nextstate[TE_ERROR_ACK][tep->te_state];
3584                 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3585                 freemsg(mp);
3586                 tl_closeok(acc_ep);
3587                 tl_refrele(acc_ep);
3588                 return;
3589         }
3590 
3591         /*
3592          * If ti_tep is NULL the client has already closed. In this case
3593          * the code below will avoid any action on the client side
3594          * but complete the server and acceptor state transitions.
3595          */
3596         ASSERT(tip->ti_tep == NULL ||
3597             tip->ti_tep->te_seqno == cres->SEQ_number);
3598         cl_ep = tip->ti_tep;
3599 
3600         /*
3601          * If the client is present it is switched from listener's to acceptor's
3602          * serializer. We should block client closes while serializers are
3603          * being switched.
3604          *
3605          * It is possible that the client is present but is currently being
3606          * closed. There are two possible cases:
3607          *
3608          * 1) The client has already entered tl_close_finish_ser() and sent
3609          *    T_ORDREL_IND. In this case we can just ignore the client (but we
3610          *    still need to send all messages from tip->ti_mp to the acceptor).
3611          *
3612          * 2) The client started the close but has not entered
3613          *    tl_close_finish_ser() yet. In this case, the client is already
3614          *    proceeding asynchronously on the listener's serializer, so we're
3615          *    forced to change the acceptor to use the listener's serializer to
3616          *    ensure that any operations on the acceptor are serialized with
3617          *    respect to the close that's in-progress.
3618          */
3619         if (cl_ep != NULL) {
3620                 if (tl_noclose(cl_ep)) {
3621                         client_noclose_set = B_TRUE;
3622                 } else {
3623                         /*
3624                          * Client is closing. If it it has sent the
3625                          * T_ORDREL_IND, we can simply ignore it - otherwise,
3626                          * we have to let let the client continue until it is
3627                          * sent.
3628                          *
3629                          * If we do continue using the client, acceptor will
3630                          * switch to client's serializer which is used by client
3631                          * for its close.
3632                          */
3633                         tl_client_closing_when_accepting++;
3634                         switch_client_serializer = B_FALSE;
3635                         if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3636                             cl_ep->te_state == -1)
3637                                 cl_ep = NULL;
3638                 }
3639         }
3640 
3641         if (cl_ep != NULL) {
3642                 /*
3643                  * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3644                  * (latter for sockets only)
3645                  */
3646                 if (cl_ep->te_state != TS_WCON_CREQ &&
3647                     (cl_ep->te_state != TS_DATA_XFER &&
3648                     IS_SOCKET(cl_ep))) {
3649                         err = ECONNREFUSED;
3650                         /*
3651                          * T_DISCON_IND sent later after committing memory
3652                          * and acking validity of request
3653                          */
3654                         (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3655                             "tl_conn_res:peer in bad state"));
3656                 }
3657 
3658                 /*
3659                  * preallocate now for T_DISCON_IND or T_CONN_CONN
3660                  * ack validity of request (T_OK_ACK) after memory committed
3661                  */
3662 
3663                 if (err) {
3664                         size = sizeof (struct T_discon_ind);
3665                 } else {
3666                         /*
3667                          * calculate length of T_CONN_CON message
3668                          */
3669                         olen = 0;
3670                         if (cl_ep->te_flag & TL_SETCRED) {
3671                                 olen = (t_scalar_t)sizeof (struct opthdr) +
3672                                     OPTLEN(sizeof (tl_credopt_t));
3673                         } else if (cl_ep->te_flag & TL_SETUCRED) {
3674                                 olen = (t_scalar_t)sizeof (struct opthdr) +
3675                                     OPTLEN(ucredminsize(acc_ep->te_credp));
3676                         }
3677                         size = T_ALIGN(sizeof (struct T_conn_con) +
3678                             acc_ep->te_alen) + olen;
3679                 }
3680                 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3681                         /*
3682                          * roll back state changes
3683                          */
3684                         tep->te_state = TS_WRES_CIND;
3685                         tl_memrecover(wq, mp, size);
3686                         freemsg(ackmp);
3687                         if (client_noclose_set)
3688                                 tl_closeok(cl_ep);
3689                         tl_closeok(acc_ep);
3690                         tl_refrele(acc_ep);
3691                         return;
3692                 }
3693                 mp = NULL;
3694         }
3695 
3696         /*
3697          * Now ack validity of request
3698          */
3699         if (tep->te_nicon == 1) {
3700                 if (tep == acc_ep)
3701                         tep->te_state = nextstate[TE_OK_ACK2][tep->te_state];
3702                 else
3703                         tep->te_state = nextstate[TE_OK_ACK3][tep->te_state];
3704         } else {
3705                 tep->te_state = nextstate[TE_OK_ACK4][tep->te_state];
3706         }
3707 
3708         /*
3709          * send T_DISCON_IND now if client state validation failed earlier
3710          */
3711         if (err) {
3712                 tl_ok_ack(wq, ackmp, prim);
3713                 /*
3714                  * flush the queues - why always ?
3715                  */
3716                 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3717 
3718                 dimp = tl_resizemp(respmp, size);
3719                 if (dimp == NULL) {
3720                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
3721                             SL_TRACE | SL_ERROR,
3722                             "tl_conn_res:con_ind:allocb failure"));
3723                         tl_merror(wq, respmp, ENOMEM);
3724                         tl_closeok(acc_ep);
3725                         if (client_noclose_set)
3726                                 tl_closeok(cl_ep);
3727                         tl_refrele(acc_ep);
3728                         return;
3729                 }
3730                 if (dimp->b_cont) {
3731                         /* no user data in provider generated discon ind */
3732                         freemsg(dimp->b_cont);
3733                         dimp->b_cont = NULL;
3734                 }
3735 
3736                 DB_TYPE(dimp) = M_PROTO;
3737                 di = (struct T_discon_ind *)dimp->b_rptr;
3738                 di->PRIM_type  = T_DISCON_IND;
3739                 di->DISCON_reason = err;
3740                 di->SEQ_number = BADSEQNUM;
3741 
3742                 tep->te_state = TS_IDLE;
3743                 /*
3744                  * send T_DISCON_IND message
3745                  */
3746                 putnext(acc_ep->te_rq, dimp);
3747                 if (client_noclose_set)
3748                         tl_closeok(cl_ep);
3749                 tl_closeok(acc_ep);
3750                 tl_refrele(acc_ep);
3751                 return;
3752         }
3753 
3754         /*
3755          * now start connecting the accepting endpoint
3756          */
3757         if (tep != acc_ep)
3758                 acc_ep->te_state = nextstate[TE_PASS_CONN][acc_ep->te_state];
3759 
3760         if (cl_ep == NULL) {
3761                 /*
3762                  * The client has already closed. Send up any queued messages
3763                  * and change the state accordingly.
3764                  */
3765                 tl_ok_ack(wq, ackmp, prim);
3766                 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3767 
3768                 /*
3769                  * remove endpoint from incoming connection
3770                  * delete client from list of incoming connections
3771                  */
3772                 tl_freetip(tep, tip);
3773                 freemsg(mp);
3774                 tl_closeok(acc_ep);
3775                 tl_refrele(acc_ep);
3776                 return;
3777         } else if (tip->ti_mp != NULL) {
3778                 /*
3779                  * The client could have queued a T_DISCON_IND which needs
3780                  * to be sent up.
3781                  * Note that t_discon_req can not operate the same as
3782                  * t_data_req since it is not possible for it to putbq
3783                  * the message and return -1 due to the use of qwriter.
3784                  */
3785                 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3786         }
3787 
3788         /*
3789          * prepare connect confirm T_CONN_CON message
3790          */
3791 
3792         /*
3793          * allocate the message - original data blocks
3794          * retained in the returned mblk
3795          */
3796         if (!IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3797                 ccmp = tl_resizemp(respmp, size);
3798                 if (ccmp == NULL) {
3799                         tl_ok_ack(wq, ackmp, prim);
3800                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
3801                             SL_TRACE | SL_ERROR,
3802                             "tl_conn_res:conn_con:allocb failure"));
3803                         tl_merror(wq, respmp, ENOMEM);
3804                         tl_closeok(acc_ep);
3805                         if (client_noclose_set)
3806                                 tl_closeok(cl_ep);
3807                         tl_refrele(acc_ep);
3808                         return;
3809                 }
3810 
3811                 DB_TYPE(ccmp) = M_PROTO;
3812                 cc = (struct T_conn_con *)ccmp->b_rptr;
3813                 cc->PRIM_type  = T_CONN_CON;
3814                 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3815                 cc->RES_length = acc_ep->te_alen;
3816                 addr_startp = ccmp->b_rptr + cc->RES_offset;
3817                 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3818                 if (cl_ep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
3819                         cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3820                             cc->RES_length);
3821                         cc->OPT_length = olen;
3822                         tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3823                             acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3824                             cl_ep->te_credp);
3825                 } else {
3826                         cc->OPT_offset = 0;
3827                         cc->OPT_length = 0;
3828                 }
3829                 /*
3830                  * Forward the credential in the packet so it can be picked up
3831                  * at the higher layers for more complete credential processing
3832                  */
3833                 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3834         } else {
3835                 freemsg(respmp);
3836                 respmp = NULL;
3837         }
3838 
3839         /*
3840          * make connection linking
3841          * accepting and client endpoints
3842          * No need to increment references:
3843          *      on client: it should already have one from tip->ti_tep linkage.
3844          *      on acceptor is should already have one from the table lookup.
3845          *
3846          * At this point both client and acceptor can't close. Set client
3847          * serializer to acceptor's.
3848          */
3849         ASSERT(cl_ep->te_refcnt >= 2);
3850         ASSERT(acc_ep->te_refcnt >= 2);
3851         ASSERT(cl_ep->te_conp == NULL);
3852         ASSERT(acc_ep->te_conp == NULL);
3853         cl_ep->te_conp = acc_ep;
3854         acc_ep->te_conp = cl_ep;
3855         ASSERT(cl_ep->te_ser == tep->te_ser);
3856         if (switch_client_serializer) {
3857                 mutex_enter(&cl_ep->te_ser_lock);
3858                 if (cl_ep->te_ser_count > 0) {
3859                         switch_client_serializer = B_FALSE;
3860                         tl_serializer_noswitch++;
3861                 } else {
3862                         /*
3863                          * Move client to the acceptor's serializer.
3864                          */
3865                         tl_serializer_refhold(acc_ep->te_ser);
3866                         tl_serializer_refrele(cl_ep->te_ser);
3867                         cl_ep->te_ser = acc_ep->te_ser;
3868                 }
3869                 mutex_exit(&cl_ep->te_ser_lock);
3870         }
3871         if (!switch_client_serializer) {
3872                 /*
3873                  * It is not possible to switch client to use acceptor's.
3874                  * Move acceptor to client's serializer (which is the same as
3875                  * listener's).
3876                  */
3877                 tl_serializer_refhold(cl_ep->te_ser);
3878                 tl_serializer_refrele(acc_ep->te_ser);
3879                 acc_ep->te_ser = cl_ep->te_ser;
3880         }
3881 
3882         TL_REMOVE_PEER(cl_ep->te_oconp);
3883         TL_REMOVE_PEER(acc_ep->te_oconp);
3884 
3885         /*
3886          * remove endpoint from incoming connection
3887          * delete client from list of incoming connections
3888          */
3889         tip->ti_tep = NULL;
3890         tl_freetip(tep, tip);
3891         tl_ok_ack(wq, ackmp, prim);
3892 
3893         /*
3894          * data blocks already linked in reallocb()
3895          */
3896 
3897         /*
3898          * link queues so that I_SENDFD will work
3899          */
3900         if (!IS_SOCKET(tep)) {
3901                 acc_ep->te_wq->q_next = cl_ep->te_rq;
3902                 cl_ep->te_wq->q_next = acc_ep->te_rq;
3903         }
3904 
3905         /*
3906          * send T_CONN_CON up on client side unless it was already
3907          * done (for a socket). In cases any data or ordrel req has been
3908          * queued make sure that the service procedure runs.
3909          */
3910         if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3911                 enableok(cl_ep->te_wq);
3912                 TL_QENABLE(cl_ep);
3913                 if (ccmp != NULL)
3914                         freemsg(ccmp);
3915         } else {
3916                 /*
3917                  * change client state on TE_CONN_CON event
3918                  */
3919                 cl_ep->te_state = nextstate[TE_CONN_CON][cl_ep->te_state];
3920                 putnext(cl_ep->te_rq, ccmp);
3921         }
3922 
	/* Mark both endpoints as accepted */
3924         cl_ep->te_flag |= TL_ACCEPTED;
3925         acc_ep->te_flag |= TL_ACCEPTED;
3926 
3927         /*
3928          * Allow client and acceptor to close.
3929          */
3930         tl_closeok(acc_ep);
3931         if (client_noclose_set)
3932                 tl_closeok(cl_ep);
3933 }
3934 
3935 
3936 
3937 
/*
 * Handle T_DISCON_REQ: tear down an established, pending-inbound, or
 * pending-outbound connection.
 *
 * The request is acknowledged with T_OK_ACK (or T_ERROR_ACK on a bad
 * state or unknown SEQ_number) and, when a live peer endpoint exists,
 * the peer is sent a T_DISCON_IND.  Three peer situations are handled:
 *  - listener (te_nicon > 0): drop the pending inbound connect request
 *    identified by dr->SEQ_number;
 *  - client with an outstanding connect (te_oconp != NULL): withdraw
 *    the T_CONN_IND queued on the server or, for sockets using early
 *    connect, queue the T_DISCON_IND behind the T_CONN_IND;
 *  - established connection (te_conp != NULL): disconnect both ends.
 */
static void
tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_discon_req	*dr;
	ssize_t			msz;
	tl_endpt_t		*peer_tep = tep->te_conp;
	tl_endpt_t		*srv_tep = tep->te_oconp;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *dimp, *respmp;
	struct T_discon_ind	*di;
	t_scalar_t		save_state, new_state;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	/* Drop cached references to peers that are already closing. */
	if ((peer_tep != NULL) && peer_tep->te_closing) {
		TL_UNCONNECT(tep->te_conp);
		peer_tep = NULL;
	}
	if ((srv_tep != NULL) && srv_tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		srv_tep = NULL;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. for  T_DISCON_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (ackmp == NULL) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND  later
	 */

	dr = (struct T_discon_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state: T_DISCON_REQ is legal only while connecting,
	 * waiting to accept, or in one of the data-transfer states.
	 */
	save_state = new_state = tep->te_state;
	if (!(save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
	    !(save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
		freemsg(mp);
		return;
	}
	/*
	 * Defer committing the state change until it is determined if
	 * the message will be queued with the tl_icon or not.
	 */
	new_state  = nextstate[TE_DISCON_REQ][tep->te_state];

	/* validate the message */
	if (msz < sizeof (struct T_discon_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_discon_req:invalid message"));
		tep->te_state = nextstate[TE_ERROR_ACK][new_state];
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * if server, then validate that client exists
	 * by connection sequence number etc.
	 */
	if (tep->te_nicon > 0) { /* server */

		/*
		 * search server list for disconnect client
		 */
		tip = tl_icon_find(tep, dr->SEQ_number);
		if (tip == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE | SL_ERROR,
			    "tl_discon_req:no disconnect endpoint"));
			tep->te_state = nextstate[TE_ERROR_ACK][new_state];
			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
			freemsg(mp);
			return;
		}
		/*
		 * If ti_tep is NULL the client has already closed. In this case
		 * the code below will avoid any action on the client side.
		 */

		IMPLY(tip->ti_tep != NULL,
		    tip->ti_tep->te_seqno == dr->SEQ_number);
		peer_tep = tip->ti_tep;
	}

	/*
	 * preallocate now for T_DISCON_IND
	 * ack validity of request (T_OK_ACK) after memory committed
	 */
	size = sizeof (struct T_discon_ind);
	if ((respmp = reallocb(mp, size, 0)) == NULL) {
		tl_memrecover(wq, mp, size);
		freemsg(ackmp);
		return;
	}

	/*
	 * prepare message to ack validity of request: pick the T_OK_ACK
	 * state transition based on how many connect indications remain
	 * pending on this endpoint.
	 */
	if (tep->te_nicon == 0) {
		new_state = nextstate[TE_OK_ACK1][new_state];
	} else {
		if (tep->te_nicon == 1)
			new_state = nextstate[TE_OK_ACK2][new_state];
		else
			new_state = nextstate[TE_OK_ACK4][new_state];
	}

	/*
	 * Flushing queues according to TPI. Using the old state.
	 */
	if ((tep->te_nicon <= 1) &&
	    ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL)))
		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);

	/* send T_OK_ACK up  */
	tl_ok_ack(wq, ackmp, T_DISCON_REQ);

	/*
	 * now do disconnect business
	 */
	if (tep->te_nicon > 0) { /* listener */
		if (peer_tep != NULL && !peer_tep->te_closing) {
			/*
			 * disconnect incoming connect request pending to tep
			 */
			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
				(void) (STRLOG(TL_ID, tep->te_minor, 2,
				    SL_TRACE | SL_ERROR,
				    "tl_discon_req: reallocb failed"));
				tep->te_state = new_state;
				tl_merror(wq, respmp, ENOMEM);
				return;
			}
			di = (struct T_discon_ind *)dimp->b_rptr;
			di->SEQ_number = BADSEQNUM;
			save_state = peer_tep->te_state;
			peer_tep->te_state = TS_IDLE;

			TL_REMOVE_PEER(peer_tep->te_oconp);
			enableok(peer_tep->te_wq);
			TL_QENABLE(peer_tep);
		} else {
			/* Client already gone; no indication to send. */
			freemsg(respmp);
			dimp = NULL;
		}

		/*
		 * remove endpoint from incoming connection list
		 * - remove disconnect client from list on server
		 */
		tl_freetip(tep, tip);
	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
		/*
		 * disconnect an outgoing request pending from tep
		 */

		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE | SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		DB_TYPE(dimp) = M_PROTO;
		di->PRIM_type  = T_DISCON_IND;
		di->DISCON_reason = ECONNRESET;
		di->SEQ_number = tep->te_seqno;

		/*
		 * If this is a socket the T_DISCON_IND is queued with
		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
		 * from the list of pending connections.
		 * Note that when te_oconp is set the peer better have
		 * a t_connind_t for the client.
		 */
		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
			/*
			 * No need to check that
			 * ti_tep == NULL since the T_DISCON_IND
			 * takes precedence over other queued
			 * messages.
			 */
			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
			peer_tep = NULL;
			dimp = NULL;
			/*
			 * Can't clear te_oconp since tl_co_unconnect needs
			 * it as a hint not to free the tep.
			 * Keep the state unchanged since tl_conn_res inspects
			 * it.
			 */
			new_state = tep->te_state;
		} else {
			/* Found - delete it */
			tip = tl_icon_find(peer_tep, tep->te_seqno);
			if (tip != NULL) {
				ASSERT(tep == tip->ti_tep);
				save_state = peer_tep->te_state;
				if (peer_tep->te_nicon == 1)
					peer_tep->te_state =
					    nextstate[TE_DISCON_IND2]
					    [peer_tep->te_state];
				else
					peer_tep->te_state =
					    nextstate[TE_DISCON_IND3]
					    [peer_tep->te_state];
				tl_freetip(peer_tep, tip);
			}
			ASSERT(tep->te_oconp != NULL);
			TL_UNCONNECT(tep->te_oconp);
		}
	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE | SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->SEQ_number = BADSEQNUM;

		save_state = peer_tep->te_state;
		peer_tep->te_state = TS_IDLE;
	} else {
		/* Not connected */
		tep->te_state = new_state;
		freemsg(respmp);
		return;
	}

	/* Commit state changes */
	tep->te_state = new_state;

	if (peer_tep == NULL) {
		ASSERT(dimp == NULL);
		goto done;
	}
	/*
	 * Flush queues on peer before sending up
	 * T_DISCON_IND according to TPI
	 */

	if ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL))
		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);

	DB_TYPE(dimp) = M_PROTO;
	di->PRIM_type  = T_DISCON_IND;
	di->DISCON_reason = ECONNRESET;

	/*
	 * data blocks already linked into dimp by reallocb()
	 */
	/*
	 * send indication message to peer user module
	 */
	ASSERT(dimp != NULL);
	putnext(peer_tep->te_rq, dimp);
done:
	if (tep->te_conp) {  /* disconnect pointers if connected */
		ASSERT(!peer_tep->te_closing);

		/*
		 * Messages may be queued on peer's write queue
		 * waiting to be processed by its write service
		 * procedure. Before the pointer to the peer transport
		 * structure is set to NULL, qenable the peer's write
		 * queue so that the queued up messages are processed.
		 */
		if ((save_state == TS_DATA_XFER) ||
		    (save_state == TS_WIND_ORDREL) ||
		    (save_state == TS_WREQ_ORDREL))
			TL_QENABLE(peer_tep);
		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
		TL_UNCONNECT(peer_tep->te_conp);
		if (!IS_SOCKET(tep)) {
			/*
			 * unlink the streams
			 */
			tep->te_wq->q_next = NULL;
			peer_tep->te_wq->q_next = NULL;
		}
		TL_UNCONNECT(tep->te_conp);
	}
}
4254 
4255 static void
4256 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4257 {
4258         if (!tep->te_closing)
4259                 tl_addr_req(mp, tep);
4260         else
4261                 freemsg(mp);
4262 
4263         tl_serializer_exit(tep);
4264         tl_refrele(tep);
4265 }
4266 
4267 static void
4268 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4269 {
4270         queue_t                 *wq;
4271         size_t                  ack_sz;
4272         mblk_t                  *ackmp;
4273         struct T_addr_ack       *taa;
4274 
4275         if (tep->te_closing) {
4276                 freemsg(mp);
4277                 return;
4278         }
4279 
4280         wq = tep->te_wq;
4281 
4282         /*
4283          * Note: T_ADDR_REQ message has only PRIM_type field
4284          * so it is already validated earlier.
4285          */
4286 
4287         if (IS_CLTS(tep) ||
4288             (tep->te_state > TS_WREQ_ORDREL) ||
4289             (tep->te_state < TS_DATA_XFER)) {
4290                 /*
4291                  * Either connectionless or connection oriented but not
4292                  * in connected data transfer state or half-closed states.
4293                  */
4294                 ack_sz = sizeof (struct T_addr_ack);
4295                 if (tep->te_state >= TS_IDLE)
4296                         /* is bound */
4297                         ack_sz += tep->te_alen;
4298                 ackmp = reallocb(mp, ack_sz, 0);
4299                 if (ackmp == NULL) {
4300                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4301                             SL_TRACE | SL_ERROR,
4302                             "tl_addr_req: reallocb failed"));
4303                         tl_memrecover(wq, mp, ack_sz);
4304                         return;
4305                 }
4306 
4307                 taa = (struct T_addr_ack *)ackmp->b_rptr;
4308 
4309                 bzero(taa, sizeof (struct T_addr_ack));
4310 
4311                 taa->PRIM_type = T_ADDR_ACK;
4312                 ackmp->b_datap->db_type = M_PCPROTO;
4313                 ackmp->b_wptr = (uchar_t *)&taa[1];
4314 
4315                 if (tep->te_state >= TS_IDLE) {
4316                         /* endpoint is bound */
4317                         taa->LOCADDR_length = tep->te_alen;
4318                         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4319 
4320                         bcopy(tep->te_abuf, ackmp->b_wptr,
4321                             tep->te_alen);
4322                         ackmp->b_wptr += tep->te_alen;
4323                         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4324                 }
4325 
4326                 (void) qreply(wq, ackmp);
4327         } else {
4328                 ASSERT(tep->te_state == TS_DATA_XFER ||
4329                     tep->te_state == TS_WIND_ORDREL ||
4330                     tep->te_state == TS_WREQ_ORDREL);
4331                 /* connection oriented in data transfer */
4332                 tl_connected_cots_addr_req(mp, tep);
4333         }
4334 }
4335 
4336 
4337 static void
4338 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4339 {
4340         tl_endpt_t              *peer_tep = tep->te_conp;
4341         size_t                  ack_sz;
4342         mblk_t                  *ackmp;
4343         struct T_addr_ack       *taa;
4344         uchar_t                 *addr_startp;
4345 
4346         if (tep->te_closing) {
4347                 freemsg(mp);
4348                 return;
4349         }
4350 
4351         if (peer_tep == NULL || peer_tep->te_closing) {
4352                 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4353                 return;
4354         }
4355 
4356         ASSERT(tep->te_state >= TS_IDLE);
4357 
4358         ack_sz = sizeof (struct T_addr_ack);
4359         ack_sz += T_ALIGN(tep->te_alen);
4360         ack_sz += peer_tep->te_alen;
4361 
4362         ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4363         if (ackmp == NULL) {
4364                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4365                     "tl_connected_cots_addr_req: reallocb failed"));
4366                 tl_memrecover(tep->te_wq, mp, ack_sz);
4367                 return;
4368         }
4369 
4370         taa = (struct T_addr_ack *)ackmp->b_rptr;
4371 
4372         /* endpoint is bound */
4373         taa->LOCADDR_length = tep->te_alen;
4374         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4375 
4376         addr_startp = (uchar_t *)&taa[1];
4377 
4378         bcopy(tep->te_abuf, addr_startp,
4379             tep->te_alen);
4380 
4381         taa->REMADDR_length = peer_tep->te_alen;
4382         taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4383             taa->LOCADDR_length);
4384         addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4385         bcopy(peer_tep->te_abuf, addr_startp,
4386             peer_tep->te_alen);
4387         ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4388             taa->REMADDR_offset + peer_tep->te_alen;
4389         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4390 
4391         putnext(tep->te_rq, ackmp);
4392 }
4393 
4394 static void
4395 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4396 {
4397         if (IS_CLTS(tep)) {
4398                 *ia = tl_clts_info_ack;
4399                 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4400         } else {
4401                 *ia = tl_cots_info_ack;
4402                 if (IS_COTSORD(tep))
4403                         ia->SERV_type = T_COTS_ORD;
4404         }
4405         ia->TIDU_size = tl_tidusz;
4406         ia->CURRENT_state = tep->te_state;
4407 }
4408 
4409 /*
4410  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4411  * tl_wput.
4412  */
4413 static void
4414 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4415 {
4416         mblk_t                  *ackmp;
4417         t_uscalar_t             cap_bits1;
4418         struct T_capability_ack *tcap;
4419 
4420         if (tep->te_closing) {
4421                 freemsg(mp);
4422                 return;
4423         }
4424 
4425         cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4426 
4427         ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4428             M_PCPROTO, T_CAPABILITY_ACK);
4429         if (ackmp == NULL) {
4430                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4431                     "tl_capability_req: reallocb failed"));
4432                 tl_memrecover(tep->te_wq, mp,
4433                     sizeof (struct T_capability_ack));
4434                 return;
4435         }
4436 
4437         tcap = (struct T_capability_ack *)ackmp->b_rptr;
4438         tcap->CAP_bits1 = 0;
4439 
4440         if (cap_bits1 & TC1_INFO) {
4441                 tl_copy_info(&tcap->INFO_ack, tep);
4442                 tcap->CAP_bits1 |= TC1_INFO;
4443         }
4444 
4445         if (cap_bits1 & TC1_ACCEPTOR_ID) {
4446                 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4447                 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4448         }
4449 
4450         putnext(tep->te_rq, ackmp);
4451 }
4452 
4453 static void
4454 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4455 {
4456         if (!tep->te_closing)
4457                 tl_info_req(mp, tep);
4458         else
4459                 freemsg(mp);
4460 
4461         tl_serializer_exit(tep);
4462         tl_refrele(tep);
4463 }
4464 
4465 static void
4466 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4467 {
4468         mblk_t *ackmp;
4469 
4470         ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4471             M_PCPROTO, T_INFO_ACK);
4472         if (ackmp == NULL) {
4473                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4474                     "tl_info_req: reallocb failed"));
4475                 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4476                 return;
4477         }
4478 
4479         /*
4480          * fill in T_INFO_ACK contents
4481          */
4482         tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4483 
4484         /*
4485          * send ack message
4486          */
4487         putnext(tep->te_rq, ackmp);
4488 }
4489 
4490 /*
4491  * Handle M_DATA, T_data_req and T_optdata_req.
4492  * If this is a socket pass through T_optdata_req options unmodified.
4493  */
4494 static void
4495 tl_data(mblk_t *mp, tl_endpt_t *tep)
4496 {
4497         queue_t                 *wq = tep->te_wq;
4498         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4499         ssize_t                 msz = MBLKL(mp);
4500         tl_endpt_t              *peer_tep;
4501         queue_t                 *peer_rq;
4502         boolean_t               closing = tep->te_closing;
4503 
4504         if (IS_CLTS(tep)) {
4505                 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4506                     SL_TRACE | SL_ERROR,
4507                     "tl_wput:clts:unattached M_DATA"));
4508                 if (!closing) {
4509                         tl_merror(wq, mp, EPROTO);
4510                 } else {
4511                         freemsg(mp);
4512                 }
4513                 return;
4514         }
4515 
4516         /*
4517          * If the endpoint is closing it should still forward any data to the
4518          * peer (if it has one). If it is not allowed to forward it can just
4519          * free the message.
4520          */
4521         if (closing &&
4522             (tep->te_state != TS_DATA_XFER) &&
4523             (tep->te_state != TS_WREQ_ORDREL)) {
4524                 freemsg(mp);
4525                 return;
4526         }
4527 
4528         if (DB_TYPE(mp) == M_PROTO) {
4529                 if (prim->type == T_DATA_REQ &&
4530                     msz < sizeof (struct T_data_req)) {
4531                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4532                                 SL_TRACE | SL_ERROR,
4533                                 "tl_data:T_DATA_REQ:invalid message"));
4534                         if (!closing) {
4535                                 tl_merror(wq, mp, EPROTO);
4536                         } else {
4537                                 freemsg(mp);
4538                         }
4539                         return;
4540                 } else if (prim->type == T_OPTDATA_REQ &&
4541                     (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4542                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4543                             SL_TRACE | SL_ERROR,
4544                             "tl_data:T_OPTDATA_REQ:invalid message"));
4545                         if (!closing) {
4546                                 tl_merror(wq, mp, EPROTO);
4547                         } else {
4548                                 freemsg(mp);
4549                         }
4550                         return;
4551                 }
4552         }
4553 
4554         /*
4555          * connection oriented provider
4556          */
4557         switch (tep->te_state) {
4558         case TS_IDLE:
4559                 /*
4560                  * Other end not here - do nothing.
4561                  */
4562                 freemsg(mp);
4563                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4564                     "tl_data:cots with endpoint idle"));
4565                 return;
4566 
4567         case TS_DATA_XFER:
4568                 /* valid states */
4569                 if (tep->te_conp != NULL)
4570                         break;
4571 
4572                 if (tep->te_oconp == NULL) {
4573                         if (!closing) {
4574                                 tl_merror(wq, mp, EPROTO);
4575                         } else {
4576                                 freemsg(mp);
4577                         }
4578                         return;
4579                 }
4580                 /*
4581                  * For a socket the T_CONN_CON is sent early thus
4582                  * the peer might not yet have accepted the connection.
4583                  * If we are closing queue the packet with the T_CONN_IND.
4584                  * Otherwise defer processing the packet until the peer
4585                  * accepts the connection.
4586                  * Note that the queue is noenabled when we go into this
4587                  * state.
4588                  */
4589                 if (!closing) {
4590                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4591                             SL_TRACE | SL_ERROR,
4592                             "tl_data: ocon"));
4593                         TL_PUTBQ(tep, mp);
4594                         return;
4595                 }
4596                 if (DB_TYPE(mp) == M_PROTO) {
4597                         if (msz < sizeof (t_scalar_t)) {
4598                                 freemsg(mp);
4599                                 return;
4600                         }
4601                         /* reuse message block - just change REQ to IND */
4602                         if (prim->type == T_DATA_REQ)
4603                                 prim->type = T_DATA_IND;
4604                         else
4605                                 prim->type = T_OPTDATA_IND;
4606                 }
4607                 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4608                 return;
4609 
4610         case TS_WREQ_ORDREL:
4611                 if (tep->te_conp == NULL) {
4612                         /*
4613                          * Other end closed - generate discon_ind
4614                          * with reason 0 to cause an EPIPE but no
4615                          * read side error on AF_UNIX sockets.
4616                          */
4617                         freemsg(mp);
4618                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
4619                             SL_TRACE | SL_ERROR,
4620                             "tl_data: WREQ_ORDREL and no peer"));
4621                         tl_discon_ind(tep, 0);
4622                         return;
4623                 }
4624                 break;
4625 
4626         default:
4627                 /* invalid state for event TE_DATA_REQ */
4628                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4629                     "tl_data:cots:out of state"));
4630                 tl_merror(wq, mp, EPROTO);
4631                 return;
4632         }
4633         /*
4634          * tep->te_state = nextstate[TE_DATA_REQ][tep->te_state];
4635          * (State stays same on this event)
4636          */
4637 
4638         /*
4639          * get connected endpoint
4640          */
4641         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4642                 freemsg(mp);
4643                 /* Peer closed */
4644                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4645                     "tl_data: peer gone"));
4646                 return;
4647         }
4648 
4649         ASSERT(tep->te_serializer == peer_tep->te_serializer);
4650         peer_rq = peer_tep->te_rq;
4651 
4652         /*
4653          * Put it back if flow controlled
4654          * Note: Messages already on queue when we are closing is bounded
4655          * so we can ignore flow control.
4656          */
4657         if (!canputnext(peer_rq) && !closing) {
4658                 TL_PUTBQ(tep, mp);
4659                 return;
4660         }
4661 
4662         /*
4663          * validate peer state
4664          */
4665         switch (peer_tep->te_state) {
4666         case TS_DATA_XFER:
4667         case TS_WIND_ORDREL:
4668                 /* valid states */
4669                 break;
4670         default:
4671                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4672                     "tl_data:rx side:invalid state"));
4673                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4674                 return;
4675         }
4676         if (DB_TYPE(mp) == M_PROTO) {
4677                 /* reuse message block - just change REQ to IND */
4678                 if (prim->type == T_DATA_REQ)
4679                         prim->type = T_DATA_IND;
4680                 else
4681                         prim->type = T_OPTDATA_IND;
4682         }
4683         /*
4684          * peer_tep->te_state = nextstate[TE_DATA_IND][peer_tep->te_state];
4685          * (peer state stays same on this event)
4686          */
4687         /*
4688          * send data to connected peer
4689          */
4690         putnext(peer_rq, mp);
4691 }
4692 
4693 
4694 
/*
 * Handle T_EXDATA_REQ (expedited data) on a connection-oriented endpoint:
 * validate local state, locate the connected peer, honor flow control,
 * then convert the request in place to T_EXDATA_IND and send it up the
 * peer's read queue.  Parallels the normal-data path in tl_data().
 */
static void
tl_exdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	/* The message must at least hold a full T_exdata_req. */
	if (msz < sizeof (struct T_exdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_exdata:invalid message"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * validate state
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
		    "tl_exdata:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* No peer and no pending connection: protocol error. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE | SL_ERROR,
			    "tl_exdata: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_exdata: closing socket ocon"));
		prim->type = T_EXDATA_IND;
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE | SL_ERROR,
			    "tl_exdata: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:T_EXDATA_REQ:out of state, state=%d",
		    tep->te_state));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = nextstate[TE_EXDATA_REQ][tep->te_state];
	 * (state stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_exdata: peer gone"));
		return;
	}

	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate state on peer
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_exdata:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	/*
	 * peer_tep->te_state = nextstate[TE_DATA_IND][peer_tep->te_state];
	 * (peer state stays same on this event)
	 */
	/*
	 * reuse message block
	 */
	prim->type = T_EXDATA_IND;

	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}
4856 
4857 
4858 
4859 static void
4860 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4861 {
4862         queue_t                 *wq = tep->te_wq;
4863         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4864         ssize_t                 msz = MBLKL(mp);
4865         tl_endpt_t              *peer_tep;
4866         queue_t                 *peer_rq;
4867         boolean_t               closing = tep->te_closing;
4868 
4869         if (msz < sizeof (struct T_ordrel_req)) {
4870                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4871                     "tl_ordrel:invalid message"));
4872                 if (!closing) {
4873                         tl_merror(wq, mp, EPROTO);
4874                 } else {
4875                         freemsg(mp);
4876                 }
4877                 return;
4878         }
4879 
4880         /*
4881          * validate state
4882          */
4883         switch (tep->te_state) {
4884         case TS_DATA_XFER:
4885         case TS_WREQ_ORDREL:
4886                 /* valid states */
4887                 if (tep->te_conp != NULL)
4888                         break;
4889 
4890                 if (tep->te_oconp == NULL)
4891                         break;
4892 
4893                 /*
4894                  * For a socket the T_CONN_CON is sent early thus
4895                  * the peer might not yet have accepted the connection.
4896                  * If we are closing queue the packet with the T_CONN_IND.
4897                  * Otherwise defer processing the packet until the peer
4898                  * accepts the connection.
4899                  * Note that the queue is noenabled when we go into this
4900                  * state.
4901                  */
4902                 if (!closing) {
4903                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4904                             SL_TRACE | SL_ERROR,
4905                             "tl_ordlrel: ocon"));
4906                         TL_PUTBQ(tep, mp);
4907                         return;
4908                 }
4909                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4910                     "tl_ordlrel: closing socket ocon"));
4911                 prim->type = T_ORDREL_IND;
4912                 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4913                 return;
4914 
4915         default:
4916                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4917                     SL_TRACE | SL_ERROR,
4918                     "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4919                     tep->te_state));
4920                 if (!closing) {
4921                         tl_merror(wq, mp, EPROTO);
4922                 } else {
4923                         freemsg(mp);
4924                 }
4925                 return;
4926         }
4927         tep->te_state = nextstate[TE_ORDREL_REQ][tep->te_state];
4928 
4929         /*
4930          * get connected endpoint
4931          */
4932         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4933                 /* Peer closed */
4934                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4935                     "tl_ordrel: peer gone"));
4936                 freemsg(mp);
4937                 return;
4938         }
4939 
4940         peer_rq = peer_tep->te_rq;
4941 
4942         /*
4943          * Put it back if flow controlled except when we are closing.
4944          * Note: Messages already on queue when we are closing is bounded
4945          * so we can ignore flow control.
4946          */
4947         if (!canputnext(peer_rq) && !closing) {
4948                 TL_PUTBQ(tep, mp);
4949                 return;
4950         }
4951 
4952         /*
4953          * validate state on peer
4954          */
4955         switch (peer_tep->te_state) {
4956         case TS_DATA_XFER:
4957         case TS_WIND_ORDREL:
4958                 /* valid states */
4959                 break;
4960         default:
4961                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4962                     "tl_ordrel:rx side:invalid state"));
4963                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4964                 return;
4965         }
4966         peer_tep->te_state = nextstate[TE_ORDREL_IND][peer_tep->te_state];
4967 
4968         /*
4969          * reuse message block
4970          */
4971         prim->type = T_ORDREL_IND;
4972         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4973             "tl_ordrel: send ordrel_ind"));
4974 
4975         /*
4976          * send data to connected peer
4977          */
4978         putnext(peer_rq, mp);
4979 }
4980 
4981 
4982 /*
4983  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4984  */
4985 static void
4986 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4987 {
4988         size_t                  err_sz;
4989         tl_endpt_t              *tep;
4990         struct T_unitdata_req   *udreq;
4991         mblk_t                  *err_mp;
4992         t_scalar_t              alen;
4993         t_scalar_t              olen;
4994         struct T_uderror_ind    *uderr;
4995         uchar_t                 *addr_startp;
4996 
4997         err_sz = sizeof (struct T_uderror_ind);
4998         tep = (tl_endpt_t *)wq->q_ptr;
4999         udreq = (struct T_unitdata_req *)mp->b_rptr;
5000         alen = udreq->DEST_length;
5001         olen = udreq->OPT_length;
5002 
5003         if (alen > 0)
5004                 err_sz = T_ALIGN(err_sz + alen);
5005         if (olen > 0)
5006                 err_sz += olen;
5007 
5008         err_mp = allocb(err_sz, BPRI_MED);
5009         if (err_mp == NULL) {
5010                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5011                     "tl_uderr:allocb failure"));
5012                 /*
5013                  * Note: no rollback of state needed as it does
5014                  * not change in connectionless transport
5015                  */
5016                 tl_memrecover(wq, mp, err_sz);
5017                 return;
5018         }
5019 
5020         DB_TYPE(err_mp) = M_PROTO;
5021         err_mp->b_wptr = err_mp->b_rptr + err_sz;
5022         uderr = (struct T_uderror_ind *)err_mp->b_rptr;
5023         uderr->PRIM_type = T_UDERROR_IND;
5024         uderr->ERROR_type = err;
5025         uderr->DEST_length = alen;
5026         uderr->OPT_length = olen;
5027         if (alen <= 0) {
5028                 uderr->DEST_offset = 0;
5029         } else {
5030                 uderr->DEST_offset =
5031                     (t_scalar_t)sizeof (struct T_uderror_ind);
5032                 addr_startp = mp->b_rptr + udreq->DEST_offset;
5033                 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
5034                     (size_t)alen);
5035         }
5036         if (olen <= 0) {
5037                 uderr->OPT_offset = 0;
5038         } else {
5039                 uderr->OPT_offset =
5040                     (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
5041                     uderr->DEST_length);
5042                 addr_startp = mp->b_rptr + udreq->OPT_offset;
5043                 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
5044                     (size_t)olen);
5045         }
5046         freemsg(mp);
5047 
5048         /*
5049          * send indication message
5050          */
5051         tep->te_state = nextstate[TE_UDERROR_IND][tep->te_state];
5052 
5053         qreply(wq, err_mp);
5054 }
5055 
5056 static void
5057 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
5058 {
5059         queue_t *wq = tep->te_wq;
5060 
5061         if (!tep->te_closing && (wq->q_first != NULL)) {
5062                 TL_PUTQ(tep, mp);
5063         } else {
5064                 if (tep->te_rq != NULL)
5065                         tl_unitdata(mp, tep);
5066                 else
5067                         freemsg(mp);
5068         }
5069 
5070         tl_serializer_exit(tep);
5071         tl_refrele(tep);
5072 }
5073 
5074 /*
5075  * Handle T_unitdata_req.
5076  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
5077  * If this is a socket pass through options unmodified.
5078  */
5079 static void
5080 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
5081 {
5082         queue_t                 *wq = tep->te_wq;
5083         soux_addr_t             ux_addr;
5084         tl_addr_t               destaddr;
5085         uchar_t                 *addr_startp;
5086         tl_endpt_t              *peer_tep;
5087         struct T_unitdata_ind   *udind;
5088         struct T_unitdata_req   *udreq;
5089         ssize_t                 msz, ui_sz, reuse_mb_sz;
5090         t_scalar_t              alen, aoff, olen, ooff;
5091         t_scalar_t              oldolen = 0;
5092         cred_t                  *cr = NULL;
5093         pid_t                   cpid;
5094 
5095         udreq = (struct T_unitdata_req *)mp->b_rptr;
5096         msz = MBLKL(mp);
5097 
5098         /*
5099          * validate the state
5100          */
5101         if (tep->te_state != TS_IDLE) {
5102                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5103                     SL_TRACE | SL_ERROR,
5104                     "tl_wput:T_CONN_REQ:out of state"));
5105                 tl_merror(wq, mp, EPROTO);
5106                 return;
5107         }
5108         /*
5109          * tep->te_state = nextstate[TE_UNITDATA_REQ][tep->te_state];
5110          * (state does not change on this event)
5111          */
5112 
5113         /*
5114          * validate the message
5115          * Note: dereference fields in struct inside message only
5116          * after validating the message length.
5117          */
5118         if (msz < sizeof (struct T_unitdata_req)) {
5119                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5120                     "tl_unitdata:invalid message length"));
5121                 tl_merror(wq, mp, EINVAL);
5122                 return;
5123         }
5124         alen = udreq->DEST_length;
5125         aoff = udreq->DEST_offset;
5126         oldolen = olen = udreq->OPT_length;
5127         ooff = udreq->OPT_offset;
5128         if (olen == 0)
5129                 ooff = 0;
5130 
5131         if (IS_SOCKET(tep)) {
5132                 if ((alen != TL_SOUX_ADDRLEN) ||
5133                     (aoff < 0) ||
5134                     (aoff + alen > msz) ||
5135                     (olen < 0) || (ooff < 0) ||
5136                     ((olen > 0) && ((ooff + olen) > msz))) {
5137                         (void) (STRLOG(TL_ID, tep->te_minor,
5138                             1, SL_TRACE | SL_ERROR,
5139                             "tl_unitdata_req: invalid socket addr "
5140                             "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5141                             (int)msz, alen, aoff, olen, ooff));
5142                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5143                         return;
5144                 }
5145                 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5146 
5147                 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5148                     (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5149                         (void) (STRLOG(TL_ID, tep->te_minor,
5150                             1, SL_TRACE | SL_ERROR,
5151                             "tl_conn_req: invalid socket magic"));
5152                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5153                         return;
5154                 }
5155         } else {
5156                 if ((alen < 0) ||
5157                     (aoff < 0) ||
5158                     ((alen > 0) && ((aoff + alen) > msz)) ||
5159                     ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5160                     ((aoff + alen) < 0) ||
5161                     ((olen > 0) && ((ooff + olen) > msz)) ||
5162                     (olen < 0) ||
5163                     (ooff < 0) ||
5164                     ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5165                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
5166                                     SL_TRACE | SL_ERROR,
5167                                     "tl_unitdata:invalid unit data message"));
5168                         tl_merror(wq, mp, EINVAL);
5169                         return;
5170                 }
5171         }
5172 
5173         /* Options not supported unless it's a socket */
5174         if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5175                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5176                     "tl_unitdata:option use(unsupported) or zero len addr"));
5177                 tl_uderr(wq, mp, EPROTO);
5178                 return;
5179         }
5180 #ifdef DEBUG
5181         /*
5182          * Mild form of ASSERT()ion to detect broken TPI apps.
5183          * if (!assertion)
5184          *      log warning;
5185          */
5186         if (!(aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5187                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5188                     "tl_unitdata:addr overlaps TPI message"));
5189         }
5190 #endif
5191         /*
5192          * get destination endpoint
5193          */
5194         destaddr.ta_alen = alen;
5195         destaddr.ta_abuf = mp->b_rptr + aoff;
5196         destaddr.ta_zoneid = tep->te_zoneid;
5197 
5198         /*
5199          * Check whether the destination is the same that was used previously
5200          * and the destination endpoint is in the right state. If something is
5201          * wrong, find destination again and cache it.
5202          */
5203         peer_tep = tep->te_lastep;
5204 
5205         if ((peer_tep == NULL) || peer_tep->te_closing ||
5206             (peer_tep->te_state != TS_IDLE) ||
5207             !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5208                 /*
5209                  * Not the same as cached destination , need to find the right
5210                  * destination.
5211                  */
5212                 peer_tep = (IS_SOCKET(tep) ?
5213                     tl_sock_find_peer(tep, &ux_addr) :
5214                     tl_find_peer(tep, &destaddr));
5215 
5216                 if (peer_tep == NULL) {
5217                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5218                             SL_TRACE | SL_ERROR,
5219                             "tl_unitdata:no one at destination address"));
5220                         tl_uderr(wq, mp, ECONNRESET);
5221                         return;
5222                 }
5223 
5224                 /*
5225                  * Cache the new peer.
5226                  */
5227                 if (tep->te_lastep != NULL)
5228                         tl_refrele(tep->te_lastep);
5229 
5230                 tep->te_lastep = peer_tep;
5231         }
5232 
5233         if (peer_tep->te_state != TS_IDLE) {
5234                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5235                     "tl_unitdata:provider in invalid state"));
5236                 tl_uderr(wq, mp, EPROTO);
5237                 return;
5238         }
5239 
5240         ASSERT(peer_tep->te_rq != NULL);
5241 
5242         /*
5243          * Put it back if flow controlled except when we are closing.
5244          * Note: Messages already on queue when we are closing is bounded
5245          * so we can ignore flow control.
5246          */
5247         if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5248                 /* record what we are flow controlled on */
5249                 if (tep->te_flowq != NULL) {
5250                         list_remove(&tep->te_flowq->te_flowlist, tep);
5251                 }
5252                 list_insert_head(&peer_tep->te_flowlist, tep);
5253                 tep->te_flowq = peer_tep;
5254                 TL_PUTBQ(tep, mp);
5255                 return;
5256         }
5257         /*
5258          * prepare indication message
5259          */
5260 
5261         /*
5262          * calculate length of message
5263          */
5264         if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5265                 cr = msg_getcred(mp, &cpid);
5266                 ASSERT(cr != NULL);
5267 
5268                 if (peer_tep->te_flag & TL_SETCRED) {
5269                         ASSERT(olen == 0);
5270                         olen = (t_scalar_t)sizeof (struct opthdr) +
5271                             OPTLEN(sizeof (tl_credopt_t));
5272                                                 /* 1 option only */
5273                 } else if (peer_tep->te_flag & TL_SETUCRED) {
5274                         ASSERT(olen == 0);
5275                         olen = (t_scalar_t)sizeof (struct opthdr) +
5276                             OPTLEN(ucredminsize(cr));
5277                                                 /* 1 option only */
5278                 } else {
5279                         /* Possibly more than one option */
5280                         olen += (t_scalar_t)sizeof (struct T_opthdr) +
5281                             OPTLEN(ucredminsize(cr));
5282                 }
5283         }
5284 
5285         ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5286         reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5287 
5288         /*
5289          * If the unitdata_ind fits and we are not adding options
5290          * reuse the udreq mblk.
5291          *
5292          * Otherwise, it is possible we need to append an option if one of the
5293          * te_flag bits is set. This requires extra space in the data block for
5294          * the additional option but the traditional technique used below to
5295          * allocate a new block and copy into it will not work when there is a
5296          * message block with a free pointer (since we don't know anything
5297          * about the layout of the data, pointers referencing or within the
5298          * data, etc.). To handle this possibility the upper layers may have
5299          * preallocated some space to use for appending an option. We check the
5300          * overall mblock size against the size we need ('reuse_mb_sz' with the
5301          * original address length [alen] to ensure we won't overrun the
5302          * current mblk data size) to see if there is free space and thus
5303          * avoid allocating a new message block.
5304          */
5305         if (msz >= ui_sz && alen >= tep->te_alen &&
5306             !(peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED))) {
5307                 /*
5308                  * Reuse the original mblk. Leave options in place.
5309                  */
5310                 udind = (struct T_unitdata_ind *)mp->b_rptr;
5311                 udind->PRIM_type = T_UNITDATA_IND;
5312                 udind->SRC_length = tep->te_alen;
5313                 addr_startp = mp->b_rptr + udind->SRC_offset;
5314                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5315 
5316         } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5317             mp->b_datap->db_frtnp != NULL) {
5318                 /*
5319                  * We have a message block with a free pointer, but extra space
5320                  * has been pre-allocated for us in case we need to append an
5321                  * option. Reuse the original mblk, leaving existing options in
5322                  * place.
5323                  */
5324                 udind = (struct T_unitdata_ind *)mp->b_rptr;
5325                 udind->PRIM_type = T_UNITDATA_IND;
5326                 udind->SRC_length = tep->te_alen;
5327                 addr_startp = mp->b_rptr + udind->SRC_offset;
5328                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5329 
5330                 if (peer_tep->te_flag &
5331                     (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5332                         ASSERT(cr != NULL);
5333                         /*
5334                          * We're appending one new option here after the
5335                          * original ones.
5336                          */
5337                         tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5338                             cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5339                 }
5340 
5341         } else if (mp->b_datap->db_frtnp != NULL) {
5342                 /*
5343                  * The next block creates a new mp and tries to copy the data
5344                  * block into it, but that cannot handle a message with a free
5345                  * pointer (for more details see the comment in kstrputmsg()
5346                  * where dupmsg() is called). Since we can never properly
5347                  * duplicate the mp while also extending the data, just error
5348                  * out now.
5349                  */
5350                 tl_uderr(wq, mp, EPROTO);
5351                 return;
5352         } else {
5353                 /* Allocate a new T_unitdata_ind message */
5354                 mblk_t *ui_mp;
5355 
5356                 ui_mp = allocb(ui_sz, BPRI_MED);
5357                 if (ui_mp == NULL) {
5358                         (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5359                             "tl_unitdata:allocb failure:message queued"));
5360                         tl_memrecover(wq, mp, ui_sz);
5361                         return;
5362                 }
5363 
5364                 /*
5365                  * fill in T_UNITDATA_IND contents
5366                  */
5367                 DB_TYPE(ui_mp) = M_PROTO;
5368                 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5369                 udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5370                 udind->PRIM_type = T_UNITDATA_IND;
5371                 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5372                 udind->SRC_length = tep->te_alen;
5373                 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5374                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5375                 udind->OPT_offset =
5376                     (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5377                 udind->OPT_length = olen;
5378                 if (peer_tep->te_flag &
5379                     (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5380 
5381                         if (oldolen != 0) {
5382                                 bcopy((void *)((uintptr_t)udreq + ooff),
5383                                     (void *)((uintptr_t)udind +
5384                                     udind->OPT_offset),
5385                                     oldolen);
5386                         }
5387                         ASSERT(cr != NULL);
5388 
5389                         tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5390                             oldolen, cr, cpid,
5391                             peer_tep->te_flag, peer_tep->te_credp);
5392                 } else {
5393                         bcopy((void *)((uintptr_t)udreq + ooff),
5394                             (void *)((uintptr_t)udind + udind->OPT_offset),
5395                             olen);
5396                 }
5397 
5398                 /*
5399                  * relink data blocks from mp to ui_mp
5400                  */
5401                 ui_mp->b_cont = mp->b_cont;
5402                 freeb(mp);
5403                 mp = ui_mp;
5404         }
5405         /*
5406          * send indication message
5407          */
5408         peer_tep->te_state = nextstate[TE_UNITDATA_IND][peer_tep->te_state];
5409         putnext(peer_tep->te_rq, mp);
5410 }
5411 
5412 
5413 
5414 /*
5415  * Check if a given addr is in use.
5416  * Endpoint ptr returned or NULL if not found.
5417  * The name space is separate for each mode. This implies that
5418  * sockets get their own name space.
5419  */
5420 static tl_endpt_t *
5421 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5422 {
5423         tl_endpt_t *peer_tep = NULL;
5424         int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5425             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5426 
5427         ASSERT(!IS_SOCKET(tep));
5428 
5429         ASSERT(ap != NULL && ap->ta_alen > 0);
5430         ASSERT(ap->ta_zoneid == tep->te_zoneid);
5431         ASSERT(ap->ta_abuf != NULL);
5432         EQUIV(rc == 0, peer_tep != NULL);
5433         IMPLY(rc == 0,
5434             (tep->te_zoneid == peer_tep->te_zoneid) &&
5435             (tep->te_transport == peer_tep->te_transport));
5436 
5437         if ((rc == 0) && (peer_tep->te_closing)) {
5438                 tl_refrele(peer_tep);
5439                 peer_tep = NULL;
5440         }
5441 
5442         return (peer_tep);
5443 }
5444 
5445 /*
5446  * Find peer for a socket based on unix domain address.
5447  * For implicit addresses our peer can be found by minor number in ai hash. For
5448  * explicit binds we look vnode address at addr_hash.
5449  */
5450 static tl_endpt_t *
5451 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5452 {
5453         tl_endpt_t *peer_tep = NULL;
5454         mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5455             tep->te_aihash : tep->te_addrhash;
5456         int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5457             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5458 
5459         ASSERT(IS_SOCKET(tep));
5460         EQUIV(rc == 0, peer_tep != NULL);
5461         IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5462 
5463         if (peer_tep != NULL) {
5464                 /* Don't attempt to use closing peer. */
5465                 if (peer_tep->te_closing)
5466                         goto errout;
5467 
5468                 /*
5469                  * Cross-zone unix sockets are permitted, but for Trusted
5470                  * Extensions only, the "server" for these must be in the
5471                  * global zone.
5472                  */
5473                 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5474                     is_system_labeled() &&
5475                     (peer_tep->te_zoneid != GLOBAL_ZONEID))
5476                         goto errout;
5477         }
5478 
5479         return (peer_tep);
5480 
5481 errout:
5482         tl_refrele(peer_tep);
5483         return (NULL);
5484 }
5485 
5486 /*
5487  * Generate a free addr and return it in struct pointed by ap
5488  * but allocating space for address buffer.
5489  * The generated address will be at least 4 bytes long and, if req->ta_alen
5490  * exceeds 4 bytes, be req->ta_alen bytes long.
5491  *
5492  * If address is found it will be inserted in the hash.
5493  *
5494  * If req->ta_alen is larger than the default alen (4 bytes) the last
5495  * alen-4 bytes will always be the same as in req.
5496  *
5497  * Return 0 for failure.
5498  * Return non-zero for success.
5499  */
5500 static boolean_t
5501 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5502 {
5503         t_scalar_t      alen;
5504         uint32_t        loopcnt;        /* Limit loop to 2^32 */
5505 
5506         ASSERT(tep->te_hash_hndl != NULL);
5507         ASSERT(!IS_SOCKET(tep));
5508 
5509         if (tep->te_hash_hndl == NULL)
5510                 return (B_FALSE);
5511 
5512         /*
5513          * check if default addr is in use
5514          * if it is - bump it and try again
5515          */
5516         if (req == NULL) {
5517                 alen = sizeof (uint32_t);
5518         } else {
5519                 alen = max(req->ta_alen, sizeof (uint32_t));
5520                 ASSERT(tep->te_zoneid == req->ta_zoneid);
5521         }
5522 
5523         if (tep->te_alen < alen) {
5524                 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5525 
5526                 /*
5527                  * Not enough space in tep->ta_ap to hold the address,
5528                  * allocate a bigger space.
5529                  */
5530                 if (abuf == NULL)
5531                         return (B_FALSE);
5532 
5533                 if (tep->te_alen > 0)
5534                         kmem_free(tep->te_abuf, tep->te_alen);
5535 
5536                 tep->te_alen = alen;
5537                 tep->te_abuf = abuf;
5538         }
5539 
5540         /* Copy in the address in req */
5541         if (req != NULL) {
5542                 ASSERT(alen >= req->ta_alen);
5543                 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5544         }
5545 
5546         /*
5547          * First try minor number then try default addresses.
5548          */
5549         bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5550 
5551         for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5552                 if (mod_hash_insert_reserve(tep->te_addrhash,
5553                     (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5554                     tep->te_hash_hndl) == 0) {
5555                         /*
5556                          * found free address
5557                          */
5558                         tep->te_flag |= TL_ADDRHASHED;
5559                         tep->te_hash_hndl = NULL;
5560 
5561                         return (B_TRUE); /* successful return */
5562                 }
5563                 /*
5564                  * Use default address.
5565                  */
5566                 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5567                 atomic_inc_32(&tep->te_defaddr);
5568         }
5569 
5570         /*
5571          * Failed to find anything.
5572          */
5573         (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5574             "tl_get_any_addr:looped 2^32 times"));
5575         return (B_FALSE);
5576 }
5577 
5578 /*
5579  * reallocb + set r/w ptrs to reflect size.
5580  */
5581 static mblk_t *
5582 tl_resizemp(mblk_t *mp, ssize_t new_size)
5583 {
5584         if ((mp = reallocb(mp, new_size, 0)) == NULL)
5585                 return (NULL);
5586 
5587         mp->b_rptr = DB_BASE(mp);
5588         mp->b_wptr = mp->b_rptr + new_size;
5589         return (mp);
5590 }
5591 
5592 static void
5593 tl_cl_backenable(tl_endpt_t *tep)
5594 {
5595         list_t *l = &tep->te_flowlist;
5596         tl_endpt_t *elp;
5597 
5598         ASSERT(IS_CLTS(tep));
5599 
5600         for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5601                 ASSERT(tep->te_ser == elp->te_ser);
5602                 ASSERT(elp->te_flowq == tep);
5603                 if (!elp->te_closing)
5604                         TL_QENABLE(elp);
5605                 elp->te_flowq = NULL;
5606                 list_remove(l, elp);
5607         }
5608 }
5609 
5610 /*
5611  * Unconnect endpoints.
5612  */
5613 static void
5614 tl_co_unconnect(tl_endpt_t *tep)
5615 {
5616         tl_endpt_t      *peer_tep = tep->te_conp;
5617         tl_endpt_t      *srv_tep = tep->te_oconp;
5618         list_t          *l;
5619         tl_icon_t       *tip;
5620         tl_endpt_t      *cl_tep;
5621         mblk_t          *d_mp;
5622 
5623         ASSERT(IS_COTS(tep));
5624         /*
5625          * If our peer is closing, don't use it.
5626          */
5627         if ((peer_tep != NULL) && peer_tep->te_closing) {
5628                 TL_UNCONNECT(tep->te_conp);
5629                 peer_tep = NULL;
5630         }
5631         if ((srv_tep != NULL) && srv_tep->te_closing) {
5632                 TL_UNCONNECT(tep->te_oconp);
5633                 srv_tep = NULL;
5634         }
5635 
5636         if (tep->te_nicon > 0) {
5637                 l = &tep->te_iconp;
5638                 /*
5639                  * If incoming requests pending, change state
5640                  * of clients on disconnect ind event and send
5641                  * discon_ind pdu to modules above them
5642                  * for server: all clients get disconnect
5643                  */
5644 
5645                 while (tep->te_nicon > 0) {
5646                         tip    = list_head(l);
5647                         cl_tep = tip->ti_tep;
5648 
5649                         if (cl_tep == NULL) {
5650                                 tl_freetip(tep, tip);
5651                                 continue;
5652                         }
5653 
5654                         if (cl_tep->te_oconp != NULL) {
5655                                 ASSERT(cl_tep != cl_tep->te_oconp);
5656                                 TL_UNCONNECT(cl_tep->te_oconp);
5657                         }
5658 
5659                         if (cl_tep->te_closing) {
5660                                 tl_freetip(tep, tip);
5661                                 continue;
5662                         }
5663 
5664                         enableok(cl_tep->te_wq);
5665                         TL_QENABLE(cl_tep);
5666                         d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5667                         if (d_mp != NULL) {
5668                                 cl_tep->te_state = TS_IDLE;
5669                                 putnext(cl_tep->te_rq, d_mp);
5670                         } else {
5671                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5672                                     SL_TRACE | SL_ERROR,
5673                                     "tl_co_unconnect:icmng: "
5674                                     "allocb failure"));
5675                         }
5676                         tl_freetip(tep, tip);
5677                 }
5678         } else if (srv_tep != NULL) {
5679                 /*
5680                  * If outgoing request pending, change state
5681                  * of server on discon ind event
5682                  */
5683 
5684                 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5685                     IS_COTSORD(srv_tep) &&
5686                     !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5687                         /*
5688                          * Queue ordrel_ind for server to be picked up
5689                          * when the connection is accepted.
5690                          */
5691                         d_mp = tl_ordrel_ind_alloc();
5692                 } else {
5693                         /*
5694                          * send discon_ind to server
5695                          */
5696                         d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5697                 }
5698                 if (d_mp == NULL) {
5699                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5700                             SL_TRACE | SL_ERROR,
5701                             "tl_co_unconnect:outgoing:allocb failure"));
5702                         TL_UNCONNECT(tep->te_oconp);
5703                         goto discon_peer;
5704                 }
5705 
5706                 /*
5707                  * If this is a socket the T_DISCON_IND is queued with
5708                  * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5709                  * from the list of pending connections.
5710                  * Note that when te_oconp is set the peer better have
5711                  * a t_connind_t for the client.
5712                  */
5713                 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5714                         /*
5715                          * Queue the disconnection message.
5716                          */
5717                         tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5718                 } else {
5719                         tip = tl_icon_find(srv_tep, tep->te_seqno);
5720                         if (tip == NULL) {
5721                                 freemsg(d_mp);
5722                         } else {
5723                                 ASSERT(tep == tip->ti_tep);
5724                                 ASSERT(tep->te_ser == srv_tep->te_ser);
5725                                 /*
5726                                  * Delete tip from the server list.
5727                                  */
5728                                 if (srv_tep->te_nicon == 1) {
5729                                         srv_tep->te_state =
5730                                             nextstate[TE_DISCON_IND2]
5731                                             [srv_tep->te_state];
5732                                 } else {
5733                                         srv_tep->te_state =
5734                                             nextstate[TE_DISCON_IND3]
5735                                             [srv_tep->te_state];
5736                                 }
5737                                 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5738                                     T_DISCON_IND);
5739                                 putnext(srv_tep->te_rq, d_mp);
5740                                 tl_freetip(srv_tep, tip);
5741                         }
5742                         TL_UNCONNECT(tep->te_oconp);
5743                         srv_tep = NULL;
5744                 }
5745         } else if (peer_tep != NULL) {
5746                 /*
5747                  * unconnect existing connection
5748                  * If connected, change state of peer on
5749                  * discon ind event and send discon ind pdu
5750                  * to module above it
5751                  */
5752 
5753                 ASSERT(tep->te_ser == peer_tep->te_ser);
5754                 if (IS_COTSORD(peer_tep) &&
5755                     (peer_tep->te_state == TS_WIND_ORDREL ||
5756                     peer_tep->te_state == TS_DATA_XFER)) {
5757                         /*
5758                          * send ordrel ind
5759                          */
5760                         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5761                         "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5762                             peer_tep->te_state,
5763                             nextstate[TE_ORDREL_IND][peer_tep->te_state]));
5764                         d_mp = tl_ordrel_ind_alloc();
5765                         if (d_mp == NULL) {
5766                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5767                                     SL_TRACE | SL_ERROR,
5768                                     "tl_co_unconnect:connected:"
5769                                     "allocb failure"));
5770                                 /*
5771                                  * Continue with cleaning up peer as
5772                                  * this side may go away with the close
5773                                  */
5774                                 TL_QENABLE(peer_tep);
5775                                 goto discon_peer;
5776                         }
5777                         peer_tep->te_state =
5778                             nextstate[TE_ORDREL_IND][peer_tep->te_state];
5779 
5780                         putnext(peer_tep->te_rq, d_mp);
5781                         /*
5782                          * Handle flow control case.  This will generate
5783                          * a t_discon_ind message with reason 0 if there
5784                          * is data queued on the write side.
5785                          */
5786                         TL_QENABLE(peer_tep);
5787                 } else if (IS_COTSORD(peer_tep) &&
5788                     peer_tep->te_state == TS_WREQ_ORDREL) {
5789                         /*
5790                          * Sent an ordrel_ind. We send a discon with
5791                          * with error 0 to inform that the peer is gone.
5792                          */
5793                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5794                             SL_TRACE | SL_ERROR,
5795                             "tl_co_unconnect: discon in state %d",
5796                             tep->te_state));
5797                         tl_discon_ind(peer_tep, 0);
5798                 } else {
5799                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5800                             SL_TRACE | SL_ERROR,
5801                             "tl_co_unconnect: state %d", tep->te_state));
5802                         tl_discon_ind(peer_tep, ECONNRESET);
5803                 }
5804 
5805 discon_peer:
5806                 /*
5807                  * Disconnect cross-pointers only for close
5808                  */
5809                 if (tep->te_closing) {
5810                         peer_tep = tep->te_conp;
5811                         TL_REMOVE_PEER(peer_tep->te_conp);
5812                         TL_REMOVE_PEER(tep->te_conp);
5813                 }
5814         }
5815 }
5816 
5817 /*
5818  * Note: The following routine does not recover from allocb()
5819  * failures
5820  * The reason should be from the <sys/errno.h> space.
5821  */
5822 static void
5823 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5824 {
5825         mblk_t *d_mp;
5826 
5827         if (tep->te_closing)
5828                 return;
5829 
5830         /*
5831          * flush the queues.
5832          */
5833         flushq(tep->te_rq, FLUSHDATA);
5834         (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5835 
5836         /*
5837          * send discon ind
5838          */
5839         d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5840         if (d_mp == NULL) {
5841                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5842                     "tl_discon_ind:allocb failure"));
5843                 return;
5844         }
5845         tep->te_state = TS_IDLE;
5846         putnext(tep->te_rq, d_mp);
5847 }
5848 
5849 /*
5850  * Note: The following routine does not recover from allocb()
5851  * failures
5852  * The reason should be from the <sys/errno.h> space.
5853  */
5854 static mblk_t *
5855 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5856 {
5857         mblk_t *mp;
5858         struct T_discon_ind *tdi;
5859 
5860         if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5861                 DB_TYPE(mp) = M_PROTO;
5862                 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5863                 tdi = (struct T_discon_ind *)mp->b_rptr;
5864                 tdi->PRIM_type = T_DISCON_IND;
5865                 tdi->DISCON_reason = reason;
5866                 tdi->SEQ_number = seqnum;
5867         }
5868         return (mp);
5869 }
5870 
5871 
5872 /*
5873  * Note: The following routine does not recover from allocb()
5874  * failures
5875  */
5876 static mblk_t *
5877 tl_ordrel_ind_alloc(void)
5878 {
5879         mblk_t *mp;
5880         struct T_ordrel_ind *toi;
5881 
5882         if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5883                 DB_TYPE(mp) = M_PROTO;
5884                 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5885                 toi = (struct T_ordrel_ind *)mp->b_rptr;
5886                 toi->PRIM_type = T_ORDREL_IND;
5887         }
5888         return (mp);
5889 }
5890 
5891 
5892 /*
5893  * Lookup the seqno in the list of queued connections.
5894  */
5895 static tl_icon_t *
5896 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5897 {
5898         list_t *l = &tep->te_iconp;
5899         tl_icon_t *tip = list_head(l);
5900 
5901         ASSERT(seqno != 0);
5902 
5903         for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5904                 ;
5905 
5906         return (tip);
5907 }
5908 
5909 /*
5910  * Queue data for a given T_CONN_IND while verifying that redundant
5911  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5912  * Used when the originator of the connection closes.
5913  */
5914 static void
5915 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5916 {
5917         tl_icon_t               *tip;
5918         mblk_t                  **mpp, *mp;
5919         int                     prim, nprim;
5920 
5921         if (nmp->b_datap->db_type == M_PROTO)
5922                 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5923         else
5924                 nprim = -1;     /* M_DATA */
5925 
5926         tip = tl_icon_find(tep, seqno);
5927         if (tip == NULL) {
5928                 freemsg(nmp);
5929                 return;
5930         }
5931 
5932         ASSERT(tip->ti_seqno != 0);
5933         mpp = &tip->ti_mp;
5934         while (*mpp != NULL) {
5935                 mp = *mpp;
5936 
5937                 if (mp->b_datap->db_type == M_PROTO)
5938                         prim = ((union T_primitives *)mp->b_rptr)->type;
5939                 else
5940                         prim = -1;      /* M_DATA */
5941 
5942                 /*
5943                  * Allow nothing after a T_DISCON_IND
5944                  */
5945                 if (prim == T_DISCON_IND) {
5946                         freemsg(nmp);
5947                         return;
5948                 }
5949                 /*
5950                  * Only allow a T_DISCON_IND after an T_ORDREL_IND
5951                  */
5952                 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5953                         freemsg(nmp);
5954                         return;
5955                 }
5956                 mpp = &(mp->b_next);
5957         }
5958         *mpp = nmp;
5959 }
5960 
5961 /*
5962  * Verify if a certain TPI primitive exists on the connind queue.
5963  * Use prim -1 for M_DATA.
5964  * Return non-zero if found.
5965  */
5966 static boolean_t
5967 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5968 {
5969         tl_icon_t *tip = tl_icon_find(tep, seqno);
5970         boolean_t found = B_FALSE;
5971 
5972         if (tip != NULL) {
5973                 mblk_t *mp;
5974                 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5975                         found = (DB_TYPE(mp) == M_PROTO &&
5976                             ((union T_primitives *)mp->b_rptr)->type == prim);
5977                 }
5978         }
5979         return (found);
5980 }
5981 
5982 /*
5983  * Send the b_next mblk chain that has accumulated before the connection
5984  * was accepted. Perform the necessary state transitions.
5985  */
5986 static void
5987 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5988 {
5989         mblk_t                  *mp;
5990         union T_primitives      *primp;
5991 
5992         if (tep->te_closing) {
5993                 tl_icon_freemsgs(mpp);
5994                 return;
5995         }
5996 
5997         ASSERT(tep->te_state == TS_DATA_XFER);
5998         ASSERT(tep->te_rq->q_first == NULL);
5999 
6000         while ((mp = *mpp) != NULL) {
6001                 *mpp = mp->b_next;
6002                 mp->b_next = NULL;
6003 
6004                 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
6005                 switch (DB_TYPE(mp)) {
6006                 default:
6007                         freemsg(mp);
6008                         break;
6009                 case M_DATA:
6010                         putnext(tep->te_rq, mp);
6011                         break;
6012                 case M_PROTO:
6013                         primp = (union T_primitives *)mp->b_rptr;
6014                         switch (primp->type) {
6015                         case T_UNITDATA_IND:
6016                         case T_DATA_IND:
6017                         case T_OPTDATA_IND:
6018                         case T_EXDATA_IND:
6019                                 putnext(tep->te_rq, mp);
6020                                 break;
6021                         case T_ORDREL_IND:
6022                                 tep->te_state = nextstate[TE_ORDREL_IND]
6023                                     [tep->te_state];
6024                                 putnext(tep->te_rq, mp);
6025                                 break;
6026                         case T_DISCON_IND:
6027                                 tep->te_state = TS_IDLE;
6028                                 putnext(tep->te_rq, mp);
6029                                 break;
6030                         default:
6031 #ifdef DEBUG
6032                                 cmn_err(CE_PANIC,
6033                                     "tl_icon_sendmsgs: unknown primitive");
6034 #endif /* DEBUG */
6035                                 freemsg(mp);
6036                                 break;
6037                         }
6038                         break;
6039                 }
6040         }
6041 }
6042 
6043 /*
6044  * Free the b_next mblk chain that has accumulated before the connection
6045  * was accepted.
6046  */
6047 static void
6048 tl_icon_freemsgs(mblk_t **mpp)
6049 {
6050         mblk_t *mp;
6051 
6052         while ((mp = *mpp) != NULL) {
6053                 *mpp = mp->b_next;
6054                 mp->b_next = NULL;
6055                 freemsg(mp);
6056         }
6057 }
6058 
6059 /*
6060  * Send M_ERROR
6061  * Note: assumes caller ensured enough space in mp or enough
6062  *      memory available. Does not attempt recovery from allocb()
6063  *      failures
6064  */
6065 
6066 static void
6067 tl_merror(queue_t *wq, mblk_t *mp, int error)
6068 {
6069         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6070 
6071         if (tep->te_closing) {
6072                 freemsg(mp);
6073                 return;
6074         }
6075 
6076         (void) (STRLOG(TL_ID, tep->te_minor, 1,
6077             SL_TRACE | SL_ERROR,
6078             "tl_merror: tep=%p, err=%d", (void *)tep, error));
6079 
6080         /*
6081          * flush all messages on queue. we are shutting
6082          * the stream down on fatal error
6083          */
6084         flushq(wq, FLUSHALL);
6085         if (IS_COTS(tep)) {
6086                 /* connection oriented - unconnect endpoints */
6087                 tl_co_unconnect(tep);
6088         }
6089         if (mp->b_cont) {
6090                 freemsg(mp->b_cont);
6091                 mp->b_cont = NULL;
6092         }
6093 
6094         if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6095                 freemsg(mp);
6096                 mp = allocb(1, BPRI_HI);
6097                 if (mp == NULL) {
6098                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
6099                             SL_TRACE | SL_ERROR,
6100                             "tl_merror:M_PROTO: out of memory"));
6101                         return;
6102                 }
6103         }
6104         if (mp) {
6105                 DB_TYPE(mp) = M_ERROR;
6106                 mp->b_rptr = DB_BASE(mp);
6107                 *mp->b_rptr = (char)error;
6108                 mp->b_wptr = mp->b_rptr + sizeof (char);
6109                 qreply(wq, mp);
6110         } else {
6111                 (void) putnextctl1(tep->te_rq, M_ERROR, error);
6112         }
6113 }
6114 
/*
 * Fill buf with a credentials option for the peer, in one of three
 * formats selected by flag:
 *   TL_SETCRED   - TL_OPT_PEER_CRED opthdr carrying a tl_credopt_t;
 *   TL_SETUCRED  - TL_OPT_PEER_UCRED opthdr carrying a ucred;
 *   TL_SOCKUCRED - SOL_SOCKET/SCM_UCRED T_opthdr carrying a ucred.
 * The caller must have sized buf for the chosen format.
 *
 * cr/cpid - credentials and pid being reported
 * pcr     - receiver's credentials, passed to cred2ucred for filtering
 */
static void
tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
{
        ASSERT(cr != NULL);

        if (flag & TL_SETCRED) {
                struct opthdr *opt = (struct opthdr *)buf;
                tl_credopt_t *tlcred;

                opt->level = TL_PROT_LEVEL;
                opt->name = TL_OPT_PEER_CRED;
                opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));

                /* Option value follows the opthdr directly. */
                tlcred = (tl_credopt_t *)(opt + 1);
                tlcred->tc_uid = crgetuid(cr);
                tlcred->tc_gid = crgetgid(cr);
                tlcred->tc_ruid = crgetruid(cr);
                tlcred->tc_rgid = crgetrgid(cr);
                tlcred->tc_suid = crgetsuid(cr);
                tlcred->tc_sgid = crgetsgid(cr);
                tlcred->tc_ngroups = crgetngroups(cr);
        } else if (flag & TL_SETUCRED) {
                struct opthdr *opt = (struct opthdr *)buf;

                opt->level = TL_PROT_LEVEL;
                opt->name = TL_OPT_PEER_UCRED;
                opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));

                (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
        } else {
                struct T_opthdr *topt = (struct T_opthdr *)buf;
                ASSERT(flag & TL_SOCKUCRED);

                topt->level = SOL_SOCKET;
                topt->name = SCM_UCRED;
                topt->len = ucredminsize(cr) + sizeof (*topt);
                topt->status = 0;
                (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
        }
}
6155 
/*
 * optcom "default value" entry point.  Always returns -1 because no
 * default value processing is done in protocol-specific code currently.
 */
/* ARGSUSED */
static int
tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
{
        /* no default value processed in protocol specific code currently */
        return (-1);
}
6163 
6164 /* ARGSUSED */
6165 static int
6166 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6167 {
6168         int len;
6169         tl_endpt_t *tep;
6170         int *valp;
6171 
6172         tep = (tl_endpt_t *)wq->q_ptr;
6173 
6174         len = 0;
6175 
6176         /*
6177          * Assumes: option level and name sanity check done elsewhere
6178          */
6179 
6180         switch (level) {
6181         case SOL_SOCKET:
6182                 if (!IS_SOCKET(tep))
6183                         break;
6184                 switch (name) {
6185                 case SO_RECVUCRED:
6186                         len = sizeof (int);
6187                         valp = (int *)ptr;
6188                         *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6189                         break;
6190                 default:
6191                         break;
6192                 }
6193                 break;
6194         case TL_PROT_LEVEL:
6195                 switch (name) {
6196                 case TL_OPT_PEER_CRED:
6197                 case TL_OPT_PEER_UCRED:
6198                         /*
6199                          * option not supposed to retrieved directly
6200                          * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6201                          * when some internal flags set by other options
6202                          * Direct retrieval always designed to fail(ignored)
6203                          * for this option.
6204                          */
6205                         break;
6206                 }
6207         }
6208         return (len);
6209 }
6210 
/*
 * optcom "set option" entry point.  Applies the requested option to the
 * endpoint and returns 0 on success or an errno value on failure.
 *
 * Assumes: option level and name sanity checks done elsewhere.
 */
/* ARGSUSED */
static int
tl_set_opt(queue_t *wq, uint_t mgmt_flags, int level, int name, uint_t inlen,
    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs,
    cred_t *cr)
{
        int error;
        tl_endpt_t *tep;

        tep = (tl_endpt_t *)wq->q_ptr;

        error = 0;              /* NOERROR */

        switch (level) {
        case SOL_SOCKET:
                /* Socket-level options are valid on socket endpoints only. */
                if (!IS_SOCKET(tep)) {
                        error = EINVAL;
                        break;
                }
                /*
                 * TBD: fill in other AF_UNIX socket options and then stop
                 * returning error.
                 */
                switch (name) {
                case SO_RECVUCRED:
                        /*
                         * We only support this for datagram sockets;
                         * getpeerucred handles the connection oriented
                         * transports.
                         */
                        if (!IS_CLTS(tep)) {
                                error = EINVAL;
                                break;
                        }
                        /* Toggle per-message credential delivery. */
                        if (*(int *)invalp == 0)
                                tep->te_flag &= ~TL_SOCKUCRED;
                        else
                                tep->te_flag |= TL_SOCKUCRED;
                        break;
                default:
                        error = EINVAL;
                        break;
                }
                break;
        case TL_PROT_LEVEL:
                switch (name) {
                case TL_OPT_PEER_CRED:
                case TL_OPT_PEER_UCRED:
                        /*
                         * option not supposed to be set directly
                         * Its value in initialized for each endpoint at
                         * driver open time.
                         * Direct setting always designed to fail for this
                         * option.
                         */
                        (void) (STRLOG(TL_ID, tep->te_minor, 1,
                            SL_TRACE | SL_ERROR,
                            "tl_set_opt: option is not supported"));
                        error = EPROTO;
                        break;
                }
        }
        return (error);
}
6279 
6280 
6281 static void
6282 tl_timer(void *arg)
6283 {
6284         queue_t *wq = arg;
6285         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6286 
6287         ASSERT(tep);
6288 
6289         tep->te_timoutid = 0;
6290 
6291         enableok(wq);
6292         /*
6293          * Note: can call wsrv directly here and save context switch
6294          * Consider change when qtimeout (not timeout) is active
6295          */
6296         qenable(wq);
6297 }
6298 
6299 static void
6300 tl_buffer(void *arg)
6301 {
6302         queue_t *wq = arg;
6303         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6304 
6305         ASSERT(tep);
6306 
6307         tep->te_bufcid = 0;
6308         tep->te_nowsrv = B_FALSE;
6309 
6310         enableok(wq);
6311         /*
6312          *  Note: can call wsrv directly here and save context switch
6313          * Consider change when qbufcall (not bufcall) is active
6314          */
6315         qenable(wq);
6316 }
6317 
6318 static void
6319 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6320 {
6321         tl_endpt_t *tep;
6322 
6323         tep = (tl_endpt_t *)wq->q_ptr;
6324 
6325         if (tep->te_closing) {
6326                 freemsg(mp);
6327                 return;
6328         }
6329         noenable(wq);
6330 
6331         (void) insq(wq, wq->q_first, mp);
6332 
6333         if (tep->te_bufcid || tep->te_timoutid) {
6334                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
6335                     "tl_memrecover:recover %p pending", (void *)wq));
6336                 return;
6337         }
6338 
6339         tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq);
6340         if (tep->te_bufcid == NULL) {
6341                 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6342                     drv_usectohz(TL_BUFWAIT));
6343         }
6344 }
6345 
6346 static void
6347 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6348 {
6349         ASSERT(tip->ti_seqno != 0);
6350 
6351         if (tip->ti_mp != NULL) {
6352                 tl_icon_freemsgs(&tip->ti_mp);
6353                 tip->ti_mp = NULL;
6354         }
6355         if (tip->ti_tep != NULL) {
6356                 tl_refrele(tip->ti_tep);
6357                 tip->ti_tep = NULL;
6358         }
6359         list_remove(&tep->te_iconp, tip);
6360         kmem_free(tip, sizeof (tl_icon_t));
6361         tep->te_nicon--;
6362 }
6363 
6364 /*
6365  * Remove address from address hash.
6366  */
6367 static void
6368 tl_addr_unbind(tl_endpt_t *tep)
6369 {
6370         tl_endpt_t *elp;
6371 
6372         if (tep->te_flag & TL_ADDRHASHED) {
6373                 if (IS_SOCKET(tep)) {
6374                         (void) mod_hash_remove(tep->te_addrhash,
6375                             (mod_hash_key_t)tep->te_vp,
6376                             (mod_hash_val_t *)&elp);
6377                         tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6378                         tep->te_magic = SOU_MAGIC_IMPLICIT;
6379                 } else {
6380                         (void) mod_hash_remove(tep->te_addrhash,
6381                             (mod_hash_key_t)&tep->te_ap,
6382                             (mod_hash_val_t *)&elp);
6383                         (void) kmem_free(tep->te_abuf, tep->te_alen);
6384                         tep->te_alen = -1;
6385                         tep->te_abuf = NULL;
6386                 }
6387                 tep->te_flag &= ~TL_ADDRHASHED;
6388         }
6389 }