1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  27  */
  28 
  29 /*
  30  * Multithreaded STREAMS Local Transport Provider.
  31  *
  32  * OVERVIEW
  33  * ========
  34  *
  35  * This driver provides TLI as well as socket semantics.  It provides
  36  * connectionless, connection oriented, and connection oriented with orderly
  37  * release transports for TLI and sockets. Each transport type has separate name
  38  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
  39  * this removes any name space conflicts when binding to socket style transport
  40  * addresses.
  41  *
  42  * NOTE: There is one exception: Socket ticots and ticotsord transports share
  43  * the same namespace. In fact, sockets always use ticotsord type transport.
  44  *
  45  * The driver mode is specified during open() by the minor number used for
  46  * open.
  47  *
  48  *  The sockets in addition have the following semantic differences:
  49  *  No support for passing up credentials (TL_SET[U]CRED).
  50  *
  51  *      Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
  52  *      from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
  53  *      T_OPTDATA_IND.
  54  *
  55  *      The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
  56  *      a T_CONN_RES is received from the acceptor. This means that a socket
  57  *      connect will complete before the peer has called accept.
  58  *
  59  *
  60  * MULTITHREADING
  61  * ==============
  62  *
  63  * The driver does not use STREAMS protection mechanisms. Instead it uses a
  64  * generic "serializer" abstraction. Most of the operations are executed behind
  65  * the serializer and are, essentially single-threaded. All functions executed
  66  * behind the same serializer are strictly serialized. So if one thread calls
  67  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
  68  * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
  69  * was called first) the actual sequence will be foo(mp1, arg1);
  70  * bar(mp2, arg1) or bar(mp2, arg1); foo(mp1, arg1). But foo() and bar() will
  71  * never run at the same time.
  72  *
  73  * Connectionless transports use a single serializer per transport type (one
  74  * for TLI and one for sockets). Connection-oriented transports use
  75  * finer-grained serializers.
  76  *
  77  * All COTS-type endpoints start their life with private serializers. During
  78  * connection request processing the endpoint serializer is switched to the
  79  * listener's serializer and the rest of T_CONN_REQ processing is done on the
  80  * listener serializer. During T_CONN_RES processing the eager serializer is
  81  * switched from listener to acceptor serializer and after that point all
  82  * processing for eager and acceptor happens on this serializer. To avoid races
  83  * with endpoint closes while its serializer may be changing closes are blocked
  84  * while serializers are manipulated.
  85  *
  86  * References accounting
  87  * ---------------------
  88  *
  89  * Endpoints are reference counted and freed when the last reference is
  90  * dropped. Functions within the serializer may access an endpoint state even
  91  * after an endpoint closed. The te_closing being set on the endpoint indicates
  92  * that the endpoint entered its close routine.
  93  *
  94  * One reference is held for each opened endpoint instance. The reference
  95  * counter is incremented when the endpoint is linked to another endpoint and
  96  * decremented when the link disappears. It is also incremented when the
  97  * endpoint is found by the hash table lookup. This increment is atomic with the
  98  * lookup itself and happens while the hash table read lock is held.
  99  *
 100  * Close synchronization
 101  * ---------------------
 102  *
 103  * During close the endpoint is marked as closing using te_closing flag. It is
 104  * usually enough to check for te_closing flag since all other state changes
 105  * happen after this flag is set and the close entered serializer. Immediately
 106  * after setting te_closing flag tl_close() enters serializer and waits until
 107  * the callback finishes. This allows all functions called within serializer to
 108  * simply check te_closing without any locks.
 109  *
 110  * Serializer management.
 111  * ---------------------
 112  *
 113  * For COTS transports serializers are created when the endpoint is constructed
 114  * and destroyed when the endpoint is destructed. CLTS transports use global
 115  * serializers - one for sockets and one for TLI.
 116  *
 117  * COTS serializers have separate reference counts to deal with several
 118  * endpoints sharing the same serializer. There is a subtle problem related to
 119  * the serializer destruction. The serializer should never be destroyed by any
 120  * function executed inside serializer. This means that close has to wait till
 121  * all serializer activity for this endpoint is finished before it can drop the
 122  * last reference on the endpoint (which may as well free the serializer).  This
 123  * is only relevant for COTS transports which manage serializers
 124  * dynamically. For CLTS transports close may complete without waiting for all
 125  * serializer activity to finish since serializer is only destroyed at driver
 126  * detach time.
 127  *
 128  * COTS endpoints keep track of the number of outstanding requests on the
 129  * serializer for the endpoint. The code handling accept() avoids changing
 130  * client serializer if it has any pending messages on the serializer and
 131  * instead moves acceptor to listener's serializer.
 132  *
 133  *
 134  * Use of hash tables
 135  * ------------------
 136  *
 137  * The driver uses modhash hash table implementation. Each transport uses two
 138  * hash tables - one for finding endpoints by acceptor ID and another one for
 139  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
 140  * pair of hash tables since sockets only use TICOTSORD.
 141  *
 142  * All hash tables lookups increment a reference count for returned endpoints,
 143  * so we may safely check the endpoint state even when the endpoint is removed
 144  * from the hash by another thread immediately after it is found.
 145  *
 146  *
 147  * CLOSE processing
 148  * ================
 149  *
 150  * The driver enters serializer twice on close(). The close sequence is the
 151  * following:
 152  *
 153  * 1) Wait until closing is safe (te_closewait becomes zero)
 154  *      This step is needed to prevent close during serializer switches. In most
 155  *      cases (close happening after connection establishment) te_closewait is
 156  *      zero.
 157  * 2) Set te_closing.
 158  * 3) Call tl_close_ser() within serializer and wait for it to complete.
 159  *
 160  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 161  *      It also needs to clear write-side q_next pointers - this should be done
 162  *      before qprocsoff().
 163  *
 164  *    This synchronous serializer entry during close is needed to ensure that
 165  *    the queue is valid everywhere inside the serializer.
 166  *
 167  *    Note that in many cases close will execute tl_close_ser() synchronously,
 168  *    so it will not wait at all.
 169  *
 170  * 4) Calls qprocsoff().
 171  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
 172  *      complete (for COTS transports). For CLTS transport there is no wait.
 173  *
 174  *      tl_close_finish_ser() Finishes the close process and wakes up waiting
 175  *      close if there is any.
 176  *
 177  *    Note that in most cases close will enter tl_close_finish_ser()
 178  *    synchronously and will not wait at all.
 179  *
 180  *
 181  * Flow Control
 182  * ============
 183  *
 184  * The driver implements both read and write side service routines. No one calls
 185  * putq() on the read queue. The read side service routine tl_rsrv() is called
 186  * when the read side stream is back-enabled. It enters serializer synchronously
 187  * (waits till serializer processing is complete). Within serializer it
 188  * back-enables all endpoints blocked by the queue for connection-less
 189  * transports and enables write side service processing for the peer for
 190  * connection-oriented transports.
 191  *
 192  * Read and write side service routines use special mblk_sized space in the
 193  * endpoint structure to enter perimeter.
 194  *
 195  * Write-side flow control
 196  * -----------------------
 197  *
 198  * Write side flow control is a bit tricky. The driver needs to deal with two
 199  * message queues - the explicit STREAMS message queue maintained by
 200  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
 201  * queues should be synchronized to preserve message ordering and should
 202  * maintain a single order determined by the order in which messages enter
 203  * tl_wput(). In order to maintain the ordering between these two queues the
 204  * STREAMS queue is only manipulated within the serializer, so the ordering is
 205  * provided by the serializer.
 206  *
 207  * Functions called from the tl_wsrv() sometimes may call putbq(). To
 208  * immediately stop any further processing of the STREAMS message queues the
 209  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
 210  * side service processing stops when the flag is set.
 211  *
 212  * The tl_wsrv() function enters serializer synchronously and waits for it to
 213  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
 214  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
 215  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
 216  * always bounded by the amount of messages on the STREAMS queue at the time
 217  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
 218  * queue from another serialized entry which can't happen in parallel. This
 219  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
 220  * of it draining forever while writer places new messages on the STREAMS
 221  * queue).
 222  *
 223  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
 224  *
 225  *
 226  * Unix Domain Sockets
 227  * ===================
 228  *
 229  * The driver knows the structure of Unix Domain sockets addresses and treats
 230  * them differently from generic TLI addresses. For sockets implicit binds are
 231  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
 232  * instead of using address length of zero. Explicit binds specify
 233  * SOU_MAGIC_EXPLICIT as magic.
 234  *
 235  * For implicit binds we always use minor number as soua_vp part of the address
 236  * and avoid any hash table lookups. This saves two hash tables lookups per
 237  * anonymous bind.
 238  *
 239  * For explicit address we hash the vnode pointer instead of hashing the
 240  * full-scale address+zone+length. Hashing by pointer is more efficient than
 241  * hashing by the full address.
 242  *
 243  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 244  * tep structure, so it should be never freed.
 245  *
 246  * Also for sockets the driver always uses minor number as acceptor id.
 247  *
 248  * TPI VIOLATIONS
 249  * --------------
 250  *
 251  * This driver violates TPI in several respects for Unix Domain Sockets:
 252  *
 253  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
 254  *      is requested and the endpoint is already in use. There is no point in
 255  *      generating an unused address since this address will be rejected by
 256  *      sockfs anyway. For implicit binds it always generates a new address
 257  *      (sets soua_vp to its minor number).
 258  *
 259  * 2) It always uses minor number as acceptor ID and never uses queue
 260  *      pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
 261  *      message and they do not use the queue pointer.
 262  *
 263  * 3) For Listener sockets the usual sequence is to issue bind() zero backlog
 264  *      followed by listen(). The listen() should be issued with non-zero
 265  *      backlog, so sotpi_listen() issues unbind request followed by bind
 266  *      request to the same address but with a non-zero qlen value. Both
 267  *      tl_bind() and tl_unbind() require write lock on the hash table to
 268  *      insert/remove the address. The driver does not remove the address from
 269  *      the hash for endpoints that are bound to the explicit address and have
 270  *      backlog of zero. During T_BIND_REQ processing if the address requested
 271  *      is equal to the address the endpoint already has it updates the backlog
 272  *      without reinserting the address in the hash table. This optimization
 273  *      avoids two hash table updates for each listener created. It also
 274  *      avoids the problem of a "stolen" address when another listener may use
 275  *      the same address between the unbind and bind and suddenly listen() fails
 276  *      because address is in use even though the bind() succeeded.
 277  *
 278  *
 279  * CONNECTIONLESS TRANSPORTS
 280  * =========================
 281  *
 282  * Connectionless transports all share the same serializer (one for TLI and one
 283  * for Sockets). Functions executing behind serializer can check or modify state
 284  * of any endpoint.
 285  *
 286  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
 287  * te_lastep field. The next time X talks to some address A it checks whether A
 288  * is the same as Y's address and if it is there is no need to lookup Y. If the
 289  * address is different or the state of Y is not appropriate (e.g. closed or not
 290  * idle) X does a lookup using tl_find_peer() and caches the new address.
 291  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
 292  * on the endpoint found.
 293  *
 294  * During close of endpoint Y it doesn't try to remove itself from other
 295  * endpoints caches. They will detect that Y is gone and will search the peer
 296  * endpoint again.
 297  *
 298  * Flow Control Handling.
 299  * ----------------------
 300  *
 301  * Each connectionless endpoint keeps a list of endpoints which are
 302  * flow-controlled by its queue. It also keeps a pointer to the queue which
 303  * flow-controls itself.  Whenever flow control releases for endpoint X it
 304  * enables all queues from the list. During close it also back-enables everyone
 305  * in the list. If X is flow-controlled when it is closing it removes itself
 306  * from the peer's list.
 307  *
 308  * DATA STRUCTURES
 309  * ===============
 310  *
 311  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 312  * endpoint state. For connection-oriented transports it keeps a list
 313  * of pending connections (tl_icon_t). For connectionless transports it keeps a
 314  * list of endpoints flow controlled by this one.
 315  *
 316  * Each transport type is represented by a per-transport data structure
 317  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
 318  * endpoint address hash tables for each transport. It also contains pointer to
 319  * transport serializer for connectionless transports.
 320  *
 321  * Each endpoint keeps a link to its transport structure, so the code can find
 322  * all per-transport information quickly.
 323  */
 324 
 325 #include        <sys/types.h>
 326 #include        <sys/inttypes.h>
 327 #include        <sys/stream.h>
 328 #include        <sys/stropts.h>
 329 #define _SUN_TPI_VERSION 2
 330 #include        <sys/tihdr.h>
 331 #include        <sys/strlog.h>
 332 #include        <sys/debug.h>
 333 #include        <sys/cred.h>
 334 #include        <sys/errno.h>
 335 #include        <sys/kmem.h>
 336 #include        <sys/id_space.h>
 337 #include        <sys/modhash.h>
 338 #include        <sys/mkdev.h>
 339 #include        <sys/tl.h>
 340 #include        <sys/stat.h>
 341 #include        <sys/conf.h>
 342 #include        <sys/modctl.h>
 343 #include        <sys/strsun.h>
 344 #include        <sys/socket.h>
 345 #include        <sys/socketvar.h>
 346 #include        <sys/sysmacros.h>
 347 #include        <sys/xti_xtiopt.h>
 348 #include        <sys/ddi.h>
 349 #include        <sys/sunddi.h>
 350 #include        <sys/zone.h>
 351 #include        <inet/common.h>   /* typedef int (*pfi_t)() for inet/optcom.h */
 352 #include        <inet/optcom.h>
 353 #include        <sys/strsubr.h>
 354 #include        <sys/ucred.h>
 355 #include        <sys/suntpi.h>
 356 #include        <sys/list.h>
 357 #include        <sys/serializer.h>
 358 
 359 /*
 360  * TBD List
 361  * 14 Eliminate state changes through table
 362  * 16. AF_UNIX socket options
 363  * 17. connect() for ticlts
 364  * 18. support for "netstat" to show AF_UNIX plus TLI local
 365  *      transport connections
 366  * 21. sanity check to flushing on sending M_ERROR
 367  */
 368 
 369 /*
 370  * CONSTANT DECLARATIONS
 371  * --------------------
 372  */
 373 
 374 /*
 375  * Local declarations
      *
      * NEXTSTATE(EV, ST) indexes ti_statetbl by event first and by state
      * second. The arguments are placed in the subscripts without extra
      * parentheses.
 376  */
 377 #define NEXTSTATE(EV, ST)       ti_statetbl[EV][ST]
 378 
 379 #define BADSEQNUM       (-1)    /* initial seq number used by T_DISCON_IND */
 380 #define TL_BUFWAIT      (10000) /* usecs to wait for allocb buffer timeout */
 381 #define TL_TIDUSZ (64*1024)     /* tidu size when "strmsgz" is unlimited (0) */
 382 /*
 383  * Hash tables size.
      * NOTE(review): presumably the size passed when the per-transport hash
      * tables are created; the creation code is not visible here - confirm.
 384  */
 385 #define TL_HASH_SIZE 311
 386 
 387 /*
 388  * Definitions for module_info
      * These values initialize tl_minfo, which both the read-side and the
      * write-side qinit structures point to.
 389  */
 390 #define         TL_ID           (104)           /* module ID number */
 391 #define         TL_NAME         "tl"            /* module name */
 392 #define         TL_MINPSZ       (0)             /* min packet size */
 393 #define         TL_MAXPSZ       INFPSZ          /* max packet size ZZZ */
 394 #define         TL_HIWAT        (16*1024)       /* hi water mark */
 395 #define         TL_LOWAT        (256)           /* lo water mark */
 396 /*
 397  * Definition of minor numbers/modes for new transport provider modes.
 398  * We view the socket use as a separate mode to get a separate name space.
      *
      * TL_SOCKET is a single bit (4) that is ORed with one of the three base
      * transport values to form the socket variants, which therefore evaluate
      * to 4, 5 and 6. TL_MINOR_MASK (0x7) covers all three mode bits.
      *
      * NOTE(review): TL_MINOR_START evaluates to 3; presumably it is the lower
      * bound for dynamically allocated minor numbers - confirm against the
      * minor allocation code, which is not visible here.
 399  */
 400 #define         TL_TICOTS       0       /* connection oriented transport */
 401 #define         TL_TICOTSORD    1       /* COTS w/ orderly release */
 402 #define         TL_TICLTS       2       /* connectionless transport */
 403 #define         TL_UNUSED       3
 404 #define         TL_SOCKET       4       /* Socket */
 405 #define         TL_SOCK_COTS    (TL_SOCKET|TL_TICOTS)
 406 #define         TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD)
 407 #define         TL_SOCK_CLTS    (TL_SOCKET|TL_TICLTS)
 408 
 409 #define         TL_MINOR_MASK   0x7
 410 #define         TL_MINOR_START  (TL_TICLTS + 1)
 411 
 412 /*
 413  * LOCAL MACROS
      *
      * T_ALIGN(p) passes p and sizeof (t_scalar_t) to P2ROUNDUP(), i.e. it
      * rounds p up to the next multiple of the size of a t_scalar_t.
 414  */
 415 #define T_ALIGN(p)      P2ROUNDUP((p), sizeof (t_scalar_t))
 416 
 417 /*
 418  * EXTERNAL VARIABLE DECLARATIONS
 419  * -----------------------------
 420  */
 421 /*
 422  * state table defined in the OS space.c
      * The first dimension is sized by TE_NOEVENTS and the second by
      * TS_NOSTATES; the NEXTSTATE() macro is the lookup wrapper used by this
      * file.
 423  */
 424 extern  char    ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
 425 
 426 /*
 427  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
      *
      * tl_open, tl_close and tl_rsrv are installed in the read-side qinit
      * (tl_rinit); tl_wput and tl_wsrv are installed in the write-side qinit
      * (tl_winit). tl_attach, tl_detach and tl_info are handed to
      * DDI_DEFINE_STREAM_OPS() when tl_devops is defined.
 428  */
 429 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
 430 static int tl_close(queue_t *, int, cred_t *);
 431 static void tl_wput(queue_t *, mblk_t *);
 432 static void tl_wsrv(queue_t *);
 433 static void tl_rsrv(queue_t *);
 434 
 435 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
 436 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
 437 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 438 
 439 
 440 /*
 441  * GLOBAL DATA STRUCTURES AND VARIABLES
 442  * -----------------------------------
 443  */
 444 
 445 /*
 446  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
 447  * For now, we only manage the SO_RECVUCRED option but we also have
 448  * harmless dummy options to make things work with some common code we access.
      *
      * NOTE(review): each entry supplies eight opdes_t members in declaration
      * order. The member order comes from <inet/optcom.h>, which is not
      * visible here - presumably option name, level, two access masks,
      * required privilege, properties, size and default value. Confirm
      * against the header before adding or editing entries.
 449  */
 450 opdes_t tl_opt_arr[] = {
 451         /* The SO_TYPE is needed for the hack below */
 452         {
 453                 SO_TYPE,
 454                 SOL_SOCKET,
 455                 OA_R,
 456                 OA_R,
 457                 OP_NP,
 458                 0,
 459                 sizeof (t_scalar_t),
 460                 0
 461         },
 462         {
 463                 SO_RECVUCRED,
 464                 SOL_SOCKET,
 465                 OA_RW,
 466                 OA_RW,
 467                 OP_NP,
 468                 0,
 469                 sizeof (int),
 470                 0
 471         }
 472 };
 473 
 474 /*
 475  * Table of all supported levels
 476  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 477  * any supported options so we need this info separately.
 478  *
 479  * This is needed only for topmost tpi providers.
      *
      * TL_VALID_LEVELS_CNT is the number of entries in this table, computed
      * with A_CNT() so it tracks additions automatically.
 480  */
 481 optlevel_t      tl_valid_levels_arr[] = {
 482         XTI_GENERIC,
 483         SOL_SOCKET,
 484         TL_PROT_LEVEL
 485 };
 486 
 487 #define TL_VALID_LEVELS_CNT     A_CNT(tl_valid_levels_arr)
 488 /*
 489  * Current upper bound on the amount of space needed to return all options.
 490  * Additional options with data size of sizeof(long) are handled automatically.
 491  * Others need hand job.
      *
      * The bound is the sum of:
      *   - 4 bytes (count << 2) per tl_opt_arr entry,
      *   - one struct opthdr per tl_opt_arr entry,
      *   - a fixed 64 bytes,
      *   - one struct T_optmgmt_ack.
      *
      * TL_OPT_ARR_CNT is the number of entries in tl_opt_arr.
 492  */
 493 #define TL_MAX_OPT_BUF_LEN                                              \
 494                 ((A_CNT(tl_opt_arr) << 2) +                               \
 495                 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +          \
 496                 64 + sizeof (struct T_optmgmt_ack))
 497 
 498 #define TL_OPT_ARR_CNT  A_CNT(tl_opt_arr)
 499 
 500 /*
 501  *      transport addr structure
      *
      *      Describes an address as a zone id plus a (length, buffer) pair.
      *      struct tl_endpt embeds one of these as te_ap.
 502  */
 503 typedef struct tl_addr {
 504         zoneid_t        ta_zoneid;              /* Zone scope of address */
 505         t_scalar_t      ta_alen;                /* length of abuf */
 506         void            *ta_abuf;               /* the addr itself */
 507 } tl_addr_t;
 508 
 509 /*
 510  * Refcounted version of serializer.
      * Pairs a serializer_t with a reference count so that several endpoints
      * can share one serializer.
 511  */
 512 typedef struct tl_serializer {
 513         uint_t          ts_refcnt;      /* references to this wrapper */
 514         serializer_t    *ts_serializer; /* the underlying serializer */
 515 } tl_serializer_t;
 516 
 517 /*
 518  * Each transport type has a separate state.
 519  * Per-transport state.
      *
      * Endpoints reach this structure through te_transport; the te_addrhash,
      * te_aihash and te_defaddr accessors in struct tl_endpt resolve to the
      * members below.
 520  */
 521 typedef struct tl_transport_state {
 522         char            *tr_name;       /* transport name string */
 523         minor_t         tr_minor;       /* TL_* mode value of this transport */
 524         uint32_t        tr_defaddr;     /* initialized to TL_DFADDR */
 525         mod_hash_t      *tr_ai_hash;    /* acceptor ID hash */
 526         mod_hash_t      *tr_addr_hash;  /* endpoint address hash */
 527         tl_serializer_t *tr_serializer; /* serializer for CLTS transports */
 528 } tl_transport_state_t;
 529 
 530 #define TL_DFADDR 0x1000
 531 
     /*
      * Per-transport state, one entry per transport mode. The entries are laid
      * out so that the array index equals the entry's tr_minor value
      * (TL_TICOTS == 0 through TL_SOCK_CLTS == 6), with "undefined" occupying
      * the TL_UNUSED slot. Every entry starts with tr_defaddr set to TL_DFADDR
      * and with both hash table pointers and the serializer pointer NULL.
      */
 532 static tl_transport_state_t tl_transports[] = {
 533         { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
 534         { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
 535         { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
 536         { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
 537         { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
 538         { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
 539         { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
 540 };
 541 
 542 #define TL_MAXTRANSPORT A_CNT(tl_transports)
     /* TL_MAXTRANSPORT is the number of entries in tl_transports. */
 543 
     /* Forward declaration so structures below can refer to endpoints. */
 544 struct tl_endpt;
 545 typedef struct tl_endpt tl_endpt_t;
 546 
     /*
      * Function type taking a message block and an endpoint.
      * NOTE(review): presumably the signature of the callbacks run behind the
      * serializer; the users of this type are not visible here - confirm.
      */
 547 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
 548 
 549 /*
 550  * Data structure used to represent pending connects.
 551  * Records enough information so that the connecting peer can close
 552  * before the connection gets accepted.
      *
      * Connection-oriented endpoints keep these on their te_iconp list,
      * linked through ti_node.
 553  */
 554 typedef struct tl_icon {
 555         list_node_t     ti_node;        /* linkage on the pending list */
 556         struct tl_endpt *ti_tep;        /* NULL if peer has already closed */
 557         mblk_t          *ti_mp;         /* b_next list of data + ordrel_ind */
 558         t_scalar_t      ti_seqno;       /* Sequence number */
 559 } tl_icon_t;
 560 
 561 typedef struct so_ux_addr soux_addr_t;
     /* Size in bytes of a soux_addr_t. */
 562 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t)
 563 
 564 /*
 565  * Maximum number of unaccepted connection indications allowed per listener.
      *
      * tl_maxqlen is a writable, externally visible variable initialized from
      * TL_MAXQLEN.
      * NOTE(review): presumably exposed as a tunable limit - confirm where it
      * is consulted; the bind path is not visible here.
 566  */
 567 #define TL_MAXQLEN      4096
 568 int tl_maxqlen = TL_MAXQLEN;
 569 
 570 /*
 571  *      transport endpoint structure
      *
      *      One instance holds all state for an endpoint. The #define lines
      *      interleaved with the members are shorthand accessors: they let
      *      code write tep->te_alen, tep->te_conp and so on instead of
      *      spelling out the nested member path.
 572  */
 573 struct tl_endpt {
 574         queue_t         *te_rq;         /* stream read queue */
 575         queue_t         *te_wq;         /* stream write queue */
 576         uint32_t        te_refcnt;      /* endpoint reference count */
 577         int32_t         te_state;       /* TPI state of endpoint */
 578         minor_t         te_minor;       /* minor number */
 579 #define te_seqno        te_minor
 580         uint_t          te_flag;        /* flag field */
 581         boolean_t       te_nowsrv;      /* set by TL_PUTBQ to stop wsrv */
 582         tl_serializer_t *te_ser;        /* Serializer to use */
 583 #define te_serializer   te_ser->ts_serializer
 584 
 585         soux_addr_t     te_uxaddr;      /* Socket address */
 586 #define te_magic        te_uxaddr.soua_magic
 587 #define te_vp           te_uxaddr.soua_vp
 588         tl_addr_t       te_ap;          /* addr bound to this endpt */
 589 #define te_zoneid te_ap.ta_zoneid
 590 #define te_alen te_ap.ta_alen
 591 #define te_abuf te_ap.ta_abuf
 592 
 593         tl_transport_state_t *te_transport; /* per-transport state */
 594 #define te_addrhash     te_transport->tr_addr_hash
 595 #define te_aihash       te_transport->tr_ai_hash
 596 #define te_defaddr      te_transport->tr_defaddr
 597         cred_t          *te_credp;      /* endpoint user credentials */
 598         mod_hash_hndl_t te_hash_hndl;   /* Handle for address hash */
 599 
 600         /*
 601          * State specific for connection-oriented and connectionless transports.
              * The two variants share storage, so only the members matching the
              * endpoint's transport type are meaningful.
 602          */
 603         union {
 604                 /* Connection-oriented state. */
 605                 struct {
 606                         t_uscalar_t _te_nicon;  /* count of conn requests */
 607                         t_uscalar_t _te_qlen;   /* max conn requests */
 608                         tl_endpt_t  *_te_oconp; /* conn request pending */
 609                         tl_endpt_t  *_te_conp;  /* connected endpt */
                             /*
                              * NOTE(review): pad exists only when _ILP32 is not
                              * defined; presumably it makes _te_iconp overlay
                              * _te_flowlist in the connectionless variant -
                              * confirm against sizeof (list_node_t).
                              */
 610 #ifndef _ILP32
 611                         void        *_te_pad;
 612 #endif
 613                         list_t  _te_iconp;      /* list of conn ind. pending */
 614                 } _te_cots_state;
 615                 /* Connection-less state. */
 616                 struct {
 617                         tl_endpt_t *_te_lastep; /* last dest. endpoint */
 618                         tl_endpt_t *_te_flowq;  /* flow controlled on whom */
 619                         list_node_t _te_flows;  /* lists of connections */
 620                         list_t  _te_flowlist;   /* Who flowcontrols on me */
 621                 } _te_clts_state;
 622         } _te_transport_state;
 623 #define te_nicon        _te_transport_state._te_cots_state._te_nicon
 624 #define te_qlen         _te_transport_state._te_cots_state._te_qlen
 625 #define te_oconp        _te_transport_state._te_cots_state._te_oconp
 626 #define te_conp         _te_transport_state._te_cots_state._te_conp
 627 #define te_iconp        _te_transport_state._te_cots_state._te_iconp
 628 #define te_lastep       _te_transport_state._te_clts_state._te_lastep
 629 #define te_flowq        _te_transport_state._te_clts_state._te_flowq
 630 #define te_flowlist     _te_transport_state._te_clts_state._te_flowlist
 631 #define te_flows        _te_transport_state._te_clts_state._te_flows
 632 
 633         bufcall_id_t    te_bufcid;      /* outstanding bufcall id */
 634         timeout_id_t    te_timoutid;    /* outstanding timeout id */
 635         pid_t           te_cpid;        /* cached pid of endpoint */
 636         t_uscalar_t     te_acceptor_id; /* acceptor id for T_CONN_RES */
 637         /*
 638          * Pieces of the endpoint state needed for closing.
              * NOTE(review): te_closelock and te_closecv presumably guard and
              * signal changes to te_closing/te_closewait; their users are not
              * visible here - confirm. The three mblk_t members are embedded
              * (not pointers) and are used to enter the serializer.
 639          */
 640         kmutex_t        te_closelock;
 641         kcondvar_t      te_closecv;
 642         uint8_t         te_closing;     /* The endpoint started closing */
 643         uint8_t         te_closewait;   /* Wait in close until zero */
 644         mblk_t          te_closemp;     /* for entering serializer on close */
 645         mblk_t          te_rsrvmp;      /* for entering serializer on rsrv */
 646         mblk_t          te_wsrvmp;      /* for entering serializer on wsrv */
             /*
              * NOTE(review): te_srv_lock and te_srv_cv presumably synchronize
              * the two *_active flags below - confirm against tl_rsrv() and
              * tl_wsrv().
              */
 647         kmutex_t        te_srv_lock;
 648         kcondvar_t      te_srv_cv;
 649         uint8_t         te_rsrv_active; /* Running in tl_rsrv() */
 650         uint8_t         te_wsrv_active; /* Running in tl_wsrv() */
 651         /*
 652          * Pieces of the endpoint state needed for serializer transitions.
 653          */
 654         kmutex_t        te_ser_lock;    /* Protects the count below */
 655         uint_t          te_ser_count;   /* Number of messages on serializer */
 656 };
 657 
 658 /*
 659  * Flag values. Lower 4 bits specify the transport used.
 660  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
 661  * they allow to identify the endpoint more easily.
      *
      * These bits are stored in te_flag. All values are 0x10 or above, so they
      * do not collide with the transport mode kept in the low bits.
 662  */
 663 #define TL_LISTENER     0x00010 /* the listener endpoint */
 664 #define TL_ACCEPTOR     0x00020 /* the accepting endpoint */
 665 #define TL_EAGER        0x00040 /* connecting endpoint */
 666 #define TL_ACCEPTED     0x00080 /* accepted connection */
 667 #define TL_SETCRED      0x00100 /* flag to indicate sending of credentials */
 668 #define TL_SETUCRED     0x00200 /* flag to indicate sending of ucred */
 669 #define TL_SOCKUCRED    0x00400 /* flag to indicate sending of SCM_UCRED */
 670 #define TL_ADDRHASHED   0x01000 /* Endpoint address is stored in te_addrhash */
 671 #define TL_CLOSE_SER    0x10000 /* Endpoint close has entered the serializer */
 672 /*
 673  * Boolean checks for the endpoint type.
      *
      * Each predicate tests one mode bit of te_flag: TL_TICLTS (2) tells
      * connectionless from connection-oriented, TL_TICOTSORD (1) marks
      * orderly release, and TL_SOCKET (4) marks socket endpoints. The
      * argument is an endpoint pointer. Note that a te_flag holding
      * TL_UNUSED (3) would satisfy both IS_CLTS and IS_COTSORD.
 674  */
 675 #define         IS_CLTS(x)      (((x)->te_flag & TL_TICLTS) != 0)
 676 #define         IS_COTS(x)      (((x)->te_flag & TL_TICLTS) == 0)
 677 #define         IS_COTSORD(x)   (((x)->te_flag & TL_TICOTSORD) != 0)
 678 #define         IS_SOCKET(x)    (((x)->te_flag & TL_SOCKET) != 0)
 679 
 680 /*
 681  * Certain operations are always used together. These macros reduce the chance
 682  * of missing a part of a combination.
      *
      * TL_UNCONNECT(x)   - tl_refrele() the endpoint, then set x to NULL.
      * TL_REMOVE_PEER(x) - TL_UNCONNECT(x), but only when x is not NULL.
      * TL_PUTBQ(x, mp)   - set te_nowsrv and putbq() mp on the write queue;
      *                     asserts that TL_CLOSE_SER is not set on x.
      * TL_QENABLE(x)     - clear te_nowsrv and qenable() the write queue.
      * TL_PUTQ(x, mp)    - clear te_nowsrv and putq() mp on the write queue.
      *
      * NOTE(review): every macro expands to a bare { } block rather than
      * do { } while (0), and evaluates its argument more than once. Pass
      * only side-effect-free lvalues/expressions, and do not use them as the
      * unbraced body of an if that is followed by an else.
 683  */
 684 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
 685 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
 686 
 687 #define TL_PUTBQ(x, mp) {               \
 688         ASSERT(!((x)->te_flag & TL_CLOSE_SER));  \
 689         (x)->te_nowsrv = B_TRUE;     \
 690         (void) putbq((x)->te_wq, mp);        \
 691 }
 692 
 693 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
 694 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
 695 
/*
 * STREAMS driver glue data structures.
 */

/*
 * Module information shared by the read and write qinit structures below:
 * driver id and name, packet size limits and flow-control water marks.
 */
static  struct  module_info     tl_minfo = {
        TL_ID,                  /* mi_idnum */
        TL_NAME,                /* mi_idname */
        TL_MINPSZ,              /* mi_minpsz */
        TL_MAXPSZ,              /* mi_maxpsz */
        TL_HIWAT,               /* mi_hiwat */
        TL_LOWAT                /* mi_lowat */
};
 707 
/*
 * Read-side queue initialization: no put procedure, tl_rsrv as the service
 * procedure, and the open/close entry points for the driver.
 */
static  struct  qinit   tl_rinit = {
        NULL,                   /* qi_putp */
        (int (*)())tl_rsrv,     /* qi_srvp */
        tl_open,                /* qi_qopen */
        tl_close,               /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &tl_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 717 
/*
 * Write-side queue initialization: tl_wput as the put procedure and tl_wsrv
 * as the service procedure. Open/close live on the read side only.
 */
static  struct  qinit   tl_winit = {
        (int (*)())tl_wput,     /* qi_putp */
        (int (*)())tl_wsrv,     /* qi_srvp */
        NULL,                   /* qi_qopen */
        NULL,                   /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &tl_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 727 
/*
 * Stream table tying the read and write qinit structures together.
 * The multiplexor slots are unused (NULL).
 */
static  struct streamtab        tlinfo = {
        &tl_rinit,          /* st_rdinit */
        &tl_winit,          /* st_wrinit */
        NULL,                   /* st_muxrinit */
        NULL                    /* st_muxwrinit */
};
 734 
/*
 * Device operations vector tl_devops: wires in tl_attach, tl_detach and
 * tl_info together with the tlinfo stream table; the remaining entry point
 * slots are nulldev and quiesce is declared as not supported.
 */
DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);

/*
 * Driver linkage structure referenced from modlinkage below.
 */
static struct modldrv modldrv = {
        &mod_driverops,             /* Type of module -- pseudo driver here */
        "TPI Local Transport (tl)",
        &tl_devops,         /* driver ops */
};

/*
 * Module linkage information for the kernel.
 * Passed to mod_install(), mod_remove() and mod_info() by the loadable
 * module routines _init(), _fini() and _info().
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        &modldrv,
        NULL
};
 752 
/*
 * Templates for response to info request
 * Check sanity of unlimited connect data etc.
 *
 * Fields marked "fill at run time" are left as 0 here and CURRENT_state is
 * preset to -1; whoever copies a template out is expected to overwrite them.
 */

/* Provider flags advertised by the CLTS and COTS templates respectively. */
#define         TL_CLTS_PROVIDER_FLAG   (XPG4_1|SENDZERO)
#define         TL_COTS_PROVIDER_FLAG   (XPG4_1|SENDZERO)

/* Template for connection-oriented transports: every size is unlimited. */
static struct T_info_ack tl_cots_info_ack =
        {
                T_INFO_ACK,     /* PRIM_type -always T_INFO_ACK */
                T_INFINITE,     /* TSDU size */
                T_INFINITE,     /* ETSDU size */
                T_INFINITE,     /* CDATA_size */
                T_INFINITE,     /* DDATA_size */
                T_INFINITE,     /* ADDR_size  */
                T_INFINITE,     /* OPT_size */
                0,              /* TIDU_size - fill at run time */
                T_COTS,         /* SERV_type */
                -1,             /* CURRENT_state */
                TL_COTS_PROVIDER_FLAG   /* PROVIDER_flag */
        };

/*
 * Template for connectionless transports: expedited, connect and disconnect
 * data are marked unsupported (-2); address and option sizes are -1.
 */
static struct T_info_ack tl_clts_info_ack =
        {
                T_INFO_ACK,     /* PRIM_type - always T_INFO_ACK */
                0,              /* TSDU_size - fill at run time */
                -2,             /* ETSDU_size -2 => not supported */
                -2,             /* CDATA_size -2 => not supported */
                -2,             /* DDATA_size  -2 => not supported */
                -1,             /* ADDR_size -1 => infinite */
                -1,             /* OPT_size */
                0,              /* TIDU_size - fill at run time */
                T_CLTS,         /* SERV_type */
                -1,             /* CURRENT_state */
                TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
        };
 790 
/*
 * private copy of devinfo pointer used in tl_info
 * Set by tl_attach() once attach has succeeded.
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.
 * Created in tl_attach(), destroyed in tl_detach(); holds tl_endpt_t objects.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.
 * Each open allocates a unique minor from here; tl_free() returns it.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.
 * Computed in tl_attach() from strmsgsz, falling back to TL_TIDUSZ.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
/* NOTE(review): not used in this part of the file; looks like a counter. */
static int tl_client_closing_when_accepting;

/* NOTE(review): not used in this part of the file; purpose to be confirmed. */
static int tl_serializer_noswitch;
 823 
/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 *
 * Forward declarations for file-local routines. The *_ser routines take
 * (mblk_t *, tl_endpt_t *); tl_close_ser and tl_close_finish_ser are posted
 * through tl_serializer_enter() in that form.
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
        t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t  *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);
 902 
/*
 * Initialize option database object for TL
 *
 * Bundles the default/get/set option handlers with the option table and the
 * list of valid option levels. Not static, so it is visible outside this
 * file.
 */

optdb_obj_t tl_opt_obj = {
        tl_default_opt,         /* TL default value function pointer */
        tl_get_opt,             /* TL get function pointer */
        tl_set_opt,             /* TL set function pointer */
        TL_OPT_ARR_CNT,         /* TL option database count of entries */
        tl_opt_arr,             /* TL option database */
        TL_VALID_LEVELS_CNT,    /* TL valid level count of entries */
        tl_valid_levels_arr     /* TL valid level array */
};
 916 
 917 /*
 918  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
 919  * ---------------------------------------
 920  */
 921 
 922 /*
 923  * Loadable module routines
 924  */
 925 int
 926 _init(void)
 927 {
 928         return (mod_install(&modlinkage));
 929 }
 930 
 931 int
 932 _fini(void)
 933 {
 934         return (mod_remove(&modlinkage));
 935 }
 936 
 937 int
 938 _info(struct modinfo *modinfop)
 939 {
 940         return (mod_info(&modlinkage, modinfop));
 941 }
 942 
 943 /*
 944  * Driver Entry Points and Other routines
 945  */
 946 static int
 947 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 948 {
 949         int i;
 950         char name[32];
 951 
 952         /*
 953          * Resume from a checkpoint state.
 954          */
 955         if (cmd == DDI_RESUME)
 956                 return (DDI_SUCCESS);
 957 
 958         if (cmd != DDI_ATTACH)
 959                 return (DDI_FAILURE);
 960 
 961         /*
 962          * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
 963          * streams message sizes can be unlimited. We use a defined constant
 964          * instead.
 965          */
 966         tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
 967 
 968         /*
 969          * Create subdevices for each transport.
 970          */
 971         for (i = 0; i < TL_UNUSED; i++) {
 972                 if (ddi_create_minor_node(devi,
 973                     tl_transports[i].tr_name,
 974                     S_IFCHR, tl_transports[i].tr_minor,
 975                     DDI_PSEUDO, NULL) == DDI_FAILURE) {
 976                         ddi_remove_minor_node(devi, NULL);
 977                         return (DDI_FAILURE);
 978                 }
 979         }
 980 
 981         tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
 982             0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
 983 
 984         if (tl_cache == NULL) {
 985                 ddi_remove_minor_node(devi, NULL);
 986                 return (DDI_FAILURE);
 987         }
 988 
 989         tl_minors = id_space_create("tl_minor_space",
 990             TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
 991 
 992         /*
 993          * Create ID space for minor numbers
 994          */
 995         for (i = 0; i < TL_MAXTRANSPORT; i++) {
 996                 tl_transport_state_t *t = &tl_transports[i];
 997 
 998                 if (i == TL_UNUSED)
 999                         continue;
1000 
1001                 /* Socket COTSORD shares namespace with COTS */
1002                 if (i == TL_SOCK_COTSORD) {
1003                         t->tr_ai_hash =
1004                             tl_transports[TL_SOCK_COTS].tr_ai_hash;
1005                         ASSERT(t->tr_ai_hash != NULL);
1006                         t->tr_addr_hash =
1007                             tl_transports[TL_SOCK_COTS].tr_addr_hash;
1008                         ASSERT(t->tr_addr_hash != NULL);
1009                         continue;
1010                 }
1011 
1012                 /*
1013                  * Create hash tables.
1014                  */
1015                 (void) snprintf(name, sizeof (name), "%s_ai_hash",
1016                     t->tr_name);
1017 #ifdef _ILP32
1018                 if (i & TL_SOCKET)
1019                         t->tr_ai_hash =
1020                             mod_hash_create_idhash(name, tl_hash_size - 1,
1021                             mod_hash_null_valdtor);
1022                 else
1023                         t->tr_ai_hash =
1024                             mod_hash_create_ptrhash(name, tl_hash_size,
1025                             mod_hash_null_valdtor, sizeof (queue_t));
1026 #else
1027                 t->tr_ai_hash =
1028                     mod_hash_create_idhash(name, tl_hash_size - 1,
1029                     mod_hash_null_valdtor);
1030 #endif /* _ILP32 */
1031 
1032                 if (i & TL_SOCKET) {
1033                         (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1034                             t->tr_name);
1035                         t->tr_addr_hash = mod_hash_create_ptrhash(name,
1036                             tl_hash_size, mod_hash_null_valdtor,
1037                             sizeof (uintptr_t));
1038                 } else {
1039                         (void) snprintf(name, sizeof (name), "%s_addr_hash",
1040                             t->tr_name);
1041                         t->tr_addr_hash = mod_hash_create_extended(name,
1042                             tl_hash_size, mod_hash_null_keydtor,
1043                             mod_hash_null_valdtor,
1044                             tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1045                 }
1046 
1047                 /* Create serializer for connectionless transports. */
1048                 if (i & TL_TICLTS)
1049                         t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1050         }
1051 
1052         tl_dip = devi;
1053 
1054         return (DDI_SUCCESS);
1055 }
1056 
1057 static int
1058 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1059 {
1060         int i;
1061 
1062         if (cmd == DDI_SUSPEND)
1063                 return (DDI_SUCCESS);
1064 
1065         if (cmd != DDI_DETACH)
1066                 return (DDI_FAILURE);
1067 
1068         /*
1069          * Destroy arenas and hash tables.
1070          */
1071         for (i = 0; i < TL_MAXTRANSPORT; i++) {
1072                 tl_transport_state_t *t = &tl_transports[i];
1073 
1074                 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1075                         continue;
1076 
1077                 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1078                 if (t->tr_serializer != NULL) {
1079                         tl_serializer_refrele(t->tr_serializer);
1080                         t->tr_serializer = NULL;
1081                 }
1082 
1083 #ifdef _ILP32
1084                 if (i & TL_SOCKET)
1085                         mod_hash_destroy_idhash(t->tr_ai_hash);
1086                 else
1087                         mod_hash_destroy_ptrhash(t->tr_ai_hash);
1088 #else
1089                 mod_hash_destroy_idhash(t->tr_ai_hash);
1090 #endif /* _ILP32 */
1091                 t->tr_ai_hash = NULL;
1092                 if (i & TL_SOCKET)
1093                         mod_hash_destroy_ptrhash(t->tr_addr_hash);
1094                 else
1095                         mod_hash_destroy_hash(t->tr_addr_hash);
1096                 t->tr_addr_hash = NULL;
1097         }
1098 
1099         kmem_cache_destroy(tl_cache);
1100         tl_cache = NULL;
1101         id_space_destroy(tl_minors);
1102         tl_minors = NULL;
1103         ddi_remove_minor_node(devi, NULL);
1104         return (DDI_SUCCESS);
1105 }
1106 
1107 /* ARGSUSED */
1108 static int
1109 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1110 {
1111 
1112         int retcode = DDI_FAILURE;
1113 
1114         switch (infocmd) {
1115 
1116         case DDI_INFO_DEVT2DEVINFO:
1117                 if (tl_dip != NULL) {
1118                         *result = (void *)tl_dip;
1119                         retcode = DDI_SUCCESS;
1120                 }
1121                 break;
1122 
1123         case DDI_INFO_DEVT2INSTANCE:
1124                 *result = (void *)0;
1125                 retcode = DDI_SUCCESS;
1126                 break;
1127 
1128         default:
1129                 break;
1130         }
1131         return (retcode);
1132 }
1133 
1134 /*
1135  * Endpoint reference management.
1136  */
/*
 * Take one more reference on the endpoint by atomically incrementing
 * te_refcnt. Paired with tl_refrele().
 */
static void
tl_refhold(tl_endpt_t *tep)
{
        atomic_add_32(&tep->te_refcnt, 1);
}
1142 
1143 static void
1144 tl_refrele(tl_endpt_t *tep)
1145 {
1146         ASSERT(tep->te_refcnt != 0);
1147 
1148         if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1149                 tl_free(tep);
1150 }
1151 
1152 /*ARGSUSED*/
1153 static int
1154 tl_constructor(void *buf, void *cdrarg, int kmflags)
1155 {
1156         tl_endpt_t *tep = buf;
1157 
1158         bzero(tep, sizeof (tl_endpt_t));
1159         mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1160         cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1161         mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1162         cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1163         mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1164 
1165         return (0);
1166 }
1167 
1168 /*ARGSUSED*/
1169 static void
1170 tl_destructor(void *buf, void *cdrarg)
1171 {
1172         tl_endpt_t *tep = buf;
1173 
1174         mutex_destroy(&tep->te_closelock);
1175         cv_destroy(&tep->te_closecv);
1176         mutex_destroy(&tep->te_srv_lock);
1177         cv_destroy(&tep->te_srv_cv);
1178         mutex_destroy(&tep->te_ser_lock);
1179 }
1180 
/*
 * Final destruction of an endpoint; called from tl_refrele() once the last
 * reference is gone.
 *
 * Verifies (DEBUG kernels only) that close processing left the endpoint in
 * the expected state, releases what the endpoint still owns - address buffer,
 * minor number, reserved hash handle, peer references and, for COTS, the
 * private serializer - resets the remaining fields and returns the object to
 * tl_cache.
 *
 * Dropping a peer reference with TL_REMOVE_PEER() goes through tl_refrele()
 * and may therefore free the peer endpoint as well.
 */
static void
tl_free(tl_endpt_t *tep)
{
        ASSERT(tep->te_refcnt == 0);
        ASSERT(tep->te_transport != NULL);
        ASSERT(tep->te_rq == NULL);
        ASSERT(tep->te_wq == NULL);
        ASSERT(tep->te_ser != NULL);
        ASSERT(tep->te_ser_count == 0);
        ASSERT(! (tep->te_flag & TL_ADDRHASHED));

        if (IS_SOCKET(tep)) {
                /* Socket addresses live inside the endpoint: nothing to free */
                ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
                ASSERT(tep->te_abuf == &tep->te_uxaddr);
                ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
                ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
        } else if (tep->te_abuf != NULL) {
                kmem_free(tep->te_abuf, tep->te_alen);
                tep->te_alen = -1; /* uninitialized */
                tep->te_abuf = NULL;
        } else {
                ASSERT(tep->te_alen == -1);
        }

        /* Give back the minor number allocated in tl_open(). */
        id_free(tl_minors, tep->te_minor);
        ASSERT(tep->te_credp == NULL);

        /* Release the hash handle reserved in tl_open() if still held. */
        if (tep->te_hash_hndl != NULL)
                mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

        if (IS_COTS(tep)) {
                TL_REMOVE_PEER(tep->te_conp);
                TL_REMOVE_PEER(tep->te_oconp);
                /* COTS endpoints own a private serializer (see tl_open()). */
                tl_serializer_refrele(tep->te_ser);
                tep->te_ser = NULL;
                ASSERT(tep->te_nicon == 0);
                ASSERT(list_head(&tep->te_iconp) == NULL);
        } else {
                /* CLTS endpoints only borrow the transport's serializer. */
                ASSERT(tep->te_lastep == NULL);
                ASSERT(list_head(&tep->te_flowlist) == NULL);
                ASSERT(tep->te_flowq == NULL);
        }

        ASSERT(tep->te_bufcid == 0);
        ASSERT(tep->te_timoutid == 0);
        bzero(&tep->te_ap, sizeof (tep->te_ap));
        tep->te_acceptor_id = 0;

        ASSERT(tep->te_closewait == 0);
        ASSERT(!tep->te_rsrv_active);
        ASSERT(!tep->te_wsrv_active);
        /*
         * Reset state that tl_open() does not set itself; presumably needed
         * because objects coming back out of the cache are not constructed
         * again.
         */
        tep->te_closing = 0;
        tep->te_nowsrv = B_FALSE;
        tep->te_flag = 0;

        kmem_cache_free(tl_cache, tep);
}
1238 
1239 /*
1240  * Allocate/free reference-counted wrappers for serializers.
1241  */
1242 static tl_serializer_t *
1243 tl_serializer_alloc(int flags)
1244 {
1245         tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1246         serializer_t *ser;
1247 
1248         if (s == NULL)
1249                 return (NULL);
1250 
1251         ser = serializer_create(flags);
1252 
1253         if (ser == NULL) {
1254                 kmem_free(s, sizeof (tl_serializer_t));
1255                 return (NULL);
1256         }
1257 
1258         s->ts_refcnt = 1;
1259         s->ts_serializer = ser;
1260         return (s);
1261 }
1262 
/*
 * Take one more reference on the serializer wrapper by atomically
 * incrementing ts_refcnt. Paired with tl_serializer_refrele().
 */
static void
tl_serializer_refhold(tl_serializer_t *s)
{
        atomic_add_32(&s->ts_refcnt, 1);
}
1268 
1269 static void
1270 tl_serializer_refrele(tl_serializer_t *s)
1271 {
1272         if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1273                 serializer_destroy(s->ts_serializer);
1274                 kmem_free(s, sizeof (tl_serializer_t));
1275         }
1276 }
1277 
/*
 * Post a request on the endpoint serializer. For COTS transports keep track of
 * the number of pending requests.
 *
 * tlproc is invoked through the serializer with mp and tep as its arguments.
 * The pending count (te_ser_count, protected by te_ser_lock) is raised before
 * the request is posted; tl_serializer_exit() lowers it again.
 */
static void
tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
{
        if (IS_COTS(tep)) {
                mutex_enter(&tep->te_ser_lock);
                tep->te_ser_count++;
                mutex_exit(&tep->te_ser_lock);
        }
        serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
}
1292 
1293 /*
1294  * Complete processing the request on the serializer. Decrement the counter for
1295  * pending requests for COTS transports.
1296  */
1297 static void
1298 tl_serializer_exit(tl_endpt_t *tep)
1299 {
1300         if (IS_COTS(tep)) {
1301                 mutex_enter(&tep->te_ser_lock);
1302                 ASSERT(tep->te_ser_count != 0);
1303                 tep->te_ser_count--;
1304                 mutex_exit(&tep->te_ser_lock);
1305         }
1306 }
1307 
1308 /*
1309  * Hash management functions.
1310  */
1311 
1312 /*
1313  * Return TRUE if two addresses are equal, false otherwise.
1314  */
1315 static boolean_t
1316 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1317 {
1318         return ((ap1->ta_alen > 0) &&
1319             (ap1->ta_alen == ap2->ta_alen) &&
1320             (ap1->ta_zoneid == ap2->ta_zoneid) &&
1321             (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1322 }
1323 
1324 /*
1325  * This function is called whenever an endpoint is found in the hash table.
1326  */
1327 /* ARGSUSED0 */
1328 static void
1329 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1330 {
1331         tl_refhold((tl_endpt_t *)val);
1332 }
1333 
1334 /*
1335  * Address hash function.
1336  */
1337 /* ARGSUSED */
1338 static uint_t
1339 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1340 {
1341         tl_addr_t *ap = (tl_addr_t *)key;
1342         size_t  len = ap->ta_alen;
1343         uchar_t *p = ap->ta_abuf;
1344         uint_t i, g;
1345 
1346         ASSERT((len > 0) && (p != NULL));
1347 
1348         for (i = ap->ta_zoneid; len -- != 0; p++) {
1349                 i = (i << 4) + (*p);
1350                 if ((g = (i & 0xf0000000U)) != 0) {
1351                         i ^= (g >> 24);
1352                         i ^= g;
1353                 }
1354         }
1355         return (i);
1356 }
1357 
1358 /*
1359  * This function is used by hash lookups. It compares two generic addresses.
1360  */
1361 static int
1362 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1363 {
1364 #ifdef  DEBUG
1365         tl_addr_t *ap1 = (tl_addr_t *)key1;
1366         tl_addr_t *ap2 = (tl_addr_t *)key2;
1367 
1368         ASSERT(key1 != NULL);
1369         ASSERT(key2 != NULL);
1370 
1371         ASSERT(ap1->ta_abuf != NULL);
1372         ASSERT(ap2->ta_abuf != NULL);
1373         ASSERT(ap1->ta_alen > 0);
1374         ASSERT(ap2->ta_alen > 0);
1375 #endif
1376 
1377         return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1378 }
1379 
1380 /*
1381  * Prevent endpoint from closing if possible.
1382  * Return B_TRUE on success, B_FALSE on failure.
1383  */
1384 static boolean_t
1385 tl_noclose(tl_endpt_t *tep)
1386 {
1387         boolean_t rc = B_FALSE;
1388 
1389         mutex_enter(&tep->te_closelock);
1390         if (! tep->te_closing) {
1391                 ASSERT(tep->te_closewait == 0);
1392                 tep->te_closewait++;
1393                 rc = B_TRUE;
1394         }
1395         mutex_exit(&tep->te_closelock);
1396         return (rc);
1397 }
1398 
/*
 * Allow endpoint to close if needed.
 *
 * Drops the te_closewait hold under te_closelock and signals te_closecv,
 * which is what tl_close() sleeps on while te_closewait is non-zero.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
        /* Sanity check done before taking the lock (DEBUG kernels only). */
        ASSERT(tep->te_closewait > 0);
        mutex_enter(&tep->te_closelock);
        ASSERT(tep->te_closewait == 1);
        tep->te_closewait--;
        cv_signal(&tep->te_closecv);
        mutex_exit(&tep->te_closelock);
}
1412 
/*
 * STREAMS open entry point.
 *
 * The minor number of *devp selects the transport; SO_SOCKSTR in oflag
 * selects the socket flavor of it. A new endpoint is taken from tl_cache,
 * given a unique minor number (the device is cloned by rewriting *devp),
 * initialized according to its transport type and finally entered into the
 * acceptor ID hash of the transport.
 *
 * Returns 0 on success (also when the queue already has an endpoint
 * attached) and ENXIO for clone/module opens or an invalid minor number.
 */
/* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t  *credp)
{
        tl_endpt_t *tep;
        minor_t     minor = getminor(*devp);

        /*
         * Driver is called directly. Both CLONEOPEN and MODOPEN
         * are illegal
         */
        if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
                return (ENXIO);

        /* The queue already has an endpoint: nothing more to set up. */
        if (rq->q_ptr != NULL)
                return (0);

        /* Minor number should specify the mode used for the driver. */
        if ((minor >= TL_UNUSED))
                return (ENXIO);

        /* From here on "minor" is the index of the transport to use. */
        if (oflag & SO_SOCKSTR) {
                minor |= TL_SOCKET;
        }

        tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
        /* This initial reference is dropped at the end of tl_close(). */
        tep->te_refcnt = 1;
        tep->te_cpid = curproc->p_pid;
        rq->q_ptr = WR(rq)->q_ptr = tep;
        tep->te_state = TS_UNBND;
        tep->te_credp = credp;
        crhold(credp);
        tep->te_zoneid = getzoneid();

        tep->te_flag = minor & TL_MINOR_MASK;
        tep->te_transport = &tl_transports[minor];

        /* Allocate a unique minor number for this instance. */
        tep->te_minor = (minor_t)id_alloc(tl_minors);

        /*
         * Reserve hash handle for bind().
         * NOTE(review): te_addrhash is not assigned in this function;
         * presumably it resolves through te_transport, which is why
         * te_transport is set first - confirm against the endpoint definition.
         */
        (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

        /* Transport-specific initialization */
        if (IS_COTS(tep)) {
                /* Use private serializer */
                tep->te_ser = tl_serializer_alloc(KM_SLEEP);

                /* Create list for pending connections */
                list_create(&tep->te_iconp, sizeof (tl_icon_t),
                    offsetof(tl_icon_t, ti_node));
                tep->te_qlen = 0;
                tep->te_nicon = 0;
                tep->te_oconp = NULL;
                tep->te_conp = NULL;
        } else {
                /* Use shared serializer */
                tep->te_ser = tep->te_transport->tr_serializer;
                bzero(&tep->te_flows, sizeof (list_node_t));
                /* Create list for flow control */
                list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
                    offsetof(tl_endpt_t, te_flows));
                tep->te_flowq = NULL;
                tep->te_lastep = NULL;

        }

        /* Initialize endpoint address */
        if (IS_SOCKET(tep)) {
                /* Socket-specific address handling. */
                tep->te_alen = TL_SOUX_ADDRLEN;
                tep->te_abuf = &tep->te_uxaddr;
                tep->te_vp = (void *)(uintptr_t)tep->te_minor;
                tep->te_magic = SOU_MAGIC_IMPLICIT;
        } else {
                /* No address yet; -1 marks the length as uninitialized. */
                tep->te_alen = -1;
                tep->te_abuf = NULL;
        }

        /* clone the driver */
        *devp = makedevice(getmajor(*devp), tep->te_minor);

        tep->te_rq = rq;
        tep->te_wq = WR(rq);

        /*
         * Pick the acceptor ID: the minor number, except for 32-bit
         * non-socket endpoints which use the read queue pointer. This matches
         * the kind of acceptor ID hash created in tl_attach().
         */
#ifdef  _ILP32
        if (IS_SOCKET(tep))
                tep->te_acceptor_id = tep->te_minor;
        else
                tep->te_acceptor_id = (t_uscalar_t)rq;
#else
        tep->te_acceptor_id = tep->te_minor;
#endif  /* _ILP32 */


        qprocson(rq);

        /*
         * Insert acceptor ID in the hash. The AI hash always sleeps on
         * insertion so insertion can't fail.
         */
        (void) mod_hash_insert(tep->te_transport->tr_ai_hash,
            (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
            (mod_hash_val_t)tep);

        return (0);
}
1523 
1524 /* ARGSUSED1 */
1525 static int
1526 tl_close(queue_t *rq, int flag, cred_t *credp)
1527 {
1528         tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1529         tl_endpt_t *elp = NULL;
1530         queue_t *wq = tep->te_wq;
1531         int rc;
1532 
1533         ASSERT(wq == WR(rq));
1534 
1535         /*
1536          * Remove the endpoint from acceptor hash.
1537          */
1538         rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1539             (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1540             (mod_hash_val_t *)&elp);
1541         ASSERT(rc == 0 && tep == elp);
1542         if ((rc != 0) || (tep != elp)) {
1543                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1544                     SL_TRACE|SL_ERROR,
1545                     "tl_close:inconsistency in AI hash"));
1546         }
1547 
1548         /*
1549          * Wait till close is safe, then mark endpoint as closing.
1550          */
1551         mutex_enter(&tep->te_closelock);
1552         while (tep->te_closewait)
1553                 cv_wait(&tep->te_closecv, &tep->te_closelock);
1554         tep->te_closing = B_TRUE;
1555         /*
1556          * Will wait for the serializer part of the close to finish, so set
1557          * te_closewait now.
1558          */
1559         tep->te_closewait = 1;
1560         tep->te_nowsrv = B_FALSE;
1561         mutex_exit(&tep->te_closelock);
1562 
1563         /*
1564          * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1565          * It is safe because close will wait for tl_close_ser to finish.
1566          */
1567         tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1568 
1569         /*
1570          * Wait for the first phase of close to complete before qprocsoff().
1571          */
1572         mutex_enter(&tep->te_closelock);
1573         while (tep->te_closewait)
1574                 cv_wait(&tep->te_closecv, &tep->te_closelock);
1575         mutex_exit(&tep->te_closelock);
1576 
1577         qprocsoff(rq);
1578 
1579         if (tep->te_bufcid) {
1580                 qunbufcall(rq, tep->te_bufcid);
1581                 tep->te_bufcid = 0;
1582         }
1583         if (tep->te_timoutid) {
1584                 (void) quntimeout(rq, tep->te_timoutid);
1585                 tep->te_timoutid = 0;
1586         }
1587 
1588         /*
1589          * Finish close behind serializer.
1590          *
1591          * For a CLTS endpoint increase a refcount and continue close processing
1592          * with serializer protection. This processing may happen asynchronously
1593          * with the completion of tl_close().
1594          *
1595          * For a COTS endpoint wait before destroying tep since the serializer
1596          * may go away together with tep and we need to destroy serializer
1597          * outside of serializer context.
1598          */
1599         ASSERT(tep->te_closewait == 0);
1600         if (IS_COTS(tep))
1601                 tep->te_closewait = 1;
1602         else
1603                 tl_refhold(tep);
1604 
1605         tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1606 
1607         /*
1608          * For connection-oriented transports wait for all serializer activity
1609          * to settle down.
1610          */
1611         if (IS_COTS(tep)) {
1612                 mutex_enter(&tep->te_closelock);
1613                 while (tep->te_closewait)
1614                         cv_wait(&tep->te_closecv, &tep->te_closelock);
1615                 mutex_exit(&tep->te_closelock);
1616         }
1617 
1618         crfree(tep->te_credp);
1619         tep->te_credp = NULL;
1620         tep->te_wq = NULL;
1621         tl_refrele(tep);
1622         /*
1623          * tep is likely to be destroyed now, so can't reference it any more.
1624          */
1625 
1626         rq->q_ptr = wq->q_ptr = NULL;
1627         return (0);
1628 }
1629 
1630 /*
1631  * First phase of close processing done behind the serializer.
1632  *
1633  * Do not drop the reference in the end - tl_close() wants this reference to
1634  * stay.
1635  */
1636 /* ARGSUSED0 */
1637 static void
1638 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1639 {
1640         ASSERT(tep->te_closing);
1641         ASSERT(tep->te_closewait == 1);
1642         ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1643 
1644         tep->te_flag |= TL_CLOSE_SER;
1645 
1646         /*
1647          * Drain out all messages on queue except for TL_TICOTS where the
1648          * abortive release semantics permit discarding of data on close
1649          */
1650         if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1651                 tl_wsrv_ser(NULL, tep);
1652         }
1653 
1654         /* Remove address from hash table. */
1655         tl_addr_unbind(tep);
1656         /*
1657          * qprocsoff() gets confused when q->q_next is not NULL on the write
1658          * queue of the driver, so clear these before qprocsoff() is called.
1659          * Also clear q_next for the peer since this queue is going away.
1660          */
1661         if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1662                 tl_endpt_t *peer_tep = tep->te_conp;
1663 
1664                 tep->te_wq->q_next = NULL;
1665                 if ((peer_tep != NULL) && !peer_tep->te_closing)
1666                         peer_tep->te_wq->q_next = NULL;
1667         }
1668 
1669         tep->te_rq = NULL;
1670 
1671         /* wake up tl_close() */
1672         tl_closeok(tep);
1673         tl_serializer_exit(tep);
1674 }
1675 
1676 /*
1677  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1678  * the reference for CLTS.
1679  *
1680  * Called from serializer. Should drop reference count for CLTS only.
1681  */
1682 /* ARGSUSED0 */
1683 static void
1684 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1685 {
1686         ASSERT(tep->te_closing);
1687         IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1688         IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1689 
1690         tep->te_state = -1;  /* Uninitialized */
1691         if (IS_COTS(tep)) {
1692                 tl_co_unconnect(tep);
1693         } else {
1694                 /* Connectionless specific cleanup */
1695                 TL_REMOVE_PEER(tep->te_lastep);
1696                 /*
1697                  * Backenable anybody that is flow controlled waiting for
1698                  * this endpoint.
1699                  */
1700                 tl_cl_backenable(tep);
1701                 if (tep->te_flowq != NULL) {
1702                         list_remove(&(tep->te_flowq->te_flowlist), tep);
1703                         tep->te_flowq = NULL;
1704                 }
1705         }
1706 
1707         tl_serializer_exit(tep);
1708         if (IS_COTS(tep))
1709                 tl_closeok(tep);
1710         else
1711                 tl_refrele(tep);
1712 }
1713 
1714 /*
1715  * STREAMS write-side put procedure.
1716  * Enter serializer for most of the processing.
1717  *
1718  * The T_CONN_REQ is processed outside of serializer.
1719  */
1720 static void
1721 tl_wput(queue_t *wq, mblk_t *mp)
1722 {
1723         tl_endpt_t              *tep = (tl_endpt_t *)wq->q_ptr;
1724         ssize_t                 msz = MBLKL(mp);
1725         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
1726         tlproc_t                *tl_proc = NULL;
1727 
1728         switch (DB_TYPE(mp)) {
1729         case M_DATA:
1730                 /* Only valid for connection-oriented transports */
1731                 if (IS_CLTS(tep)) {
1732                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1733                             SL_TRACE|SL_ERROR,
1734                             "tl_wput:M_DATA invalid for ticlts driver"));
1735                         tl_merror(wq, mp, EPROTO);
1736                         return;
1737                 }
1738                 tl_proc = tl_wput_data_ser;
1739                 break;
1740 
1741         case M_IOCTL:
1742                 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1743                 case TL_IOC_CREDOPT:
1744                         /* FALLTHROUGH */
1745                 case TL_IOC_UCREDOPT:
1746                         /*
1747                          * Serialize endpoint state change.
1748                          */
1749                         tl_proc = tl_do_ioctl_ser;
1750                         break;
1751 
1752                 default:
1753                         miocnak(wq, mp, 0, EINVAL);
1754                         return;
1755                 }
1756                 break;
1757 
1758         case M_FLUSH:
1759                 /*
1760                  * do canonical M_FLUSH processing
1761                  */
1762                 if (*mp->b_rptr & FLUSHW) {
1763                         flushq(wq, FLUSHALL);
1764                         *mp->b_rptr &= ~FLUSHW;
1765                 }
1766                 if (*mp->b_rptr & FLUSHR) {
1767                         flushq(RD(wq), FLUSHALL);
1768                         qreply(wq, mp);
1769                 } else {
1770                         freemsg(mp);
1771                 }
1772                 return;
1773 
1774         case M_PROTO:
1775                 if (msz < sizeof (prim->type)) {
1776                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1777                             SL_TRACE|SL_ERROR,
1778                             "tl_wput:M_PROTO data too short"));
1779                         tl_merror(wq, mp, EPROTO);
1780                         return;
1781                 }
1782                 switch (prim->type) {
1783                 case T_OPTMGMT_REQ:
1784                 case T_SVR4_OPTMGMT_REQ:
1785                         /*
1786                          * Process TPI option management requests immediately
1787                          * in put procedure regardless of in-order processing
1788                          * of already queued messages.
1789                          * (Note: This driver supports AF_UNIX socket
1790                          * implementation.  Unless we implement this processing,
1791                          * setsockopt() on socket endpoint will block on flow
1792                          * controlled endpoints which it should not. That is
1793                          * required for successful execution of VSU socket tests
1794                          * and is consistent with BSD socket behavior).
1795                          */
1796                         tl_optmgmt(wq, mp);
1797                         return;
1798                 case O_T_BIND_REQ:
1799                 case T_BIND_REQ:
1800                         tl_proc = tl_bind_ser;
1801                         break;
1802                 case T_CONN_REQ:
1803                         if (IS_CLTS(tep)) {
1804                                 tl_merror(wq, mp, EPROTO);
1805                                 return;
1806                         }
1807                         tl_conn_req(wq, mp);
1808                         return;
1809                 case T_DATA_REQ:
1810                 case T_OPTDATA_REQ:
1811                 case T_EXDATA_REQ:
1812                 case T_ORDREL_REQ:
1813                         tl_proc = tl_putq_ser;
1814                         break;
1815                 case T_UNITDATA_REQ:
1816                         if (IS_COTS(tep) ||
1817                             (msz < sizeof (struct T_unitdata_req))) {
1818                                 tl_merror(wq, mp, EPROTO);
1819                                 return;
1820                         }
1821                         if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1822                                 tl_proc = tl_unitdata_ser;
1823                         } else {
1824                                 tl_proc = tl_putq_ser;
1825                         }
1826                         break;
1827                 default:
1828                         /*
1829                          * process in service procedure if message already
1830                          * queued (maintain in-order processing)
1831                          */
1832                         if (wq->q_first != NULL) {
1833                                 tl_proc = tl_putq_ser;
1834                         } else {
1835                                 tl_proc = tl_wput_ser;
1836                         }
1837                         break;
1838                 }
1839                 break;
1840 
1841         case M_PCPROTO:
1842                 /*
1843                  * Check that the message has enough data to figure out TPI
1844                  * primitive.
1845                  */
1846                 if (msz < sizeof (prim->type)) {
1847                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1848                             SL_TRACE|SL_ERROR,
1849                             "tl_wput:M_PCROTO data too short"));
1850                         tl_merror(wq, mp, EPROTO);
1851                         return;
1852                 }
1853                 switch (prim->type) {
1854                 case T_CAPABILITY_REQ:
1855                         tl_capability_req(mp, tep);
1856                         return;
1857                 case T_INFO_REQ:
1858                         tl_proc = tl_info_req_ser;
1859                         break;
1860                 case T_ADDR_REQ:
1861                         tl_proc = tl_addr_req_ser;
1862                         break;
1863 
1864                 default:
1865                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1866                             SL_TRACE|SL_ERROR,
1867                             "tl_wput:unknown TPI msg primitive"));
1868                         tl_merror(wq, mp, EPROTO);
1869                         return;
1870                 }
1871                 break;
1872         default:
1873                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1874                     "tl_wput:default:unexpected Streams message"));
1875                 freemsg(mp);
1876                 return;
1877         }
1878 
1879         /*
1880          * Continue processing via serializer.
1881          */
1882         ASSERT(tl_proc != NULL);
1883         tl_refhold(tep);
1884         tl_serializer_enter(tep, tl_proc, mp);
1885 }
1886 
1887 /*
1888  * Place message on the queue while preserving order.
1889  */
1890 static void
1891 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1892 {
1893         if (tep->te_closing) {
1894                 tl_wput_ser(mp, tep);
1895         } else {
1896                 TL_PUTQ(tep, mp);
1897                 tl_serializer_exit(tep);
1898                 tl_refrele(tep);
1899         }
1900 
1901 }
1902 
1903 static void
1904 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1905 {
1906         ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1907 
1908         switch (DB_TYPE(mp)) {
1909         case M_DATA:
1910                 tl_data(mp, tep);
1911                 break;
1912         case M_PROTO:
1913                 tl_do_proto(mp, tep);
1914                 break;
1915         default:
1916                 freemsg(mp);
1917                 break;
1918         }
1919 }
1920 
1921 /*
1922  * Write side put procedure called from serializer.
1923  */
1924 static void
1925 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1926 {
1927         tl_wput_common_ser(mp, tep);
1928         tl_serializer_exit(tep);
1929         tl_refrele(tep);
1930 }
1931 
1932 /*
1933  * M_DATA processing. Called from serializer.
1934  */
1935 static void
1936 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1937 {
1938         tl_endpt_t      *peer_tep = tep->te_conp;
1939         queue_t         *peer_rq;
1940 
1941         ASSERT(DB_TYPE(mp) == M_DATA);
1942         ASSERT(IS_COTS(tep));
1943 
1944         IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1945 
1946         /*
1947          * fastpath for data. Ignore flow control if tep is closing.
1948          */
1949         if ((peer_tep != NULL) &&
1950             !peer_tep->te_closing &&
1951             ((tep->te_state == TS_DATA_XFER) ||
1952             (tep->te_state == TS_WREQ_ORDREL)) &&
1953             (tep->te_wq != NULL) &&
1954             (tep->te_wq->q_first == NULL) &&
1955             ((peer_tep->te_state == TS_DATA_XFER) ||
1956             (peer_tep->te_state == TS_WREQ_ORDREL))  &&
1957             ((peer_rq = peer_tep->te_rq) != NULL) &&
1958             (canputnext(peer_rq) || tep->te_closing)) {
1959                 putnext(peer_rq, mp);
1960         } else if (tep->te_closing) {
1961                 /*
1962                  * It is possible that by the time we got here tep started to
1963                  * close. If the write queue is not empty, and the state is
1964                  * TS_DATA_XFER the data should be delivered in order, so we
1965                  * call putq() instead of freeing the data.
1966                  */
1967                 if ((tep->te_wq != NULL) &&
1968                     ((tep->te_state == TS_DATA_XFER) ||
1969                     (tep->te_state == TS_WREQ_ORDREL))) {
1970                         TL_PUTQ(tep, mp);
1971                 } else {
1972                         freemsg(mp);
1973                 }
1974         } else {
1975                 TL_PUTQ(tep, mp);
1976         }
1977 
1978         tl_serializer_exit(tep);
1979         tl_refrele(tep);
1980 }
1981 
1982 /*
1983  * Write side service routine.
1984  *
1985  * All actual processing happens within serializer which is entered
1986  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1987  * messages that need processing may have arrived, so tl_wsrv repeats until
1988  * queue is empty or te_nowsrv is set.
1989  */
1990 static void
1991 tl_wsrv(queue_t *wq)
1992 {
1993         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1994 
1995         while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1996                 mutex_enter(&tep->te_srv_lock);
1997                 ASSERT(tep->te_wsrv_active == B_FALSE);
1998                 tep->te_wsrv_active = B_TRUE;
1999                 mutex_exit(&tep->te_srv_lock);
2000 
2001                 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2002 
2003                 /*
2004                  * Wait for serializer job to complete.
2005                  */
2006                 mutex_enter(&tep->te_srv_lock);
2007                 while (tep->te_wsrv_active) {
2008                         cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2009                 }
2010                 cv_signal(&tep->te_srv_cv);
2011                 mutex_exit(&tep->te_srv_lock);
2012         }
2013 }
2014 
2015 /*
2016  * Serialized write side processing of the STREAMS queue.
2017  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2018  * is NULL.
2019  */
2020 static void
2021 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2022 {
2023         mblk_t *mp;
2024         queue_t *wq = tep->te_wq;
2025 
2026         ASSERT(wq != NULL);
2027         while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2028                 tl_wput_common_ser(mp, tep);
2029         }
2030 
2031         /*
2032          * Wakeup service routine unless called from close.
2033          * If ser_mp is specified, the caller is tl_wsrv().
2034          * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2035          * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2036          * be no matching tl_serializer_exit() in this case.
2037          * Also, there is no need to wakeup anyone since tl_close_ser() is not
2038          * waiting on te_srv_cv.
2039          */
2040         if (ser_mp != NULL) {
2041                 /*
2042                  * We are called from tl_wsrv.
2043                  */
2044                 mutex_enter(&tep->te_srv_lock);
2045                 ASSERT(tep->te_wsrv_active);
2046                 tep->te_wsrv_active = B_FALSE;
2047                 cv_signal(&tep->te_srv_cv);
2048                 mutex_exit(&tep->te_srv_lock);
2049                 tl_serializer_exit(tep);
2050         }
2051 }
2052 
2053 /*
2054  * Called when the stream is backenabled. Enter serializer and qenable everyone
2055  * flow controlled by tep.
2056  *
2057  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2058  * is possible that two instances of tl_rsrv will be running reusing the same
2059  * rsrv mblk.
2060  */
2061 static void
2062 tl_rsrv(queue_t *rq)
2063 {
2064         tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2065 
2066         ASSERT(rq->q_first == NULL);
2067         ASSERT(tep->te_rsrv_active == 0);
2068 
2069         tep->te_rsrv_active = B_TRUE;
2070         tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2071         /*
2072          * Wait for serializer job to complete.
2073          */
2074         mutex_enter(&tep->te_srv_lock);
2075         while (tep->te_rsrv_active) {
2076                 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2077         }
2078         cv_signal(&tep->te_srv_cv);
2079         mutex_exit(&tep->te_srv_lock);
2080 }
2081 
2082 /* ARGSUSED */
2083 static void
2084 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2085 {
2086         tl_endpt_t *peer_tep;
2087 
2088         if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2089                 tl_cl_backenable(tep);
2090         } else if (
2091             IS_COTS(tep) &&
2092             ((peer_tep = tep->te_conp) != NULL) &&
2093             !peer_tep->te_closing &&
2094             ((tep->te_state == TS_DATA_XFER) ||
2095             (tep->te_state == TS_WIND_ORDREL)||
2096             (tep->te_state == TS_WREQ_ORDREL))) {
2097                 TL_QENABLE(peer_tep);
2098         }
2099 
2100         /*
2101          * Wakeup read side service routine.
2102          */
2103         mutex_enter(&tep->te_srv_lock);
2104         ASSERT(tep->te_rsrv_active);
2105         tep->te_rsrv_active = B_FALSE;
2106         cv_signal(&tep->te_srv_cv);
2107         mutex_exit(&tep->te_srv_lock);
2108         tl_serializer_exit(tep);
2109 }
2110 
2111 /*
2112  * process M_PROTO messages. Always called from serializer.
2113  */
2114 static void
2115 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2116 {
2117         ssize_t                 msz = MBLKL(mp);
2118         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
2119 
2120         /* Message size was validated by tl_wput(). */
2121         ASSERT(msz >= sizeof (prim->type));
2122 
2123         switch (prim->type) {
2124         case T_UNBIND_REQ:
2125                 tl_unbind(mp, tep);
2126                 break;
2127 
2128         case T_ADDR_REQ:
2129                 tl_addr_req(mp, tep);
2130                 break;
2131 
2132         case O_T_CONN_RES:
2133         case T_CONN_RES:
2134                 if (IS_CLTS(tep)) {
2135                         tl_merror(tep->te_wq, mp, EPROTO);
2136                         break;
2137                 }
2138                 tl_conn_res(mp, tep);
2139                 break;
2140 
2141         case T_DISCON_REQ:
2142                 if (IS_CLTS(tep)) {
2143                         tl_merror(tep->te_wq, mp, EPROTO);
2144                         break;
2145                 }
2146                 tl_discon_req(mp, tep);
2147                 break;
2148 
2149         case T_DATA_REQ:
2150                 if (IS_CLTS(tep)) {
2151                         tl_merror(tep->te_wq, mp, EPROTO);
2152                         break;
2153                 }
2154                 tl_data(mp, tep);
2155                 break;
2156 
2157         case T_OPTDATA_REQ:
2158                 if (IS_CLTS(tep)) {
2159                         tl_merror(tep->te_wq, mp, EPROTO);
2160                         break;
2161                 }
2162                 tl_data(mp, tep);
2163                 break;
2164 
2165         case T_EXDATA_REQ:
2166                 if (IS_CLTS(tep)) {
2167                         tl_merror(tep->te_wq, mp, EPROTO);
2168                         break;
2169                 }
2170                 tl_exdata(mp, tep);
2171                 break;
2172 
2173         case T_ORDREL_REQ:
2174                 if (! IS_COTSORD(tep)) {
2175                         tl_merror(tep->te_wq, mp, EPROTO);
2176                         break;
2177                 }
2178                 tl_ordrel(mp, tep);
2179                 break;
2180 
2181         case T_UNITDATA_REQ:
2182                 if (IS_COTS(tep)) {
2183                         tl_merror(tep->te_wq, mp, EPROTO);
2184                         break;
2185                 }
2186                 tl_unitdata(mp, tep);
2187                 break;
2188 
2189         default:
2190                 tl_merror(tep->te_wq, mp, EPROTO);
2191                 break;
2192         }
2193 }
2194 
2195 /*
2196  * Process ioctl from serializer.
2197  * This is a wrapper around tl_do_ioctl().
2198  */
2199 static void
2200 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2201 {
2202         if (! tep->te_closing)
2203                 tl_do_ioctl(mp, tep);
2204         else
2205                 freemsg(mp);
2206 
2207         tl_serializer_exit(tep);
2208         tl_refrele(tep);
2209 }
2210 
2211 static void
2212 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2213 {
2214         struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2215         int cmd = iocbp->ioc_cmd;
2216         queue_t *wq = tep->te_wq;
2217         int error;
2218         int thisopt, otheropt;
2219 
2220         ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2221 
2222         switch (cmd) {
2223         case TL_IOC_CREDOPT:
2224                 if (cmd == TL_IOC_CREDOPT) {
2225                         thisopt = TL_SETCRED;
2226                         otheropt = TL_SETUCRED;
2227                 } else {
2228                         /* FALLTHROUGH */
2229         case TL_IOC_UCREDOPT:
2230                         thisopt = TL_SETUCRED;
2231                         otheropt = TL_SETCRED;
2232                 }
2233                 /*
2234                  * The credentials passing does not apply to sockets.
2235                  * Only one of the cred options can be set at a given time.
2236                  */
2237                 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2238                         miocnak(wq, mp, 0, EINVAL);
2239                         return;
2240                 }
2241 
2242                 /*
2243                  * Turn on generation of credential options for
2244                  * T_conn_req, T_conn_con, T_unidata_ind.
2245                  */
2246                 error = miocpullup(mp, sizeof (uint32_t));
2247                 if (error != 0) {
2248                         miocnak(wq, mp, 0, error);
2249                         return;
2250                 }
2251                 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2252                         miocnak(wq, mp, 0, EINVAL);
2253                         return;
2254                 }
2255 
2256                 if (*(uint32_t *)mp->b_cont->b_rptr)
2257                         tep->te_flag |= thisopt;
2258                 else
2259                         tep->te_flag &= ~thisopt;
2260 
2261                 miocack(wq, mp, 0, 0);
2262                 break;
2263 
2264         default:
2265                 /* Should not be here */
2266                 miocnak(wq, mp, 0, EINVAL);
2267                 break;
2268         }
2269 }
2270 
2271 
2272 /*
2273  * send T_ERROR_ACK
2274  * Note: assumes enough memory or caller passed big enough mp
2275  *      - no recovery from allocb failures
2276  */
2277 
2278 static void
2279 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2280     t_scalar_t unix_err, t_scalar_t type)
2281 {
2282         struct T_error_ack *err_ack;
2283         mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2284             M_PCPROTO, T_ERROR_ACK);
2285 
2286         if (ackmp == NULL) {
2287                 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2288                     "tl_error_ack:out of mblk memory"));
2289                 tl_merror(wq, NULL, ENOSR);
2290                 return;
2291         }
2292         err_ack = (struct T_error_ack *)ackmp->b_rptr;
2293         err_ack->ERROR_prim = type;
2294         err_ack->TLI_error = tli_err;
2295         err_ack->UNIX_error = unix_err;
2296 
2297         /*
2298          * send error ack message
2299          */
2300         qreply(wq, ackmp);
2301 }
2302 
2303 
2304 
2305 /*
2306  * send T_OK_ACK
2307  * Note: assumes enough memory or caller passed big enough mp
2308  *      - no recovery from allocb failures
2309  */
2310 static void
2311 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2312 {
2313         struct T_ok_ack *ok_ack;
2314         mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2315             M_PCPROTO, T_OK_ACK);
2316 
2317         if (ackmp == NULL) {
2318                 tl_merror(wq, NULL, ENOMEM);
2319                 return;
2320         }
2321 
2322         ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2323         ok_ack->CORRECT_prim = type;
2324 
2325         (void) qreply(wq, ackmp);
2326 }
2327 
2328 /*
2329  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2330  * This is a wrapper around tl_bind().
2331  */
2332 static void
2333 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2334 {
2335         if (! tep->te_closing)
2336                 tl_bind(mp, tep);
2337         else
2338                 freemsg(mp);
2339 
2340         tl_serializer_exit(tep);
2341         tl_refrele(tep);
2342 }
2343 
2344 /*
2345  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2346  * Assumes that the endpoint is in the unbound state.
2347  */
2348 static void
2349 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2350 {
2351         queue_t                 *wq = tep->te_wq;
2352         struct T_bind_ack       *b_ack;
2353         struct T_bind_req       *bind = (struct T_bind_req *)mp->b_rptr;
2354         mblk_t                  *ackmp, *bamp;
2355         soux_addr_t             ux_addr;
2356         t_uscalar_t             qlen = 0;
2357         t_scalar_t              alen, aoff;
2358         tl_addr_t               addr_req;
2359         void                    *addr_startp;
2360         ssize_t                 msz = MBLKL(mp), basize;
2361         t_scalar_t              tli_err = 0, unix_err = 0;
2362         t_scalar_t              save_prim_type = bind->PRIM_type;
2363         t_scalar_t              save_state = tep->te_state;
2364 
2365         if (tep->te_state != TS_UNBND) {
2366                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2367                     SL_TRACE|SL_ERROR,
2368                     "tl_wput:bind_request:out of state, state=%d",
2369                     tep->te_state));
2370                 tli_err = TOUTSTATE;
2371                 goto error;
2372         }
2373 
2374         if (msz < sizeof (struct T_bind_req)) {
2375                 tli_err = TSYSERR; unix_err = EINVAL;
2376                 goto error;
2377         }
2378 
2379         tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2380 
2381         ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2382             (bind->PRIM_type == T_BIND_REQ));
2383 
2384         alen = bind->ADDR_length;
2385         aoff = bind->ADDR_offset;
2386 
2387         /* negotiate max conn req pending */
2388         if (IS_COTS(tep)) {
2389                 qlen = bind->CONIND_number;
2390                 if (qlen > tl_maxqlen)
2391                         qlen = tl_maxqlen;
2392         }
2393 
2394         /*
2395          * Reserve hash handle. It can only be NULL if the endpoint is unbound
2396          * and bound again.
2397          */
2398         if ((tep->te_hash_hndl == NULL) &&
2399             ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2400             mod_hash_reserve_nosleep(tep->te_addrhash,
2401             &tep->te_hash_hndl) != 0) {
2402                 tli_err = TSYSERR; unix_err = ENOSR;
2403                 goto error;
2404         }
2405 
2406         /*
2407          * Verify address correctness.
2408          */
2409         if (IS_SOCKET(tep)) {
2410                 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2411 
2412                 if ((alen != TL_SOUX_ADDRLEN) ||
2413                     (aoff < 0) ||
2414                     (aoff + alen > msz)) {
2415                         (void) (STRLOG(TL_ID, tep->te_minor,
2416                             1, SL_TRACE|SL_ERROR,
2417                             "tl_bind: invalid socket addr"));
2418                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2419                         tli_err = TSYSERR; unix_err = EINVAL;
2420                         goto error;
2421                 }
2422                 /* Copy address from message to local buffer. */
2423                 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2424                 /*
2425                  * Check that we got correct address from sockets
2426                  */
2427                 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2428                     (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2429                         (void) (STRLOG(TL_ID, tep->te_minor,
2430                             1, SL_TRACE|SL_ERROR,
2431                             "tl_bind: invalid socket magic"));
2432                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2433                         tli_err = TSYSERR; unix_err = EINVAL;
2434                         goto error;
2435                 }
2436                 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2437                     (ux_addr.soua_vp != NULL)) {
2438                         (void) (STRLOG(TL_ID, tep->te_minor,
2439                             1, SL_TRACE|SL_ERROR,
2440                             "tl_bind: implicit addr non-empty"));
2441                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2442                         tli_err = TSYSERR; unix_err = EINVAL;
2443                         goto error;
2444                 }
2445                 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2446                     (ux_addr.soua_vp == NULL)) {
2447                         (void) (STRLOG(TL_ID, tep->te_minor,
2448                             1, SL_TRACE|SL_ERROR,
2449                             "tl_bind: explicit addr empty"));
2450                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2451                         tli_err = TSYSERR; unix_err = EINVAL;
2452                         goto error;
2453                 }
2454         } else {
2455                 if ((alen > 0) && ((aoff < 0) ||
2456                     ((ssize_t)(aoff + alen) > msz) ||
2457                     ((aoff + alen) < 0))) {
2458                         (void) (STRLOG(TL_ID, tep->te_minor,
2459                             1, SL_TRACE|SL_ERROR,
2460                             "tl_bind: invalid message"));
2461                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2462                         tli_err = TSYSERR; unix_err = EINVAL;
2463                         goto error;
2464                 }
2465                 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2466                         (void) (STRLOG(TL_ID, tep->te_minor,
2467                             1, SL_TRACE|SL_ERROR,
2468                             "tl_bind: bad addr in  message"));
2469                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2470                         tli_err = TBADADDR;
2471                         goto error;
2472                 }
2473 #ifdef DEBUG
2474                 /*
2475                  * Mild form of ASSERT()ion to detect broken TPI apps.
2476                  * if (! assertion)
2477                  *      log warning;
2478                  */
2479                 if (! ((alen == 0 && aoff == 0) ||
2480                         (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2481                         (void) (STRLOG(TL_ID, tep->te_minor,
2482                                     3, SL_TRACE|SL_ERROR,
2483                                     "tl_bind: addr overlaps TPI message"));
2484                 }
2485 #endif
2486         }
2487 
2488         /*
2489          * Bind the address provided or allocate one if requested.
2490          * Allow rebinds with a new qlen value.
2491          */
2492         if (IS_SOCKET(tep)) {
2493                 /*
2494                  * For anonymous requests the te_ap is already set up properly
2495                  * so use minor number as an address.
2496                  * For explicit requests need to check whether the address is
2497                  * already in use.
2498                  */
2499                 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2500                         int rc;
2501 
2502                         if (tep->te_flag & TL_ADDRHASHED) {
2503                                 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2504                                 if (tep->te_vp == ux_addr.soua_vp)
2505                                         goto skip_addr_bind;
2506                                 else /* Rebind to a new address. */
2507                                         tl_addr_unbind(tep);
2508                         }
2509                         /*
2510                          * Insert address in the hash if it is not already
2511                          * there.  Since we use preallocated handle, the insert
2512                          * can fail only if the key is already present.
2513                          */
2514                         rc = mod_hash_insert_reserve(tep->te_addrhash,
2515                             (mod_hash_key_t)ux_addr.soua_vp,
2516                             (mod_hash_val_t)tep, tep->te_hash_hndl);
2517 
2518                         if (rc != 0) {
2519                                 ASSERT(rc == MH_ERR_DUPLICATE);
2520                                 /*
2521                                  * Violate O_T_BIND_REQ semantics and fail with
2522                                  * TADDRBUSY - sockets will not use any address
2523                                  * other than supplied one for explicit binds.
2524                                  */
2525                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2526                                     SL_TRACE|SL_ERROR,
2527                                     "tl_bind:requested addr %p is busy",
2528                                     ux_addr.soua_vp));
2529                                 tli_err = TADDRBUSY; unix_err = 0;
2530                                 goto error;
2531                         }
2532                         tep->te_uxaddr = ux_addr;
2533                         tep->te_flag |= TL_ADDRHASHED;
2534                         tep->te_hash_hndl = NULL;
2535                 }
2536         } else if (alen == 0) {
2537                 /*
2538                  * assign any free address
2539                  */
2540                 if (! tl_get_any_addr(tep, NULL)) {
2541                         (void) (STRLOG(TL_ID, tep->te_minor,
2542                             1, SL_TRACE|SL_ERROR,
2543                             "tl_bind:failed to get buffer for any "
2544                             "address"));
2545                         tli_err = TSYSERR; unix_err = ENOSR;
2546                         goto error;
2547                 }
2548         } else {
2549                 addr_req.ta_alen = alen;
2550                 addr_req.ta_abuf = (mp->b_rptr + aoff);
2551                 addr_req.ta_zoneid = tep->te_zoneid;
2552 
2553                 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2554                 if (tep->te_abuf == NULL) {
2555                         tli_err = TSYSERR; unix_err = ENOSR;
2556                         goto error;
2557                 }
2558                 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2559                 tep->te_alen = alen;
2560 
2561                 if (mod_hash_insert_reserve(tep->te_addrhash,
2562                     (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2563                     tep->te_hash_hndl) != 0) {
2564                         if (save_prim_type == T_BIND_REQ) {
2565                                 /*
2566                                  * The bind semantics for this primitive
2567                                  * require a failure if the exact address
2568                                  * requested is busy
2569                                  */
2570                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2571                                     SL_TRACE|SL_ERROR,
2572                                     "tl_bind:requested addr is busy"));
2573                                 tli_err = TADDRBUSY; unix_err = 0;
2574                                 goto error;
2575                         }
2576 
2577                         /*
                         * O_T_BIND_REQ semantics say that if the requested
                         * address is busy, bind to any available free address
2580                          */
2581                         if (! tl_get_any_addr(tep, &addr_req)) {
2582                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2583                                     SL_TRACE|SL_ERROR,
2584                                     "tl_bind:unable to get any addr buf"));
2585                                 tli_err = TSYSERR; unix_err = ENOMEM;
2586                                 goto error;
2587                         }
2588                 } else {
2589                         tep->te_flag |= TL_ADDRHASHED;
2590                         tep->te_hash_hndl = NULL;
2591                 }
2592         }
2593 
2594         ASSERT(tep->te_alen >= 0);
2595 
2596 skip_addr_bind:
2597         /*
2598          * prepare T_BIND_ACK TPI message
2599          */
2600         basize = sizeof (struct T_bind_ack) + tep->te_alen;
2601         bamp = reallocb(mp, basize, 0);
2602         if (bamp == NULL) {
2603                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2604                     "tl_wput:tl_bind: allocb failed"));
2605                 /*
2606                  * roll back state changes
2607                  */
2608                 tl_addr_unbind(tep);
2609                 tep->te_state = TS_UNBND;
2610                 tl_memrecover(wq, mp, basize);
2611                 return;
2612         }
2613 
2614         DB_TYPE(bamp) = M_PCPROTO;
2615         bamp->b_wptr = bamp->b_rptr + basize;
2616         b_ack = (struct T_bind_ack *)bamp->b_rptr;
2617         b_ack->PRIM_type = T_BIND_ACK;
2618         b_ack->CONIND_number = qlen;
2619         b_ack->ADDR_length = tep->te_alen;
2620         b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2621         addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2622         bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2623 
2624         if (IS_COTS(tep)) {
2625                 tep->te_qlen = qlen;
2626                 if (qlen > 0)
2627                         tep->te_flag |= TL_LISTENER;
2628         }
2629 
2630         tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2631         /*
2632          * send T_BIND_ACK message
2633          */
2634         (void) qreply(wq, bamp);
2635         return;
2636 
2637 error:
2638         ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2639         if (ackmp == NULL) {
2640                 /*
2641                  * roll back state changes
2642                  */
2643                 tep->te_state = save_state;
2644                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2645                 return;
2646         }
2647         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2648         tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2649 }
2650 
2651 /*
2652  * Process T_UNBIND_REQ.
2653  * Called from serializer.
2654  */
2655 static void
2656 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2657 {
2658         queue_t *wq;
2659         mblk_t *ackmp;
2660 
2661         if (tep->te_closing) {
2662                 freemsg(mp);
2663                 return;
2664         }
2665 
2666         wq = tep->te_wq;
2667 
2668         /*
2669          * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2670          * ==> allocate for T_ERROR_ACK (known max)
2671          */
2672         if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2673                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2674                 return;
2675         }
2676         /*
2677          * memory resources committed
2678          * Note: no message validation. T_UNBIND_REQ message is
2679          * same size as PRIM_type field so already verified earlier.
2680          */
2681 
2682         /*
2683          * validate state
2684          */
2685         if (tep->te_state != TS_IDLE) {
2686                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2687                     SL_TRACE|SL_ERROR,
2688                     "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2689                     tep->te_state));
2690                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2691                 return;
2692         }
2693         tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2694 
2695         /*
2696          * TPI says on T_UNBIND_REQ:
2697          *    send up a M_FLUSH to flush both
2698          *    read and write queues
2699          */
2700         (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2701 
2702         if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2703             tep->te_magic != SOU_MAGIC_EXPLICIT) {
2704 
2705                 /*
2706                  * Sockets use bind with qlen==0 followed by bind() to
2707                  * the same address with qlen > 0 for listeners.
2708                  * We allow rebind with a new qlen value.
2709                  */
2710                 tl_addr_unbind(tep);
2711         }
2712 
2713         tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2714         /*
2715          * send  T_OK_ACK
2716          */
2717         tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2718 }
2719 
2720 
2721 /*
2722  * Option management code from drv/ip is used here
2723  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2724  *      database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2725  *      However, that is what we want as that option is 'unorthodox'
2726  *      and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2727  *      and not in T_SVR4_OPTMGMT_REQ/ACK
2728  * Note2: use of optcom_req means this routine is an exception to
2729  *       recovery from allocb() failures.
2730  */
2731 
2732 static void
2733 tl_optmgmt(queue_t *wq, mblk_t *mp)
2734 {
2735         tl_endpt_t *tep;
2736         mblk_t *ackmp;
2737         union T_primitives *prim;
2738         cred_t *cr;
2739 
2740         tep = (tl_endpt_t *)wq->q_ptr;
2741         prim = (union T_primitives *)mp->b_rptr;
2742 
2743         /*
2744          * All Solaris components should pass a db_credp
2745          * for this TPI message, hence we ASSERT.
2746          * But in case there is some other M_PROTO that looks
2747          * like a TPI message sent by some other kernel
2748          * component, we check and return an error.
2749          */
2750         cr = msg_getcred(mp, NULL);
2751         ASSERT(cr != NULL);
2752         if (cr == NULL) {
2753                 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2754                 return;
2755         }
2756 
2757         /*  all states OK for AF_UNIX options ? */
2758         if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2759             prim->type == T_SVR4_OPTMGMT_REQ) {
2760                 /*
2761                  * Broken TLI semantics that options can only be managed
2762                  * in TS_IDLE state. Needed for Sparc ABI test suite that
2763                  * tests this TLI (mis)feature using this device driver.
2764                  */
2765                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2766                     SL_TRACE|SL_ERROR,
2767                     "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2768                     tep->te_state));
2769                 /*
2770                  * preallocate memory for T_ERROR_ACK
2771                  */
2772                 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2773                 if (! ackmp) {
2774                         tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2775                         return;
2776                 }
2777 
2778                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2779                 freemsg(mp);
2780                 return;
2781         }
2782 
2783         /*
2784          * call common option management routine from drv/ip
2785          */
2786         if (prim->type == T_SVR4_OPTMGMT_REQ) {
2787                 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2788         } else {
2789                 ASSERT(prim->type == T_OPTMGMT_REQ);
2790                 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2791         }
2792 }
2793 
2794 /*
2795  * Handle T_conn_req - the driver part of accept().
2796  * If TL_SET[U]CRED generate the credentials options.
2797  * If this is a socket pass through options unmodified.
2798  * For sockets generate the T_CONN_CON here instead of
2799  * waiting for the T_CONN_RES.
2800  */
2801 static void
2802 tl_conn_req(queue_t *wq, mblk_t *mp)
2803 {
2804         tl_endpt_t              *tep = (tl_endpt_t *)wq->q_ptr;
2805         struct T_conn_req       *creq = (struct T_conn_req *)mp->b_rptr;
2806         ssize_t                 msz = MBLKL(mp);
2807         t_scalar_t              alen, aoff, olen, ooff, err = 0;
2808         tl_endpt_t              *peer_tep = NULL;
2809         mblk_t                  *ackmp;
2810         mblk_t                  *dimp;
2811         struct T_discon_ind     *di;
2812         soux_addr_t             ux_addr;
2813         tl_addr_t               dst;
2814 
2815         ASSERT(IS_COTS(tep));
2816 
2817         if (tep->te_closing) {
2818                 freemsg(mp);
2819                 return;
2820         }
2821 
2822         /*
2823          * preallocate memory for:
2824          * 1. max of T_ERROR_ACK and T_OK_ACK
2825          *      ==> known max T_ERROR_ACK
2826          * 2. max of T_DISCON_IND and T_CONN_IND
2827          */
2828         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2829         if (! ackmp) {
2830                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2831                 return;
2832         }
2833         /*
2834          * memory committed for T_OK_ACK/T_ERROR_ACK now
2835          * will be committed for T_DISCON_IND/T_CONN_IND later
2836          */
2837 
2838         if (tep->te_state != TS_IDLE) {
2839                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2840                     SL_TRACE|SL_ERROR,
2841                     "tl_wput:T_CONN_REQ:out of state, state=%d",
2842                     tep->te_state));
2843                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2844                 freemsg(mp);
2845                 return;
2846         }
2847 
2848         /*
2849          * validate the message
2850          * Note: dereference fields in struct inside message only
2851          * after validating the message length.
2852          */
2853         if (msz < sizeof (struct T_conn_req)) {
2854                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2855                     "tl_conn_req:invalid message length"));
2856                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2857                 freemsg(mp);
2858                 return;
2859         }
2860         alen = creq->DEST_length;
2861         aoff = creq->DEST_offset;
2862         olen = creq->OPT_length;
2863         ooff = creq->OPT_offset;
2864         if (olen == 0)
2865                 ooff = 0;
2866 
2867         if (IS_SOCKET(tep)) {
2868                 if ((alen != TL_SOUX_ADDRLEN) ||
2869                     (aoff < 0) ||
2870                     (aoff + alen > msz) ||
2871                     (alen > msz - sizeof (struct T_conn_req))) {
2872                         (void) (STRLOG(TL_ID, tep->te_minor,
2873                                     1, SL_TRACE|SL_ERROR,
2874                                     "tl_conn_req: invalid socket addr"));
2875                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2876                         freemsg(mp);
2877                         return;
2878                 }
2879                 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2880                 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2881                     (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2882                         (void) (STRLOG(TL_ID, tep->te_minor,
2883                             1, SL_TRACE|SL_ERROR,
2884                             "tl_conn_req: invalid socket magic"));
2885                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2886                         freemsg(mp);
2887                         return;
2888                 }
2889         } else {
2890                 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2891                     (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2892                     ooff + olen < 0)) ||
2893                     olen < 0 || ooff < 0) {
2894                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2895                             SL_TRACE|SL_ERROR,
2896                             "tl_conn_req:invalid message"));
2897                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2898                         freemsg(mp);
2899                         return;
2900                 }
2901 
2902                 if (alen <= 0 || aoff < 0 ||
2903                     (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2904                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2905                                     SL_TRACE|SL_ERROR,
2906                                     "tl_conn_req:bad addr in message, "
2907                                     "alen=%d, msz=%ld",
2908                                     alen, msz));
2909                         tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2910                         freemsg(mp);
2911                         return;
2912                 }
2913 #ifdef DEBUG
2914                 /*
2915                  * Mild form of ASSERT()ion to detect broken TPI apps.
2916                  * if (! assertion)
2917                  *      log warning;
2918                  */
2919                 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2920                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
2921                             SL_TRACE|SL_ERROR,
2922                             "tl_conn_req: addr overlaps TPI message"));
2923                 }
2924 #endif
2925                 if (olen) {
2926                         /*
2927                          * no opts in connect req
2928                          * supported in this provider except for sockets.
2929                          */
2930                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2931                             SL_TRACE|SL_ERROR,
2932                             "tl_conn_req:options not supported "
2933                             "in message"));
2934                         tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2935                         freemsg(mp);
2936                         return;
2937                 }
2938         }
2939 
2940         /*
2941          * Prevent tep from closing on us.
2942          */
2943         if (! tl_noclose(tep)) {
2944                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2945                     "tl_conn_req:endpoint is closing"));
2946                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2947                 freemsg(mp);
2948                 return;
2949         }
2950 
2951         tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2952         /*
2953          * get endpoint to connect to
2954          * check that peer with DEST addr is bound to addr
2955          * and has CONIND_number > 0
2956          */
2957         dst.ta_alen = alen;
2958         dst.ta_abuf = mp->b_rptr + aoff;
2959         dst.ta_zoneid = tep->te_zoneid;
2960 
2961         /*
2962          * Verify if remote addr is in use
2963          */
2964         peer_tep = (IS_SOCKET(tep) ?
2965             tl_sock_find_peer(tep, &ux_addr) :
2966             tl_find_peer(tep, &dst));
2967 
2968         if (peer_tep == NULL) {
2969                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2970                     "tl_conn_req:no one at connect address"));
2971                 err = ECONNREFUSED;
2972         } else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2973                 /*
2974                  * validate that number of incoming connection is
2975                  * not to capacity on destination endpoint
2976                  */
2977                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2978                     "tl_conn_req: qlen overflow connection refused"));
2979                         err = ECONNREFUSED;
2980         }
2981 
2982         /*
2983          * Send T_DISCON_IND in case of error
2984          */
2985         if (err != 0) {
2986                 if (peer_tep != NULL)
2987                         tl_refrele(peer_tep);
2988                 /* We are still expected to send T_OK_ACK */
2989                 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2990                 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2991                 tl_closeok(tep);
2992                 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2993                     M_PROTO, T_DISCON_IND);
2994                 if (dimp == NULL) {
2995                         tl_merror(wq, NULL, ENOSR);
2996                         return;
2997                 }
2998                 di = (struct T_discon_ind *)dimp->b_rptr;
2999                 di->DISCON_reason = err;
3000                 di->SEQ_number = BADSEQNUM;
3001 
3002                 tep->te_state = TS_IDLE;
3003                 /*
3004                  * send T_DISCON_IND message
3005                  */
3006                 putnext(tep->te_rq, dimp);
3007                 return;
3008         }
3009 
3010         ASSERT(IS_COTS(peer_tep));
3011 
3012         /*
3013          * Found the listener. At this point processing will continue on
3014          * listener serializer. Close of the endpoint should be blocked while we
3015          * switch serializers.
3016          */
3017         tl_serializer_refhold(peer_tep->te_ser);
3018         tl_serializer_refrele(tep->te_ser);
3019         tep->te_ser = peer_tep->te_ser;
3020         ASSERT(tep->te_oconp == NULL);
3021         tep->te_oconp = peer_tep;
3022 
3023         /*
3024          * It is safe to close now. Close may continue on listener serializer.
3025          */
3026         tl_closeok(tep);
3027 
3028         /*
3029          * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3030          * data, so we link mp to ackmp.
3031          */
3032         ackmp->b_cont = mp;
3033         mp = ackmp;
3034 
3035         tl_refhold(tep);
3036         tl_serializer_enter(tep, tl_conn_req_ser, mp);
3037 }
3038 
3039 /*
3040  * Finish T_CONN_REQ processing on listener serializer.
3041  */
3042 static void
3043 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3044 {
3045         queue_t         *wq;
3046         tl_endpt_t      *peer_tep = tep->te_oconp;
3047         mblk_t          *confmp, *cimp, *indmp;
3048         void            *opts = NULL;
3049         mblk_t          *ackmp = mp;
3050         struct T_conn_req       *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3051         struct T_conn_ind       *ci;
3052         tl_icon_t       *tip;
3053         void            *addr_startp;
3054         t_scalar_t      olen = creq->OPT_length;
3055         t_scalar_t      ooff = creq->OPT_offset;
3056         size_t          ci_msz;
3057         size_t          size;
3058         cred_t          *cr = NULL;
3059         pid_t           cpid;
3060 
3061         if (tep->te_closing) {
3062                 TL_UNCONNECT(tep->te_oconp);
3063                 tl_serializer_exit(tep);
3064                 tl_refrele(tep);
3065                 freemsg(mp);
3066                 return;
3067         }
3068 
3069         wq = tep->te_wq;
3070         tep->te_flag |= TL_EAGER;
3071 
3072         /*
3073          * Extract preallocated ackmp from mp.
3074          */
3075         mp = mp->b_cont;
3076         ackmp->b_cont = NULL;
3077 
3078         if (olen == 0)
3079                 ooff = 0;
3080 
3081         if (peer_tep->te_closing ||
3082             !((peer_tep->te_state == TS_IDLE) ||
3083             (peer_tep->te_state == TS_WRES_CIND))) {
3084                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3085                     "tl_conn_req:peer in bad state (%d)",
3086                     peer_tep->te_state));
3087                 TL_UNCONNECT(tep->te_oconp);
3088                 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3089                 freemsg(ackmp);
3090                 tl_serializer_exit(tep);
3091                 tl_refrele(tep);
3092                 return;
3093         }
3094 
3095         /*
3096          * preallocate now for T_DISCON_IND or T_CONN_IND
3097          */
3098         /*
3099          * calculate length of T_CONN_IND message
3100          */
3101         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3102                 cr = msg_getcred(mp, &cpid);
3103                 ASSERT(cr != NULL);
3104                 if (peer_tep->te_flag & TL_SETCRED) {
3105                         ooff = 0;
3106                         olen = (t_scalar_t) sizeof (struct opthdr) +
3107                             OPTLEN(sizeof (tl_credopt_t));
3108                         /* 1 option only */
3109                 } else {
3110                         ooff = 0;
3111                         olen = (t_scalar_t)sizeof (struct opthdr) +
3112                             OPTLEN(ucredminsize(cr));
3113                         /* 1 option only */
3114                 }
3115         }
3116         ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3117         ci_msz = T_ALIGN(ci_msz) + olen;
3118         size = max(ci_msz, sizeof (struct T_discon_ind));
3119 
3120         /*
3121          * Save options from mp - we'll need them for T_CONN_IND.
3122          */
3123         if (ooff != 0) {
3124                 opts = kmem_alloc(olen, KM_NOSLEEP);
3125                 if (opts == NULL) {
3126                         /*
3127                          * roll back state changes
3128                          */
3129                         tep->te_state = TS_IDLE;
3130                         tl_memrecover(wq, mp, size);
3131                         freemsg(ackmp);
3132                         TL_UNCONNECT(tep->te_oconp);
3133                         tl_serializer_exit(tep);
3134                         tl_refrele(tep);
3135                         return;
3136                 }
3137                 /* Copy options to a temp buffer */
3138                 bcopy(mp->b_rptr + ooff, opts, olen);
3139         }
3140 
3141         if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3142                 /*
3143                  * Generate a T_CONN_CON that has the identical address
3144                  * (and options) as the T_CONN_REQ.
3145                  * NOTE: assumes that the T_conn_req and T_conn_con structures
3146                  * are isomorphic.
3147                  */
3148                 confmp = copyb(mp);
3149                 if (! confmp) {
3150                         /*
3151                          * roll back state changes
3152                          */
3153                         tep->te_state = TS_IDLE;
3154                         tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3155                         freemsg(ackmp);
3156                         if (opts != NULL)
3157                                 kmem_free(opts, olen);
3158                         TL_UNCONNECT(tep->te_oconp);
3159                         tl_serializer_exit(tep);
3160                         tl_refrele(tep);
3161                         return;
3162                 }
3163                 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3164                     T_CONN_CON;
3165         } else {
3166                 confmp = NULL;
3167         }
3168         if ((indmp = reallocb(mp, size, 0)) == NULL) {
3169                 /*
3170                  * roll back state changes
3171                  */
3172                 tep->te_state = TS_IDLE;
3173                 tl_memrecover(wq, mp, size);
3174                 freemsg(ackmp);
3175                 if (opts != NULL)
3176                         kmem_free(opts, olen);
3177                 freemsg(confmp);
3178                 TL_UNCONNECT(tep->te_oconp);
3179                 tl_serializer_exit(tep);
3180                 tl_refrele(tep);
3181                 return;
3182         }
3183 
3184         tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3185         if (tip == NULL) {
3186                 /*
3187                  * roll back state changes
3188                  */
3189                 tep->te_state = TS_IDLE;
3190                 tl_memrecover(wq, indmp, sizeof (*tip));
3191                 freemsg(ackmp);
3192                 if (opts != NULL)
3193                         kmem_free(opts, olen);
3194                 freemsg(confmp);
3195                 TL_UNCONNECT(tep->te_oconp);
3196                 tl_serializer_exit(tep);
3197                 tl_refrele(tep);
3198                 return;
3199         }
3200         tip->ti_mp = NULL;
3201 
3202         /*
3203          * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3204          * and tl_icon_t cell.
3205          */
3206 
3207         /*
3208          * ack validity of request and send the peer credential in the ACK.
3209          */
3210         tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3211 
3212         if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3213             confmp != NULL) {
3214                 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3215         }
3216 
3217         tl_ok_ack(wq, ackmp, T_CONN_REQ);
3218 
3219         /*
3220          * prepare message to send T_CONN_IND
3221          */
3222         /*
3223          * allocate the message - original data blocks retained
3224          * in the returned mblk
3225          */
3226         cimp = tl_resizemp(indmp, size);
3227         if (! cimp) {
3228                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3229                     "tl_conn_req:con_ind:allocb failure"));
3230                 tl_merror(wq, indmp, ENOMEM);
3231                 TL_UNCONNECT(tep->te_oconp);
3232                 tl_serializer_exit(tep);
3233                 tl_refrele(tep);
3234                 if (opts != NULL)
3235                         kmem_free(opts, olen);
3236                 freemsg(confmp);
3237                 ASSERT(tip->ti_mp == NULL);
3238                 kmem_free(tip, sizeof (*tip));
3239                 return;
3240         }
3241 
3242         DB_TYPE(cimp) = M_PROTO;
3243         ci = (struct T_conn_ind *)cimp->b_rptr;
3244         ci->PRIM_type  = T_CONN_IND;
3245         ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3246         ci->SRC_length = tep->te_alen;
3247         ci->SEQ_number = tep->te_seqno;
3248 
3249         addr_startp = cimp->b_rptr + ci->SRC_offset;
3250         bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3251         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3252 
3253                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3254                     ci->SRC_length);
3255                 ci->OPT_length = olen; /* because only 1 option */
3256                 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3257                     cr, cpid,
3258                     peer_tep->te_flag, peer_tep->te_credp);
3259         } else if (ooff != 0) {
3260                 /* Copy option from T_CONN_REQ */
3261                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3262                     ci->SRC_length);
3263                 ci->OPT_length = olen;
3264                 ASSERT(opts != NULL);
3265                 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3266         } else {
3267                 ci->OPT_offset = 0;
3268                 ci->OPT_length = 0;
3269         }
3270         if (opts != NULL)
3271                 kmem_free(opts, olen);
3272 
3273         /*
3274          * register connection request with server peer
3275          * append to list of incoming connections
3276          * increment references for both peer_tep and tep: peer_tep is placed on
3277          * te_oconp and tep is placed on listeners queue.
3278          */
3279         tip->ti_tep = tep;
3280         tip->ti_seqno = tep->te_seqno;
3281         list_insert_tail(&peer_tep->te_iconp, tip);
3282         peer_tep->te_nicon++;
3283 
3284         peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3285         /*
3286          * send the T_CONN_IND message
3287          */
3288         putnext(peer_tep->te_rq, cimp);
3289 
3290         /*
3291          * Send a T_CONN_CON message for sockets.
3292          * Disable the queues until we have reached the correct state!
3293          */
3294         if (confmp != NULL) {
3295                 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3296                 noenable(wq);
3297                 putnext(tep->te_rq, confmp);
3298         }
3299         /*
3300          * Now we need to increment tep reference because tep is referenced by
3301          * server list of pending connections. We also need to decrement
3302          * reference before exiting serializer. Two operations void each other
3303          * so we don't modify reference at all.
3304          */
3305         ASSERT(tep->te_refcnt >= 2);
3306         ASSERT(peer_tep->te_refcnt >= 2);
3307         tl_serializer_exit(tep);
3308 }
3309 
3310 
3311 
3312 /*
3313  * Handle T_CONN_RES/O_T_CONN_RES (mp) on listener stream tep. Called on
3314  * listener serializer. mp is never handed back to the caller: it is freed,
3315  * passed to tl_memrecover()/tl_merror(), or reused for T_DISCON_IND/T_CONN_CON.
3316  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3317  * Switch eager (client) serializer to acceptor's when possible; otherwise
3318  * the acceptor is moved to the client's (same as listener's) serializer.
3319  * If TL_SET[U]CRED generate the credentials options.
3320  * For sockets tl_conn_req has already generated the T_CONN_CON.
3321  */
3322 static void
3323 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3324 {
3325         queue_t                 *wq;
3326         struct T_conn_res       *cres = (struct T_conn_res *)mp->b_rptr;
3327         ssize_t                 msz = MBLKL(mp);
3328         t_scalar_t              olen, ooff, err = 0;
3329         t_scalar_t              prim = cres->PRIM_type;
3330         uchar_t                 *addr_startp;
3331         tl_endpt_t              *acc_ep = NULL, *cl_ep = NULL;
3332         tl_icon_t               *tip;
3333         size_t                  size;
3334         mblk_t                  *ackmp, *respmp;
3335         mblk_t                  *dimp, *ccmp = NULL;
3336         struct T_discon_ind     *di;
3337         struct T_conn_con       *cc;
3338         boolean_t               client_noclose_set = B_FALSE; /* tl_noclose(cl_ep) succeeded */
3339         boolean_t               switch_client_serializer = B_TRUE; /* else acceptor moves */
3340 
3341         ASSERT(IS_COTS(tep));
3342 
3343         if (tep->te_closing) {
3344                 freemsg(mp);
3345                 return;
3346         }
3347 
3348         wq = tep->te_wq;
3349 
3350         /*
3351          * preallocate memory for:
3352          * 1. max of T_ERROR_ACK and T_OK_ACK
3353          *      ==> known max T_ERROR_ACK
3354          * 2. max of T_DISCON_IND and T_CONN_CON
3355          */
3356         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3357         if (! ackmp) {
3358                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3359                 return;
3360         }
3361         /*
3362          * memory committed for T_OK_ACK/T_ERROR_ACK now
3363          * will be committed for T_DISCON_IND/T_CONN_CON later
3364          */
3365 
3366 
3367         ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3368 
3369         /*
3370          * validate state
3371          */
3372         if (tep->te_state != TS_WRES_CIND) {
3373                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3374                     SL_TRACE|SL_ERROR,
3375                     "tl_wput:T_CONN_RES:out of state, state=%d",
3376                     tep->te_state));
3377                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3378                 freemsg(mp);
3379                 return;
3380         }
3381 
3382         /*
3383          * validate the message
3384          * Note: dereference fields in struct inside message only
3385          * after validating the message length.
3386          */
3387         if (msz < sizeof (struct T_conn_res)) {
3388                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3389                     "tl_conn_res:invalid message length"));
3390                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3391                 freemsg(mp);
3392                 return;
3393         }
3394         olen = cres->OPT_length;
3395         ooff = cres->OPT_offset;
3396         if (((olen > 0) && ((ooff + olen) > msz))) {
3397                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3398                     "tl_conn_res:invalid message"));
3399                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3400                 freemsg(mp);
3401                 return;
3402         }
3403         if (olen) {
3404                 /*
3405                  * no opts in connect res
3406                  * supported in this provider
3407                  */
3408                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3409                     "tl_conn_res:options not supported in message"));
3410                 tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3411                 freemsg(mp);
3412                 return;
3413         }
3414 
3415         tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3416         ASSERT(tep->te_state == TS_WACK_CRES);
3417 
3418         if (cres->SEQ_number < TL_MINOR_START &&
3419             cres->SEQ_number >= BADSEQNUM) { /* i.e. in [BADSEQNUM, TL_MINOR_START) */
3420                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3421                     "tl_conn_res:remote endpoint sequence number bad"));
3422                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3423                 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3424                 freemsg(mp);
3425                 return;
3426         }
3427 
3428         /*
3429          * find accepting endpoint. Will have extra reference if found.
3430          */
3431         if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3432             (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3433             (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3434                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3435                     "tl_conn_res:bad accepting endpoint"));
3436                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3437                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3438                 freemsg(mp);
3439                 return;
3440         }
3441 
3442         /*
3443          * Prevent acceptor from closing; reply TBADF if tl_noclose() fails.
3444          */
3445         if (! tl_noclose(acc_ep)) {
3446                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3447                     "tl_conn_res:bad accepting endpoint"));
3448                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3449                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3450                 tl_refrele(acc_ep);
3451                 freemsg(mp);
3452                 return;
3453         }
3454 
3455         acc_ep->te_flag |= TL_ACCEPTOR;
3456 
3457         /*
3458          * validate that accepting endpoint, if different from listening
3459          * has address bound => state is TS_IDLE
3460          * TROUBLE in XPG4 !!?
3461          */
3462         if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3463                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3464                     "tl_conn_res:accepting endpoint has no address bound,"
3465                     "state=%d", acc_ep->te_state));
3466                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3467                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3468                 freemsg(mp);
3469                 tl_closeok(acc_ep);
3470                 tl_refrele(acc_ep);
3471                 return;
3472         }
3473 
3474         /*
3475          * validate if accepting endpt same as listening, then
3476          * no other incoming connection should be on the queue
3477          */
3478 
3479         if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3480                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3481                     "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3482                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3483                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3484                 freemsg(mp);
3485                 tl_closeok(acc_ep);
3486                 tl_refrele(acc_ep);
3487                 return;
3488         }
3489 
3490         /*
3491          * Look up the pending-connection entry (tl_icon_t) recorded on
3492          * the listener for the sequence number named in the request.
3493          * The entry is released later via tl_freetip(); if no entry
3494          * matches, the request is rejected with TBADSEQ.
3495          */
3496         tip = tl_icon_find(tep, cres->SEQ_number);
3497         if (tip == NULL) {
3498                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3499                     "tl_conn_res:no client in listener list"));
3500                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3501                 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3502                 freemsg(mp);
3503                 tl_closeok(acc_ep);
3504                 tl_refrele(acc_ep);
3505                 return;
3506         }
3507 
3508         /*
3509          * If ti_tep is NULL the client has already closed. In this case
3510          * the code below will avoid any action on the client side
3511          * but complete the server and acceptor state transitions.
3512          */
3513         ASSERT(tip->ti_tep == NULL ||
3514             tip->ti_tep->te_seqno == cres->SEQ_number);
3515         cl_ep = tip->ti_tep;
3516 
3517         /*
3518          * If the client is present it is switched from listener's to acceptor's
3519          * serializer. We should block client closes while serializers are
3520          * being switched.
3521          *
3522          * It is possible that the client is present but is currently being
3523          * closed. There are two possible cases:
3524          *
3525          * 1) The client has already entered tl_close_finish_ser() and sent
3526          *    T_ORDREL_IND. In this case we can just ignore the client (but we
3527          *    still need to send all messages from tip->ti_mp to the acceptor).
3528          *
3529          * 2) The client started the close but has not entered
3530          *    tl_close_finish_ser() yet. In this case, the client is already
3531          *    proceeding asynchronously on the listener's serializer, so we're
3532          *    forced to change the acceptor to use the listener's serializer to
3533          *    ensure that any operations on the acceptor are serialized with
3534          *    respect to the close that's in-progress.
3535          */
3536         if (cl_ep != NULL) {
3537                 if (tl_noclose(cl_ep)) {
3538                         client_noclose_set = B_TRUE;
3539                 } else {
3540                         /*
3541                          * Client is closing. If it has sent the
3542                          * T_ORDREL_IND, we can simply ignore it - otherwise,
3543                          * we have to let the client continue until it is
3544                          * sent.
3545                          *
3546                          * If we do continue using the client, acceptor will
3547                          * switch to client's serializer which is used by client
3548                          * for its close.
3549                          */
3550                         tl_client_closing_when_accepting++;
3551                         switch_client_serializer = B_FALSE;
3552                         if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3553                             cl_ep->te_state == -1) /* NOTE(review): -1 presumably marks case 1 above - confirm */
3554                                 cl_ep = NULL;
3555                 }
3556         }
3557 
3558         if (cl_ep != NULL) {
3559                 /*
3560                  * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3561                  * (latter for sockets only)
3562                  */
3563                 if (cl_ep->te_state != TS_WCON_CREQ &&
3564                     (cl_ep->te_state != TS_DATA_XFER && /* NOTE(review): IS_SOCKET() is ANDed in, so this */
3565                     IS_SOCKET(cl_ep))) { /* never fails for non-socket clients - confirm intended */
3566                         err = ECONNREFUSED;
3567                         /*
3568                          * T_DISCON_IND sent later after committing memory
3569                          * and acking validity of request
3570                          */
3571                         (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3572                             "tl_conn_res:peer in bad state"));
3573                 }
3574 
3575                 /*
3576                  * preallocate now for T_DISCON_IND or T_CONN_CON
3577                  * ack validity of request (T_OK_ACK) after memory committed
3578                  */
3579 
3580                 if (err)
3581                         size = sizeof (struct T_discon_ind);
3582                 else {
3583                         /*
3584                          * calculate length of T_CONN_CON message
3585                          */
3586                         olen = 0; /* olen is reused as the T_CONN_CON option length */
3587                         if (cl_ep->te_flag & TL_SETCRED) {
3588                                 olen = (t_scalar_t)sizeof (struct opthdr) +
3589                                     OPTLEN(sizeof (tl_credopt_t));
3590                         } else if (cl_ep->te_flag & TL_SETUCRED) {
3591                                 olen = (t_scalar_t)sizeof (struct opthdr) +
3592                                     OPTLEN(ucredminsize(acc_ep->te_credp));
3593                         }
3594                         size = T_ALIGN(sizeof (struct T_conn_con) +
3595                             acc_ep->te_alen) + olen;
3596                 }
3597                 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3598                         /*
3599                          * roll back state changes
3600                          */
3601                         tep->te_state = TS_WRES_CIND;
3602                         tl_memrecover(wq, mp, size);
3603                         freemsg(ackmp);
3604                         if (client_noclose_set)
3605                                 tl_closeok(cl_ep);
3606                         tl_closeok(acc_ep);
3607                         tl_refrele(acc_ep);
3608                         return;
3609                 }
3610                 mp = NULL; /* respmp is used in place of mp from here on */
3611         }
3612 
3613         /*
3614          * Now ack validity of request
3615          */
3616         if (tep->te_nicon == 1) {
3617                 if (tep == acc_ep)
3618                         tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3619                 else
3620                         tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3621         } else
3622                 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3623 
3624         /*
3625          * send T_DISCON_IND now if client state validation failed earlier
3626          */
3627         if (err) {
3628                 tl_ok_ack(wq, ackmp, prim);
3629                 /*
3630                  * flush the queues - why always ?
3631                  */
3632                 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3633 
3634                 dimp = tl_resizemp(respmp, size);
3635                 if (! dimp) {
3636                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
3637                             SL_TRACE|SL_ERROR,
3638                             "tl_conn_res:con_ind:allocb failure"));
3639                         tl_merror(wq, respmp, ENOMEM);
3640                         tl_closeok(acc_ep);
3641                         if (client_noclose_set)
3642                                 tl_closeok(cl_ep);
3643                         tl_refrele(acc_ep);
3644                         return;
3645                 }
3646                 if (dimp->b_cont) {
3647                         /* no user data in provider generated discon ind */
3648                         freemsg(dimp->b_cont);
3649                         dimp->b_cont = NULL;
3650                 }
3651 
3652                 DB_TYPE(dimp) = M_PROTO;
3653                 di = (struct T_discon_ind *)dimp->b_rptr;
3654                 di->PRIM_type  = T_DISCON_IND;
3655                 di->DISCON_reason = err;
3656                 di->SEQ_number = BADSEQNUM;
3657 
3658                 tep->te_state = TS_IDLE; /* overrides the TE_OK_ACK* state set above */
3659                 /*
3660                  * send T_DISCON_IND message
3661                  */
3662                 putnext(acc_ep->te_rq, dimp);
3663                 if (client_noclose_set)
3664                         tl_closeok(cl_ep);
3665                 tl_closeok(acc_ep);
3666                 tl_refrele(acc_ep);
3667                 return;
3668         }
3669 
3670         /*
3671          * now start connecting the accepting endpoint
3672          */
3673         if (tep != acc_ep)
3674                 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3675 
3676         if (cl_ep == NULL) {
3677                 /*
3678                  * The client has already closed. Send up any queued messages
3679                  * and change the state accordingly.
3680                  */
3681                 tl_ok_ack(wq, ackmp, prim);
3682                 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3683 
3684                 /*
3685                  * remove endpoint from incoming connection
3686                  * delete client from list of incoming connections
3687                  */
3688                 tl_freetip(tep, tip);
3689                 freemsg(mp);
3690                 tl_closeok(acc_ep);
3691                 tl_refrele(acc_ep);
3692                 return;
3693         } else if (tip->ti_mp != NULL) {
3694                 /*
3695                  * The client could have queued a T_DISCON_IND which needs
3696                  * to be sent up.
3697                  * Note that t_discon_req can not operate the same as
3698                  * t_data_req since it is not possible for it to putbq
3699                  * the message and return -1 due to the use of qwriter.
3700                  */
3701                 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3702         }
3703 
3704         /*
3705          * prepare connect confirm T_CONN_CON message
3706          */
3707 
3708         /*
3709          * allocate the message - original data blocks
3710          * retained in the returned mblk
3711          */
3712         if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3713                 ccmp = tl_resizemp(respmp, size);
3714                 if (ccmp == NULL) {
3715                         tl_ok_ack(wq, ackmp, prim);
3716                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
3717                             SL_TRACE|SL_ERROR,
3718                             "tl_conn_res:conn_con:allocb failure"));
3719                         tl_merror(wq, respmp, ENOMEM);
3720                         tl_closeok(acc_ep);
3721                         if (client_noclose_set)
3722                                 tl_closeok(cl_ep);
3723                         tl_refrele(acc_ep);
3724                         return;
3725                 }
3726 
3727                 DB_TYPE(ccmp) = M_PROTO;
3728                 cc = (struct T_conn_con *)ccmp->b_rptr;
3729                 cc->PRIM_type  = T_CONN_CON;
3730                 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3731                 cc->RES_length = acc_ep->te_alen;
3732                 addr_startp = ccmp->b_rptr + cc->RES_offset;
3733                 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3734                 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3735                         cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3736                             cc->RES_length);
3737                         cc->OPT_length = olen;
3738                         tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3739                             acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3740                             cl_ep->te_credp);
3741                 } else {
3742                         cc->OPT_offset = 0;
3743                         cc->OPT_length = 0;
3744                 }
3745                 /*
3746                  * Forward the credential in the packet so it can be picked up
3747                  * at the higher layers for more complete credential processing
3748                  */
3749                 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3750         } else {
3751                 freemsg(respmp);
3752                 respmp = NULL;
3753         }
3754 
3755         /*
3756          * make connection linking
3757          * accepting and client endpoints
3758          * No need to increment references:
3759          *      on client: it should already have one from tip->ti_tep linkage.
3760          *      on acceptor it should already have one from the table lookup.
3761          *
3762          * At this point both client and acceptor can't close. Set client
3763          * serializer to acceptor's.
3764          */
3765         ASSERT(cl_ep->te_refcnt >= 2);
3766         ASSERT(acc_ep->te_refcnt >= 2);
3767         ASSERT(cl_ep->te_conp == NULL);
3768         ASSERT(acc_ep->te_conp == NULL);
3769         cl_ep->te_conp = acc_ep;
3770         acc_ep->te_conp = cl_ep;
3771         ASSERT(cl_ep->te_ser == tep->te_ser);
3772         if (switch_client_serializer) {
3773                 mutex_enter(&cl_ep->te_ser_lock);
3774                 if (cl_ep->te_ser_count > 0) {
3775                         switch_client_serializer = B_FALSE;
3776                         tl_serializer_noswitch++;
3777                 } else {
3778                         /*
3779                          * Move client to the acceptor's serializer.
3780                          */
3781                         tl_serializer_refhold(acc_ep->te_ser);
3782                         tl_serializer_refrele(cl_ep->te_ser);
3783                         cl_ep->te_ser = acc_ep->te_ser;
3784                 }
3785                 mutex_exit(&cl_ep->te_ser_lock);
3786         }
3787         if (!switch_client_serializer) {
3788                 /*
3789                  * It is not possible to switch client to use acceptor's.
3790                  * Move acceptor to client's serializer (which is the same as
3791                  * listener's).
3792                  */
3793                 tl_serializer_refhold(cl_ep->te_ser);
3794                 tl_serializer_refrele(acc_ep->te_ser);
3795                 acc_ep->te_ser = cl_ep->te_ser;
3796         }
3797 
3798         TL_REMOVE_PEER(cl_ep->te_oconp);
3799         TL_REMOVE_PEER(acc_ep->te_oconp);
3800 
3801         /*
3802          * remove endpoint from incoming connection
3803          * delete client from list of incoming connections
3804          */
3805         tip->ti_tep = NULL;
3806         tl_freetip(tep, tip);
3807         tl_ok_ack(wq, ackmp, prim);
3808 
3809         /*
3810          * data blocks already linked in reallocb()
3811          */
3812 
3813         /*
3814          * link queues so that I_SENDFD will work
3815          */
3816         if (! IS_SOCKET(tep)) {
3817                 acc_ep->te_wq->q_next = cl_ep->te_rq;
3818                 cl_ep->te_wq->q_next = acc_ep->te_rq;
3819         }
3820 
3821         /*
3822          * send T_CONN_CON up on client side unless it was already
3823          * done (for a socket). In cases any data or ordrel req has been
3824          * queued make sure that the service procedure runs.
3825          */
3826         if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3827                 enableok(cl_ep->te_wq);
3828                 TL_QENABLE(cl_ep);
3829                 if (ccmp != NULL) /* NOTE(review): ccmp is only set in the opposite case above, */
3830                         freemsg(ccmp); /* so it appears to be always NULL here - confirm */
3831         } else {
3832                 /*
3833                  * change client state on TE_CONN_CON event
3834                  */
3835                 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3836                 putnext(cl_ep->te_rq, ccmp);
3837         }
3838 
3839         /* Mark both endpoints as accepted */
3840         cl_ep->te_flag |= TL_ACCEPTED;
3841         acc_ep->te_flag |= TL_ACCEPTED;
3842 
3843         /*
3844          * Allow client and acceptor to close.
3845          */
3846         tl_closeok(acc_ep);
3847         if (client_noclose_set)
3848                 tl_closeok(cl_ep);
3849 }
3850 
3851 
3852 
3853 
3854 static void
3855 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3856 {
3857         queue_t                 *wq;
3858         struct T_discon_req     *dr;
3859         ssize_t                 msz;
3860         tl_endpt_t              *peer_tep = tep->te_conp;
3861         tl_endpt_t              *srv_tep = tep->te_oconp;
3862         tl_icon_t               *tip;
3863         size_t                  size;
3864         mblk_t                  *ackmp, *dimp, *respmp;
3865         struct T_discon_ind     *di;
3866         t_scalar_t              save_state, new_state;
3867 
3868         if (tep->te_closing) {
3869                 freemsg(mp);
3870                 return;
3871         }
3872 
3873         if ((peer_tep != NULL) && peer_tep->te_closing) {
3874                 TL_UNCONNECT(tep->te_conp);
3875                 peer_tep = NULL;
3876         }
3877         if ((srv_tep != NULL) && srv_tep->te_closing) {
3878                 TL_UNCONNECT(tep->te_oconp);
3879                 srv_tep = NULL;
3880         }
3881 
3882         wq = tep->te_wq;
3883 
3884         /*
3885          * preallocate memory for:
3886          * 1. max of T_ERROR_ACK and T_OK_ACK
3887          *      ==> known max T_ERROR_ACK
3888          * 2. for  T_DISCON_IND
3889          */
3890         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3891         if (! ackmp) {
3892                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3893                 return;
3894         }
3895         /*
3896          * memory committed for T_OK_ACK/T_ERROR_ACK now
3897          * will be committed for T_DISCON_IND  later
3898          */
3899 
3900         dr = (struct T_discon_req *)mp->b_rptr;
3901         msz = MBLKL(mp);
3902 
3903         /*
3904          * validate the state
3905          */
3906         save_state = new_state = tep->te_state;
3907         if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3908             ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3909                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3910                     SL_TRACE|SL_ERROR,
3911                     "tl_wput:T_DISCON_REQ:out of state, state=%d",
3912                     tep->te_state));
3913                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3914                 freemsg(mp);
3915                 return;
3916         }
3917         /*
3918          * Defer committing the state change until it is determined if
3919          * the message will be queued with the tl_icon or not.
3920          */
3921         new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3922 
3923         /* validate the message */
3924         if (msz < sizeof (struct T_discon_req)) {
3925                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3926                     "tl_discon_req:invalid message"));
3927                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3928                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3929                 freemsg(mp);
3930                 return;
3931         }
3932 
3933         /*
3934          * if server, then validate that client exists
3935          * by connection sequence number etc.
3936          */
3937         if (tep->te_nicon > 0) { /* server */
3938 
3939                 /*
3940                  * search server list for disconnect client
3941                  */
3942                 tip = tl_icon_find(tep, dr->SEQ_number);
3943                 if (tip == NULL) {
3944                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
3945                             SL_TRACE|SL_ERROR,
3946                             "tl_discon_req:no disconnect endpoint"));
3947                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3948                         tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3949                         freemsg(mp);
3950                         return;
3951                 }
3952                 /*
3953                  * If ti_tep is NULL the client has already closed. In this case
3954                  * the code below will avoid any action on the client side.
3955                  */
3956 
3957                 IMPLY(tip->ti_tep != NULL,
3958                     tip->ti_tep->te_seqno == dr->SEQ_number);
3959                 peer_tep = tip->ti_tep;
3960         }
3961 
3962         /*
3963          * preallocate now for T_DISCON_IND
3964          * ack validity of request (T_OK_ACK) after memory committed
3965          */
3966         size = sizeof (struct T_discon_ind);
3967         if ((respmp = reallocb(mp, size, 0)) == NULL) {
3968                 tl_memrecover(wq, mp, size);
3969                 freemsg(ackmp);
3970                 return;
3971         }
3972 
3973         /*
3974          * prepare message to ack validity of request
3975          */
3976         if (tep->te_nicon == 0)
3977                 new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3978         else
3979                 if (tep->te_nicon == 1)
3980                         new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3981                 else
3982                         new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3983 
3984         /*
3985          * Flushing queues according to TPI. Using the old state.
3986          */
3987         if ((tep->te_nicon <= 1) &&
3988             ((save_state == TS_DATA_XFER) ||
3989             (save_state == TS_WIND_ORDREL) ||
3990             (save_state == TS_WREQ_ORDREL)))
3991                 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3992 
3993         /* send T_OK_ACK up  */
3994         tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3995 
3996         /*
3997          * now do disconnect business
3998          */
3999         if (tep->te_nicon > 0) { /* listener */
4000                 if (peer_tep != NULL && !peer_tep->te_closing) {
4001                         /*
4002                          * disconnect incoming connect request pending to tep
4003                          */
4004                         if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4005                                 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4006                                     SL_TRACE|SL_ERROR,
4007                                     "tl_discon_req: reallocb failed"));
4008                                 tep->te_state = new_state;
4009                                 tl_merror(wq, respmp, ENOMEM);
4010                                 return;
4011                         }
4012                         di = (struct T_discon_ind *)dimp->b_rptr;
4013                         di->SEQ_number = BADSEQNUM;
4014                         save_state = peer_tep->te_state;
4015                         peer_tep->te_state = TS_IDLE;
4016 
4017                         TL_REMOVE_PEER(peer_tep->te_oconp);
4018                         enableok(peer_tep->te_wq);
4019                         TL_QENABLE(peer_tep);
4020                 } else {
4021                         freemsg(respmp);
4022                         dimp = NULL;
4023                 }
4024 
4025                 /*
4026                  * remove endpoint from incoming connection list
4027                  * - remove disconnect client from list on server
4028                  */
4029                 tl_freetip(tep, tip);
4030         } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4031                 /*
4032                  * disconnect an outgoing request pending from tep
4033                  */
4034 
4035                 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4036                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
4037                             SL_TRACE|SL_ERROR,
4038                             "tl_discon_req: reallocb failed"));
4039                         tep->te_state = new_state;
4040                         tl_merror(wq, respmp, ENOMEM);
4041                         return;
4042                 }
4043                 di = (struct T_discon_ind *)dimp->b_rptr;
4044                 DB_TYPE(dimp) = M_PROTO;
4045                 di->PRIM_type  = T_DISCON_IND;
4046                 di->DISCON_reason = ECONNRESET;
4047                 di->SEQ_number = tep->te_seqno;
4048 
4049                 /*
4050                  * If this is a socket the T_DISCON_IND is queued with
4051                  * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4052                  * from the list of pending connections.
4053                  * Note that when te_oconp is set the peer better have
4054                  * a t_connind_t for the client.
4055                  */
4056                 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4057                         /*
4058                          * No need to check that
4059                          * ti_tep == NULL since the T_DISCON_IND
4060                          * takes precedence over other queued
4061                          * messages.
4062                          */
4063                         tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4064                         peer_tep = NULL;
4065                         dimp = NULL;
4066                         /*
4067                          * Can't clear te_oconp since tl_co_unconnect needs
4068                          * it as a hint not to free the tep.
4069                          * Keep the state unchanged since tl_conn_res inspects
4070                          * it.
4071                          */
4072                         new_state = tep->te_state;
4073                 } else {
4074                         /* Found - delete it */
4075                         tip = tl_icon_find(peer_tep, tep->te_seqno);
4076                         if (tip != NULL) {
4077                                 ASSERT(tep == tip->ti_tep);
4078                                 save_state = peer_tep->te_state;
4079                                 if (peer_tep->te_nicon == 1)
4080                                         peer_tep->te_state =
4081                                             NEXTSTATE(TE_DISCON_IND2,
4082                                             peer_tep->te_state);
4083                                 else
4084                                         peer_tep->te_state =
4085                                             NEXTSTATE(TE_DISCON_IND3,
4086                                             peer_tep->te_state);
4087                                 tl_freetip(peer_tep, tip);
4088                         }
4089                         ASSERT(tep->te_oconp != NULL);
4090                         TL_UNCONNECT(tep->te_oconp);
4091                 }
4092         } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4093                 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4094                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
4095                             SL_TRACE|SL_ERROR,
4096                             "tl_discon_req: reallocb failed"));
4097                         tep->te_state = new_state;
4098                         tl_merror(wq, respmp, ENOMEM);
4099                         return;
4100                 }
4101                 di = (struct T_discon_ind *)dimp->b_rptr;
4102                 di->SEQ_number = BADSEQNUM;
4103 
4104                 save_state = peer_tep->te_state;
4105                 peer_tep->te_state = TS_IDLE;
4106         } else {
4107                 /* Not connected */
4108                 tep->te_state = new_state;
4109                 freemsg(respmp);
4110                 return;
4111         }
4112 
4113         /* Commit state changes */
4114         tep->te_state = new_state;
4115 
4116         if (peer_tep == NULL) {
4117                 ASSERT(dimp == NULL);
4118                 goto done;
4119         }
4120         /*
4121          * Flush queues on peer before sending up
4122          * T_DISCON_IND according to TPI
4123          */
4124 
4125         if ((save_state == TS_DATA_XFER) ||
4126             (save_state == TS_WIND_ORDREL) ||
4127             (save_state == TS_WREQ_ORDREL))
4128                 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4129 
4130         DB_TYPE(dimp) = M_PROTO;
4131         di->PRIM_type  = T_DISCON_IND;
4132         di->DISCON_reason = ECONNRESET;
4133 
4134         /*
4135          * data blocks already linked into dimp by reallocb()
4136          */
4137         /*
4138          * send indication message to peer user module
4139          */
4140         ASSERT(dimp != NULL);
4141         putnext(peer_tep->te_rq, dimp);
4142 done:
4143         if (tep->te_conp) {  /* disconnect pointers if connected */
4144                 ASSERT(! peer_tep->te_closing);
4145 
4146                 /*
4147                  * Messages may be queued on peer's write queue
4148                  * waiting to be processed by its write service
4149                  * procedure. Before the pointer to the peer transport
4150                  * structure is set to NULL, qenable the peer's write
4151                  * queue so that the queued up messages are processed.
4152                  */
4153                 if ((save_state == TS_DATA_XFER) ||
4154                     (save_state == TS_WIND_ORDREL) ||
4155                     (save_state == TS_WREQ_ORDREL))
4156                         TL_QENABLE(peer_tep);
4157                 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4158                 TL_UNCONNECT(peer_tep->te_conp);
4159                 if (! IS_SOCKET(tep)) {
4160                         /*
4161                          * unlink the streams
4162                          */
4163                         tep->te_wq->q_next = NULL;
4164                         peer_tep->te_wq->q_next = NULL;
4165                 }
4166                 TL_UNCONNECT(tep->te_conp);
4167         }
4168 }
4169 
4170 static void
4171 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4172 {
4173         if (!tep->te_closing)
4174                 tl_addr_req(mp, tep);
4175         else
4176                 freemsg(mp);
4177 
4178         tl_serializer_exit(tep);
4179         tl_refrele(tep);
4180 }
4181 
4182 static void
4183 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4184 {
4185         queue_t                 *wq;
4186         size_t                  ack_sz;
4187         mblk_t                  *ackmp;
4188         struct T_addr_ack       *taa;
4189 
4190         if (tep->te_closing) {
4191                 freemsg(mp);
4192                 return;
4193         }
4194 
4195         wq = tep->te_wq;
4196 
4197         /*
4198          * Note: T_ADDR_REQ message has only PRIM_type field
4199          * so it is already validated earlier.
4200          */
4201 
4202         if (IS_CLTS(tep) ||
4203             (tep->te_state > TS_WREQ_ORDREL) ||
4204             (tep->te_state < TS_DATA_XFER)) {
4205                 /*
4206                  * Either connectionless or connection oriented but not
4207                  * in connected data transfer state or half-closed states.
4208                  */
4209                 ack_sz = sizeof (struct T_addr_ack);
4210                 if (tep->te_state >= TS_IDLE)
4211                         /* is bound */
4212                         ack_sz += tep->te_alen;
4213                 ackmp = reallocb(mp, ack_sz, 0);
4214                 if (ackmp == NULL) {
4215                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4216                             SL_TRACE|SL_ERROR,
4217                             "tl_addr_req: reallocb failed"));
4218                         tl_memrecover(wq, mp, ack_sz);
4219                         return;
4220                 }
4221 
4222                 taa = (struct T_addr_ack *)ackmp->b_rptr;
4223 
4224                 bzero(taa, sizeof (struct T_addr_ack));
4225 
4226                 taa->PRIM_type = T_ADDR_ACK;
4227                 ackmp->b_datap->db_type = M_PCPROTO;
4228                 ackmp->b_wptr = (uchar_t *)&taa[1];
4229 
4230                 if (tep->te_state >= TS_IDLE) {
4231                         /* endpoint is bound */
4232                         taa->LOCADDR_length = tep->te_alen;
4233                         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4234 
4235                         bcopy(tep->te_abuf, ackmp->b_wptr,
4236                             tep->te_alen);
4237                         ackmp->b_wptr += tep->te_alen;
4238                         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4239                 }
4240 
4241                 (void) qreply(wq, ackmp);
4242         } else {
4243                 ASSERT(tep->te_state == TS_DATA_XFER ||
4244                     tep->te_state == TS_WIND_ORDREL ||
4245                     tep->te_state == TS_WREQ_ORDREL);
4246                 /* connection oriented in data transfer */
4247                 tl_connected_cots_addr_req(mp, tep);
4248         }
4249 }
4250 
4251 
4252 static void
4253 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4254 {
4255         tl_endpt_t              *peer_tep;
4256         size_t                  ack_sz;
4257         mblk_t                  *ackmp;
4258         struct T_addr_ack       *taa;
4259         uchar_t                 *addr_startp;
4260 
4261         if (tep->te_closing) {
4262                 freemsg(mp);
4263                 return;
4264         }
4265 
4266         ASSERT(tep->te_state >= TS_IDLE);
4267 
4268         ack_sz = sizeof (struct T_addr_ack);
4269         ack_sz += T_ALIGN(tep->te_alen);
4270         peer_tep = tep->te_conp;
4271         ack_sz += peer_tep->te_alen;
4272 
4273         ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4274         if (ackmp == NULL) {
4275                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4276                     "tl_connected_cots_addr_req: reallocb failed"));
4277                 tl_memrecover(tep->te_wq, mp, ack_sz);
4278                 return;
4279         }
4280 
4281         taa = (struct T_addr_ack *)ackmp->b_rptr;
4282 
4283         /* endpoint is bound */
4284         taa->LOCADDR_length = tep->te_alen;
4285         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4286 
4287         addr_startp = (uchar_t *)&taa[1];
4288 
4289         bcopy(tep->te_abuf, addr_startp,
4290             tep->te_alen);
4291 
4292         taa->REMADDR_length = peer_tep->te_alen;
4293         taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4294             taa->LOCADDR_length);
4295         addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4296         bcopy(peer_tep->te_abuf, addr_startp,
4297             peer_tep->te_alen);
4298         ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4299             taa->REMADDR_offset + peer_tep->te_alen;
4300         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4301 
4302         putnext(tep->te_rq, ackmp);
4303 }
4304 
4305 static void
4306 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4307 {
4308         if (IS_CLTS(tep)) {
4309                 *ia = tl_clts_info_ack;
4310                 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4311         } else {
4312                 *ia = tl_cots_info_ack;
4313                 if (IS_COTSORD(tep))
4314                         ia->SERV_type = T_COTS_ORD;
4315         }
4316         ia->TIDU_size = tl_tidusz;
4317         ia->CURRENT_state = tep->te_state;
4318 }
4319 
4320 /*
4321  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4322  * tl_wput.
4323  */
4324 static void
4325 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4326 {
4327         mblk_t                  *ackmp;
4328         t_uscalar_t             cap_bits1;
4329         struct T_capability_ack *tcap;
4330 
4331         if (tep->te_closing) {
4332                 freemsg(mp);
4333                 return;
4334         }
4335 
4336         cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4337 
4338         ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4339             M_PCPROTO, T_CAPABILITY_ACK);
4340         if (ackmp == NULL) {
4341                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4342                     "tl_capability_req: reallocb failed"));
4343                 tl_memrecover(tep->te_wq, mp,
4344                     sizeof (struct T_capability_ack));
4345                 return;
4346         }
4347 
4348         tcap = (struct T_capability_ack *)ackmp->b_rptr;
4349         tcap->CAP_bits1 = 0;
4350 
4351         if (cap_bits1 & TC1_INFO) {
4352                 tl_copy_info(&tcap->INFO_ack, tep);
4353                 tcap->CAP_bits1 |= TC1_INFO;
4354         }
4355 
4356         if (cap_bits1 & TC1_ACCEPTOR_ID) {
4357                 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4358                 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4359         }
4360 
4361         putnext(tep->te_rq, ackmp);
4362 }
4363 
4364 static void
4365 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4366 {
4367         if (! tep->te_closing)
4368                 tl_info_req(mp, tep);
4369         else
4370                 freemsg(mp);
4371 
4372         tl_serializer_exit(tep);
4373         tl_refrele(tep);
4374 }
4375 
4376 static void
4377 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4378 {
4379         mblk_t *ackmp;
4380 
4381         ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4382             M_PCPROTO, T_INFO_ACK);
4383         if (ackmp == NULL) {
4384                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4385                     "tl_info_req: reallocb failed"));
4386                 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4387                 return;
4388         }
4389 
4390         /*
4391          * fill in T_INFO_ACK contents
4392          */
4393         tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4394 
4395         /*
4396          * send ack message
4397          */
4398         putnext(tep->te_rq, ackmp);
4399 }
4400 
4401 /*
4402  * Handle M_DATA, T_data_req and T_optdata_req.
4403  * If this is a socket pass through T_optdata_req options unmodified.
4404  */
4405 static void
4406 tl_data(mblk_t *mp, tl_endpt_t *tep)
4407 {
4408         queue_t                 *wq = tep->te_wq;
4409         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4410         ssize_t                 msz = MBLKL(mp);
4411         tl_endpt_t              *peer_tep;
4412         queue_t                 *peer_rq;
4413         boolean_t               closing = tep->te_closing;
4414 
4415         if (IS_CLTS(tep)) {
4416                 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4417                     SL_TRACE|SL_ERROR,
4418                     "tl_wput:clts:unattached M_DATA"));
4419                 if (!closing) {
4420                         tl_merror(wq, mp, EPROTO);
4421                 } else {
4422                         freemsg(mp);
4423                 }
4424                 return;
4425         }
4426 
4427         /*
4428          * If the endpoint is closing it should still forward any data to the
4429          * peer (if it has one). If it is not allowed to forward it can just
4430          * free the message.
4431          */
4432         if (closing &&
4433             (tep->te_state != TS_DATA_XFER) &&
4434             (tep->te_state != TS_WREQ_ORDREL)) {
4435                 freemsg(mp);
4436                 return;
4437         }
4438 
4439         if (DB_TYPE(mp) == M_PROTO) {
4440                 if (prim->type == T_DATA_REQ &&
4441                     msz < sizeof (struct T_data_req)) {
4442                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4443                                 SL_TRACE|SL_ERROR,
4444                                 "tl_data:T_DATA_REQ:invalid message"));
4445                         if (!closing) {
4446                                 tl_merror(wq, mp, EPROTO);
4447                         } else {
4448                                 freemsg(mp);
4449                         }
4450                         return;
4451                 } else if (prim->type == T_OPTDATA_REQ &&
4452                     (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4453                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4454                             SL_TRACE|SL_ERROR,
4455                             "tl_data:T_OPTDATA_REQ:invalid message"));
4456                         if (!closing) {
4457                                 tl_merror(wq, mp, EPROTO);
4458                         } else {
4459                                 freemsg(mp);
4460                         }
4461                         return;
4462                 }
4463         }
4464 
4465         /*
4466          * connection oriented provider
4467          */
4468         switch (tep->te_state) {
4469         case TS_IDLE:
4470                 /*
4471                  * Other end not here - do nothing.
4472                  */
4473                 freemsg(mp);
4474                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4475                     "tl_data:cots with endpoint idle"));
4476                 return;
4477 
4478         case TS_DATA_XFER:
4479                 /* valid states */
4480                 if (tep->te_conp != NULL)
4481                         break;
4482 
4483                 if (tep->te_oconp == NULL) {
4484                         if (!closing) {
4485                                 tl_merror(wq, mp, EPROTO);
4486                         } else {
4487                                 freemsg(mp);
4488                         }
4489                         return;
4490                 }
4491                 /*
4492                  * For a socket the T_CONN_CON is sent early thus
4493                  * the peer might not yet have accepted the connection.
4494                  * If we are closing queue the packet with the T_CONN_IND.
4495                  * Otherwise defer processing the packet until the peer
4496                  * accepts the connection.
4497                  * Note that the queue is noenabled when we go into this
4498                  * state.
4499                  */
4500                 if (!closing) {
4501                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4502                             SL_TRACE|SL_ERROR,
4503                             "tl_data: ocon"));
4504                         TL_PUTBQ(tep, mp);
4505                         return;
4506                 }
4507                 if (DB_TYPE(mp) == M_PROTO) {
4508                         if (msz < sizeof (t_scalar_t)) {
4509                                 freemsg(mp);
4510                                 return;
4511                         }
4512                         /* reuse message block - just change REQ to IND */
4513                         if (prim->type == T_DATA_REQ)
4514                                 prim->type = T_DATA_IND;
4515                         else
4516                                 prim->type = T_OPTDATA_IND;
4517                 }
4518                 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4519                 return;
4520 
4521         case TS_WREQ_ORDREL:
4522                 if (tep->te_conp == NULL) {
4523                         /*
4524                          * Other end closed - generate discon_ind
4525                          * with reason 0 to cause an EPIPE but no
4526                          * read side error on AF_UNIX sockets.
4527                          */
4528                         freemsg(mp);
4529                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
4530                             SL_TRACE|SL_ERROR,
4531                             "tl_data: WREQ_ORDREL and no peer"));
4532                         tl_discon_ind(tep, 0);
4533                         return;
4534                 }
4535                 break;
4536 
4537         default:
4538                 /* invalid state for event TE_DATA_REQ */
4539                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4540                     "tl_data:cots:out of state"));
4541                 tl_merror(wq, mp, EPROTO);
4542                 return;
4543         }
4544         /*
4545          * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4546          * (State stays same on this event)
4547          */
4548 
4549         /*
4550          * get connected endpoint
4551          */
4552         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4553                 freemsg(mp);
4554                 /* Peer closed */
4555                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4556                     "tl_data: peer gone"));
4557                 return;
4558         }
4559 
4560         ASSERT(tep->te_serializer == peer_tep->te_serializer);
4561         peer_rq = peer_tep->te_rq;
4562 
4563         /*
4564          * Put it back if flow controlled
4565          * Note: Messages already on queue when we are closing is bounded
4566          * so we can ignore flow control.
4567          */
4568         if (!canputnext(peer_rq) && !closing) {
4569                 TL_PUTBQ(tep, mp);
4570                 return;
4571         }
4572 
4573         /*
4574          * validate peer state
4575          */
4576         switch (peer_tep->te_state) {
4577         case TS_DATA_XFER:
4578         case TS_WIND_ORDREL:
4579                 /* valid states */
4580                 break;
4581         default:
4582                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4583                     "tl_data:rx side:invalid state"));
4584                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4585                 return;
4586         }
4587         if (DB_TYPE(mp) == M_PROTO) {
4588                 /* reuse message block - just change REQ to IND */
4589                 if (prim->type == T_DATA_REQ)
4590                         prim->type = T_DATA_IND;
4591                 else
4592                         prim->type = T_OPTDATA_IND;
4593         }
4594         /*
4595          * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4596          * (peer state stays same on this event)
4597          */
4598         /*
4599          * send data to connected peer
4600          */
4601         putnext(peer_rq, mp);
4602 }
4603 
4604 
4605 
4606 static void
4607 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4608 {
4609         queue_t                 *wq = tep->te_wq;
4610         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4611         ssize_t                 msz = MBLKL(mp);
4612         tl_endpt_t              *peer_tep;
4613         queue_t                 *peer_rq;
4614         boolean_t               closing = tep->te_closing;
4615 
4616         if (msz < sizeof (struct T_exdata_req)) {
4617                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4618                     "tl_exdata:invalid message"));
4619                 if (!closing) {
4620                         tl_merror(wq, mp, EPROTO);
4621                 } else {
4622                         freemsg(mp);
4623                 }
4624                 return;
4625         }
4626 
4627         /*
4628          * If the endpoint is closing it should still forward any data to the
4629          * peer (if it has one). If it is not allowed to forward it can just
4630          * free the message.
4631          */
4632         if (closing &&
4633             (tep->te_state != TS_DATA_XFER) &&
4634             (tep->te_state != TS_WREQ_ORDREL)) {
4635                 freemsg(mp);
4636                 return;
4637         }
4638 
4639         /*
4640          * validate state
4641          */
4642         switch (tep->te_state) {
4643         case TS_IDLE:
4644                 /*
4645                  * Other end not here - do nothing.
4646                  */
4647                 freemsg(mp);
4648                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4649                     "tl_exdata:cots with endpoint idle"));
4650                 return;
4651 
4652         case TS_DATA_XFER:
4653                 /* valid states */
4654                 if (tep->te_conp != NULL)
4655                         break;
4656 
4657                 if (tep->te_oconp == NULL) {
4658                         if (!closing) {
4659                                 tl_merror(wq, mp, EPROTO);
4660                         } else {
4661                                 freemsg(mp);
4662                         }
4663                         return;
4664                 }
4665                 /*
4666                  * For a socket the T_CONN_CON is sent early thus
4667                  * the peer might not yet have accepted the connection.
4668                  * If we are closing queue the packet with the T_CONN_IND.
4669                  * Otherwise defer processing the packet until the peer
4670                  * accepts the connection.
4671                  * Note that the queue is noenabled when we go into this
4672                  * state.
4673                  */
4674                 if (!closing) {
4675                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4676                             SL_TRACE|SL_ERROR,
4677                             "tl_exdata: ocon"));
4678                         TL_PUTBQ(tep, mp);
4679                         return;
4680                 }
4681                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4682                     "tl_exdata: closing socket ocon"));
4683                 prim->type = T_EXDATA_IND;
4684                 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4685                 return;
4686 
4687         case TS_WREQ_ORDREL:
4688                 if (tep->te_conp == NULL) {
4689                         /*
4690                          * Other end closed - generate discon_ind
4691                          * with reason 0 to cause an EPIPE but no
4692                          * read side error on AF_UNIX sockets.
4693                          */
4694                         freemsg(mp);
4695                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
4696                             SL_TRACE|SL_ERROR,
4697                             "tl_exdata: WREQ_ORDREL and no peer"));
4698                         tl_discon_ind(tep, 0);
4699                         return;
4700                 }
4701                 break;
4702 
4703         default:
4704                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4705                     SL_TRACE|SL_ERROR,
4706                     "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4707                     tep->te_state));
4708                 tl_merror(wq, mp, EPROTO);
4709                 return;
4710         }
4711         /*
4712          * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4713          * (state stays same on this event)
4714          */
4715 
4716         /*
4717          * get connected endpoint
4718          */
4719         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4720                 freemsg(mp);
4721                 /* Peer closed */
4722                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4723                     "tl_exdata: peer gone"));
4724                 return;
4725         }
4726 
4727         peer_rq = peer_tep->te_rq;
4728 
4729         /*
4730          * Put it back if flow controlled
4731          * Note: Messages already on queue when we are closing is bounded
4732          * so we can ignore flow control.
4733          */
4734         if (!canputnext(peer_rq) && !closing) {
4735                 TL_PUTBQ(tep, mp);
4736                 return;
4737         }
4738 
4739         /*
4740          * validate state on peer
4741          */
4742         switch (peer_tep->te_state) {
4743         case TS_DATA_XFER:
4744         case TS_WIND_ORDREL:
4745                 /* valid states */
4746                 break;
4747         default:
4748                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4749                     "tl_exdata:rx side:invalid state"));
4750                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4751                 return;
4752         }
4753         /*
4754          * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4755          * (peer state stays same on this event)
4756          */
4757         /*
4758          * reuse message block
4759          */
4760         prim->type = T_EXDATA_IND;
4761 
4762         /*
4763          * send data to connected peer
4764          */
4765         putnext(peer_rq, mp);
4766 }
4767 
4768 
4769 
4770 static void
4771 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4772 {
4773         queue_t                 *wq =  tep->te_wq;
4774         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4775         ssize_t                 msz = MBLKL(mp);
4776         tl_endpt_t              *peer_tep;
4777         queue_t                 *peer_rq;
4778         boolean_t               closing = tep->te_closing;
4779 
4780         if (msz < sizeof (struct T_ordrel_req)) {
4781                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4782                     "tl_ordrel:invalid message"));
4783                 if (!closing) {
4784                         tl_merror(wq, mp, EPROTO);
4785                 } else {
4786                         freemsg(mp);
4787                 }
4788                 return;
4789         }
4790 
4791         /*
4792          * validate state
4793          */
4794         switch (tep->te_state) {
4795         case TS_DATA_XFER:
4796         case TS_WREQ_ORDREL:
4797                 /* valid states */
4798                 if (tep->te_conp != NULL)
4799                         break;
4800 
4801                 if (tep->te_oconp == NULL)
4802                         break;
4803 
4804                 /*
4805                  * For a socket the T_CONN_CON is sent early thus
4806                  * the peer might not yet have accepted the connection.
4807                  * If we are closing queue the packet with the T_CONN_IND.
4808                  * Otherwise defer processing the packet until the peer
4809                  * accepts the connection.
4810                  * Note that the queue is noenabled when we go into this
4811                  * state.
4812                  */
4813                 if (!closing) {
4814                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4815                             SL_TRACE|SL_ERROR,
4816                             "tl_ordlrel: ocon"));
4817                         TL_PUTBQ(tep, mp);
4818                         return;
4819                 }
4820                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4821                     "tl_ordlrel: closing socket ocon"));
4822                 prim->type = T_ORDREL_IND;
4823                 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4824                 return;
4825 
4826         default:
4827                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4828                     SL_TRACE|SL_ERROR,
4829                     "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4830                     tep->te_state));
4831                 if (!closing) {
4832                         tl_merror(wq, mp, EPROTO);
4833                 } else {
4834                         freemsg(mp);
4835                 }
4836                 return;
4837         }
4838         tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4839 
4840         /*
4841          * get connected endpoint
4842          */
4843         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4844                 /* Peer closed */
4845                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4846                     "tl_ordrel: peer gone"));
4847                 freemsg(mp);
4848                 return;
4849         }
4850 
4851         peer_rq = peer_tep->te_rq;
4852 
4853         /*
4854          * Put it back if flow controlled except when we are closing.
4855          * Note: Messages already on queue when we are closing is bounded
4856          * so we can ignore flow control.
4857          */
4858         if (! canputnext(peer_rq) && !closing) {
4859                 TL_PUTBQ(tep, mp);
4860                 return;
4861         }
4862 
4863         /*
4864          * validate state on peer
4865          */
4866         switch (peer_tep->te_state) {
4867         case TS_DATA_XFER:
4868         case TS_WIND_ORDREL:
4869                 /* valid states */
4870                 break;
4871         default:
4872                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4873                     "tl_ordrel:rx side:invalid state"));
4874                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4875                 return;
4876         }
4877         peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4878 
4879         /*
4880          * reuse message block
4881          */
4882         prim->type = T_ORDREL_IND;
4883         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4884             "tl_ordrel: send ordrel_ind"));
4885 
4886         /*
4887          * send data to connected peer
4888          */
4889         putnext(peer_rq, mp);
4890 }
4891 
4892 
4893 /*
4894  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4895  */
4896 static void
4897 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4898 {
4899         size_t                  err_sz;
4900         tl_endpt_t              *tep;
4901         struct T_unitdata_req   *udreq;
4902         mblk_t                  *err_mp;
4903         t_scalar_t              alen;
4904         t_scalar_t              olen;
4905         struct T_uderror_ind    *uderr;
4906         uchar_t                 *addr_startp;
4907 
4908         err_sz = sizeof (struct T_uderror_ind);
4909         tep = (tl_endpt_t *)wq->q_ptr;
4910         udreq = (struct T_unitdata_req *)mp->b_rptr;
4911         alen = udreq->DEST_length;
4912         olen = udreq->OPT_length;
4913 
4914         if (alen > 0)
4915                 err_sz = T_ALIGN(err_sz + alen);
4916         if (olen > 0)
4917                 err_sz += olen;
4918 
4919         err_mp = allocb(err_sz, BPRI_MED);
4920         if (! err_mp) {
4921                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4922                     "tl_uderr:allocb failure"));
4923                 /*
4924                  * Note: no rollback of state needed as it does
4925                  * not change in connectionless transport
4926                  */
4927                 tl_memrecover(wq, mp, err_sz);
4928                 return;
4929         }
4930 
4931         DB_TYPE(err_mp) = M_PROTO;
4932         err_mp->b_wptr = err_mp->b_rptr + err_sz;
4933         uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4934         uderr->PRIM_type = T_UDERROR_IND;
4935         uderr->ERROR_type = err;
4936         uderr->DEST_length = alen;
4937         uderr->OPT_length = olen;
4938         if (alen <= 0) {
4939                 uderr->DEST_offset = 0;
4940         } else {
4941                 uderr->DEST_offset =
4942                     (t_scalar_t)sizeof (struct T_uderror_ind);
4943                 addr_startp  = mp->b_rptr + udreq->DEST_offset;
4944                 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4945                     (size_t)alen);
4946         }
4947         if (olen <= 0) {
4948                 uderr->OPT_offset = 0;
4949         } else {
4950                 uderr->OPT_offset =
4951                     (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4952                     uderr->DEST_length);
4953                 addr_startp  = mp->b_rptr + udreq->OPT_offset;
4954                 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4955                     (size_t)olen);
4956         }
4957         freemsg(mp);
4958 
4959         /*
4960          * send indication message
4961          */
4962         tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4963 
4964         qreply(wq, err_mp);
4965 }
4966 
4967 static void
4968 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4969 {
4970         queue_t *wq = tep->te_wq;
4971 
4972         if (!tep->te_closing && (wq->q_first != NULL)) {
4973                 TL_PUTQ(tep, mp);
4974         } else if (tep->te_rq != NULL)
4975                 tl_unitdata(mp, tep);
4976         else
4977                 freemsg(mp);
4978 
4979         tl_serializer_exit(tep);
4980         tl_refrele(tep);
4981 }
4982 
4983 /*
4984  * Handle T_unitdata_req.
4985  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4986  * If this is a socket pass through options unmodified.
4987  */
4988 static void
4989 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4990 {
4991         queue_t                 *wq = tep->te_wq;
4992         soux_addr_t             ux_addr;
4993         tl_addr_t               destaddr;
4994         uchar_t                 *addr_startp;
4995         tl_endpt_t              *peer_tep;
4996         struct T_unitdata_ind   *udind;
4997         struct T_unitdata_req   *udreq;
4998         ssize_t                 msz, ui_sz;
4999         t_scalar_t              alen, aoff, olen, ooff;
5000         t_scalar_t              oldolen = 0;
5001         cred_t                  *cr = NULL;
5002         pid_t                   cpid;
5003 
5004         udreq = (struct T_unitdata_req *)mp->b_rptr;
5005         msz = MBLKL(mp);
5006 
5007         /*
5008          * validate the state
5009          */
5010         if (tep->te_state != TS_IDLE) {
5011                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5012                     SL_TRACE|SL_ERROR,
5013                     "tl_wput:T_CONN_REQ:out of state"));
5014                 tl_merror(wq, mp, EPROTO);
5015                 return;
5016         }
5017         /*
5018          * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5019          * (state does not change on this event)
5020          */
5021 
5022         /*
5023          * validate the message
5024          * Note: dereference fields in struct inside message only
5025          * after validating the message length.
5026          */
5027         if (msz < sizeof (struct T_unitdata_req)) {
5028                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5029                     "tl_unitdata:invalid message length"));
5030                 tl_merror(wq, mp, EINVAL);
5031                 return;
5032         }
5033         alen = udreq->DEST_length;
5034         aoff = udreq->DEST_offset;
5035         oldolen = olen = udreq->OPT_length;
5036         ooff = udreq->OPT_offset;
5037         if (olen == 0)
5038                 ooff = 0;
5039 
5040         if (IS_SOCKET(tep)) {
5041                 if ((alen != TL_SOUX_ADDRLEN) ||
5042                     (aoff < 0) ||
5043                     (aoff + alen > msz) ||
5044                     (olen < 0) || (ooff < 0) ||
5045                     ((olen > 0) && ((ooff + olen) > msz))) {
5046                         (void) (STRLOG(TL_ID, tep->te_minor,
5047                             1, SL_TRACE|SL_ERROR,
5048                             "tl_unitdata_req: invalid socket addr "
5049                             "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5050                             (int)msz, alen, aoff, olen, ooff));
5051                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5052                         return;
5053                 }
5054                 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5055 
5056                 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5057                     (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5058                         (void) (STRLOG(TL_ID, tep->te_minor,
5059                             1, SL_TRACE|SL_ERROR,
5060                             "tl_conn_req: invalid socket magic"));
5061                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5062                         return;
5063                 }
5064         } else {
5065                 if ((alen < 0) ||
5066                     (aoff < 0) ||
5067                     ((alen > 0) && ((aoff + alen) > msz)) ||
5068                     ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5069                     ((aoff + alen) < 0) ||
5070                     ((olen > 0) && ((ooff + olen) > msz)) ||
5071                     (olen < 0) ||
5072                     (ooff < 0) ||
5073                     ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5074                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
5075                                     SL_TRACE|SL_ERROR,
5076                                     "tl_unitdata:invalid unit data message"));
5077                         tl_merror(wq, mp, EINVAL);
5078                         return;
5079                 }
5080         }
5081 
5082         /* Options not supported unless it's a socket */
5083         if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5084                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5085                     "tl_unitdata:option use(unsupported) or zero len addr"));
5086                 tl_uderr(wq, mp, EPROTO);
5087                 return;
5088         }
5089 #ifdef DEBUG
5090         /*
5091          * Mild form of ASSERT()ion to detect broken TPI apps.
5092          * if (! assertion)
5093          *      log warning;
5094          */
5095         if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5096                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5097                     "tl_unitdata:addr overlaps TPI message"));
5098         }
5099 #endif
5100         /*
5101          * get destination endpoint
5102          */
5103         destaddr.ta_alen = alen;
5104         destaddr.ta_abuf = mp->b_rptr + aoff;
5105         destaddr.ta_zoneid = tep->te_zoneid;
5106 
5107         /*
5108          * Check whether the destination is the same that was used previously
5109          * and the destination endpoint is in the right state. If something is
5110          * wrong, find destination again and cache it.
5111          */
5112         peer_tep = tep->te_lastep;
5113 
5114         if ((peer_tep == NULL) || peer_tep->te_closing ||
5115             (peer_tep->te_state != TS_IDLE) ||
5116             !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5117                 /*
5118                  * Not the same as cached destination , need to find the right
5119                  * destination.
5120                  */
5121                 peer_tep = (IS_SOCKET(tep) ?
5122                     tl_sock_find_peer(tep, &ux_addr) :
5123                     tl_find_peer(tep, &destaddr));
5124 
5125                 if (peer_tep == NULL) {
5126                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5127                             SL_TRACE|SL_ERROR,
5128                             "tl_unitdata:no one at destination address"));
5129                         tl_uderr(wq, mp, ECONNRESET);
5130                         return;
5131                 }
5132 
5133                 /*
5134                  * Cache the new peer.
5135                  */
5136                 if (tep->te_lastep != NULL)
5137                         tl_refrele(tep->te_lastep);
5138 
5139                 tep->te_lastep = peer_tep;
5140         }
5141 
5142         if (peer_tep->te_state != TS_IDLE) {
5143                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5144                     "tl_unitdata:provider in invalid state"));
5145                 tl_uderr(wq, mp, EPROTO);
5146                 return;
5147         }
5148 
5149         ASSERT(peer_tep->te_rq != NULL);
5150 
5151         /*
5152          * Put it back if flow controlled except when we are closing.
5153          * Note: Messages already on queue when we are closing is bounded
5154          * so we can ignore flow control.
5155          */
5156         if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5157                 /* record what we are flow controlled on */
5158                 if (tep->te_flowq != NULL) {
5159                         list_remove(&tep->te_flowq->te_flowlist, tep);
5160                 }
5161                 list_insert_head(&peer_tep->te_flowlist, tep);
5162                 tep->te_flowq = peer_tep;
5163                 TL_PUTBQ(tep, mp);
5164                 return;
5165         }
5166         /*
5167          * prepare indication message
5168          */
5169 
5170         /*
5171          * calculate length of message
5172          */
5173         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5174                 cr = msg_getcred(mp, &cpid);
5175                 ASSERT(cr != NULL);
5176 
5177                 if (peer_tep->te_flag & TL_SETCRED) {
5178                         ASSERT(olen == 0);
5179                         olen = (t_scalar_t)sizeof (struct opthdr) +
5180                             OPTLEN(sizeof (tl_credopt_t));
5181                                                 /* 1 option only */
5182                 } else if (peer_tep->te_flag & TL_SETUCRED) {
5183                         ASSERT(olen == 0);
5184                         olen = (t_scalar_t)sizeof (struct opthdr) +
5185                             OPTLEN(ucredminsize(cr));
5186                                                 /* 1 option only */
5187                 } else {
5188                         /* Possibly more than one option */
5189                         olen += (t_scalar_t)sizeof (struct T_opthdr) +
5190                             OPTLEN(ucredminsize(cr));
5191                 }
5192         }
5193 
5194         ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5195             olen;
5196         /*
5197          * If the unitdata_ind fits and we are not adding options
5198          * reuse the udreq mblk.
5199          */
5200         if (msz >= ui_sz && alen >= tep->te_alen &&
5201             !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5202                 /*
5203                  * Reuse the original mblk. Leave options in place.
5204                  */
5205                 udind =  (struct T_unitdata_ind *)mp->b_rptr;
5206                 udind->PRIM_type = T_UNITDATA_IND;
5207                 udind->SRC_length = tep->te_alen;
5208                 addr_startp = mp->b_rptr + udind->SRC_offset;
5209                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5210         } else {
5211                 /* Allocate a new T_unidata_ind message */
5212                 mblk_t *ui_mp;
5213 
5214                 ui_mp = allocb(ui_sz, BPRI_MED);
5215                 if (! ui_mp) {
5216                         (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5217                             "tl_unitdata:allocb failure:message queued"));
5218                         tl_memrecover(wq, mp, ui_sz);
5219                         return;
5220                 }
5221 
5222                 /*
5223                  * fill in T_UNITDATA_IND contents
5224                  */
5225                 DB_TYPE(ui_mp) = M_PROTO;
5226                 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5227                 udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5228                 udind->PRIM_type = T_UNITDATA_IND;
5229                 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5230                 udind->SRC_length = tep->te_alen;
5231                 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5232                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5233                 udind->OPT_offset =
5234                     (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5235                 udind->OPT_length = olen;
5236                 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5237 
5238                         if (oldolen != 0) {
5239                                 bcopy((void *)((uintptr_t)udreq + ooff),
5240                                     (void *)((uintptr_t)udind +
5241                                     udind->OPT_offset),
5242                                     oldolen);
5243                         }
5244                         ASSERT(cr != NULL);
5245 
5246                         tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5247                             oldolen, cr, cpid,
5248                             peer_tep->te_flag, peer_tep->te_credp);
5249                 } else {
5250                         bcopy((void *)((uintptr_t)udreq + ooff),
5251                             (void *)((uintptr_t)udind + udind->OPT_offset),
5252                             olen);
5253                 }
5254 
5255                 /*
5256                  * relink data blocks from mp to ui_mp
5257                  */
5258                 ui_mp->b_cont = mp->b_cont;
5259                 freeb(mp);
5260                 mp = ui_mp;
5261         }
5262         /*
5263          * send indication message
5264          */
5265         peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5266         putnext(peer_tep->te_rq, mp);
5267 }
5268 
5269 
5270 
5271 /*
5272  * Check if a given addr is in use.
5273  * Endpoint ptr returned or NULL if not found.
5274  * The name space is separate for each mode. This implies that
5275  * sockets get their own name space.
5276  */
5277 static tl_endpt_t *
5278 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5279 {
5280         tl_endpt_t *peer_tep = NULL;
5281         int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5282             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5283 
5284         ASSERT(! IS_SOCKET(tep));
5285 
5286         ASSERT(ap != NULL && ap->ta_alen > 0);
5287         ASSERT(ap->ta_zoneid == tep->te_zoneid);
5288         ASSERT(ap->ta_abuf != NULL);
5289         EQUIV(rc == 0, peer_tep != NULL);
5290         IMPLY(rc == 0,
5291             (tep->te_zoneid == peer_tep->te_zoneid) &&
5292             (tep->te_transport == peer_tep->te_transport));
5293 
5294         if ((rc == 0) && (peer_tep->te_closing)) {
5295                 tl_refrele(peer_tep);
5296                 peer_tep = NULL;
5297         }
5298 
5299         return (peer_tep);
5300 }
5301 
5302 /*
5303  * Find peer for a socket based on unix domain address.
5304  * For implicit addresses our peer can be found by minor number in ai hash. For
5305  * explicit binds we look vnode address at addr_hash.
5306  */
5307 static tl_endpt_t *
5308 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5309 {
5310         tl_endpt_t *peer_tep = NULL;
5311         mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5312             tep->te_aihash : tep->te_addrhash;
5313         int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5314             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5315 
5316         ASSERT(IS_SOCKET(tep));
5317         EQUIV(rc == 0, peer_tep != NULL);
5318         IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5319 
5320         if (peer_tep != NULL) {
5321                 /* Don't attempt to use closing peer. */
5322                 if (peer_tep->te_closing)
5323                         goto errout;
5324 
5325                 /*
5326                  * Cross-zone unix sockets are permitted, but for Trusted
5327                  * Extensions only, the "server" for these must be in the
5328                  * global zone.
5329                  */
5330                 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5331                     is_system_labeled() &&
5332                     (peer_tep->te_zoneid != GLOBAL_ZONEID))
5333                         goto errout;
5334         }
5335 
5336         return (peer_tep);
5337 
5338 errout:
5339         tl_refrele(peer_tep);
5340         return (NULL);
5341 }
5342 
5343 /*
5344  * Generate a free addr and return it in struct pointed by ap
5345  * but allocating space for address buffer.
5346  * The generated address will be at least 4 bytes long and, if req->ta_alen
5347  * exceeds 4 bytes, be req->ta_alen bytes long.
5348  *
5349  * If address is found it will be inserted in the hash.
5350  *
5351  * If req->ta_alen is larger than the default alen (4 bytes) the last
5352  * alen-4 bytes will always be the same as in req.
5353  *
5354  * Return 0 for failure.
5355  * Return non-zero for success.
5356  */
5357 static boolean_t
5358 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5359 {
5360         t_scalar_t      alen;
5361         uint32_t        loopcnt;        /* Limit loop to 2^32 */
5362 
5363         ASSERT(tep->te_hash_hndl != NULL);
5364         ASSERT(! IS_SOCKET(tep));
5365 
5366         if (tep->te_hash_hndl == NULL)
5367                 return (B_FALSE);
5368 
5369         /*
5370          * check if default addr is in use
5371          * if it is - bump it and try again
5372          */
5373         if (req == NULL) {
5374                 alen = sizeof (uint32_t);
5375         } else {
5376                 alen = max(req->ta_alen, sizeof (uint32_t));
5377                 ASSERT(tep->te_zoneid == req->ta_zoneid);
5378         }
5379 
5380         if (tep->te_alen < alen) {
5381                 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5382 
5383                 /*
5384                  * Not enough space in tep->ta_ap to hold the address,
5385                  * allocate a bigger space.
5386                  */
5387                 if (abuf == NULL)
5388                         return (B_FALSE);
5389 
5390                 if (tep->te_alen > 0)
5391                         kmem_free(tep->te_abuf, tep->te_alen);
5392 
5393                 tep->te_alen = alen;
5394                 tep->te_abuf = abuf;
5395         }
5396 
5397         /* Copy in the address in req */
5398         if (req != NULL) {
5399                 ASSERT(alen >= req->ta_alen);
5400                 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5401         }
5402 
5403         /*
5404          * First try minor number then try default addresses.
5405          */
5406         bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5407 
5408         for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5409                 if (mod_hash_insert_reserve(tep->te_addrhash,
5410                     (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5411                     tep->te_hash_hndl) == 0) {
5412                         /*
5413                          * found free address
5414                          */
5415                         tep->te_flag |= TL_ADDRHASHED;
5416                         tep->te_hash_hndl = NULL;
5417 
5418                         return (B_TRUE); /* successful return */
5419                 }
5420                 /*
5421                  * Use default address.
5422                  */
5423                 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5424                 atomic_add_32(&tep->te_defaddr, 1);
5425         }
5426 
5427         /*
5428          * Failed to find anything.
5429          */
5430         (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5431             "tl_get_any_addr:looped 2^32 times"));
5432         return (B_FALSE);
5433 }
5434 
5435 /*
5436  * reallocb + set r/w ptrs to reflect size.
5437  */
5438 static mblk_t *
5439 tl_resizemp(mblk_t *mp, ssize_t new_size)
5440 {
5441         if ((mp = reallocb(mp, new_size, 0)) == NULL)
5442                 return (NULL);
5443 
5444         mp->b_rptr = DB_BASE(mp);
5445         mp->b_wptr = mp->b_rptr + new_size;
5446         return (mp);
5447 }
5448 
5449 static void
5450 tl_cl_backenable(tl_endpt_t *tep)
5451 {
5452         list_t *l = &tep->te_flowlist;
5453         tl_endpt_t *elp;
5454 
5455         ASSERT(IS_CLTS(tep));
5456 
5457         for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5458                 ASSERT(tep->te_ser == elp->te_ser);
5459                 ASSERT(elp->te_flowq == tep);
5460                 if (! elp->te_closing)
5461                         TL_QENABLE(elp);
5462                 elp->te_flowq = NULL;
5463                 list_remove(l, elp);
5464         }
5465 }
5466 
5467 /*
5468  * Unconnect endpoints.
5469  */
5470 static void
5471 tl_co_unconnect(tl_endpt_t *tep)
5472 {
5473         tl_endpt_t      *peer_tep = tep->te_conp;
5474         tl_endpt_t      *srv_tep = tep->te_oconp;
5475         list_t          *l;
5476         tl_icon_t       *tip;
5477         tl_endpt_t      *cl_tep;
5478         mblk_t          *d_mp;
5479 
5480         ASSERT(IS_COTS(tep));
5481         /*
5482          * If our peer is closing, don't use it.
5483          */
5484         if ((peer_tep != NULL) && peer_tep->te_closing) {
5485                 TL_UNCONNECT(tep->te_conp);
5486                 peer_tep = NULL;
5487         }
5488         if ((srv_tep != NULL) && srv_tep->te_closing) {
5489                 TL_UNCONNECT(tep->te_oconp);
5490                 srv_tep = NULL;
5491         }
5492 
5493         if (tep->te_nicon > 0) {
5494                 l = &tep->te_iconp;
5495                 /*
5496                  * If incoming requests pending, change state
5497                  * of clients on disconnect ind event and send
5498                  * discon_ind pdu to modules above them
5499                  * for server: all clients get disconnect
5500                  */
5501 
5502                 while (tep->te_nicon > 0) {
5503                         tip    = list_head(l);
5504                         cl_tep = tip->ti_tep;
5505 
5506                         if (cl_tep == NULL) {
5507                                 tl_freetip(tep, tip);
5508                                 continue;
5509                         }
5510 
5511                         if (cl_tep->te_oconp != NULL) {
5512                                 ASSERT(cl_tep != cl_tep->te_oconp);
5513                                 TL_UNCONNECT(cl_tep->te_oconp);
5514                         }
5515 
5516                         if (cl_tep->te_closing) {
5517                                 tl_freetip(tep, tip);
5518                                 continue;
5519                         }
5520 
5521                         enableok(cl_tep->te_wq);
5522                         TL_QENABLE(cl_tep);
5523                         d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5524                         if (d_mp != NULL) {
5525                                 cl_tep->te_state = TS_IDLE;
5526                                 putnext(cl_tep->te_rq, d_mp);
5527                         } else {
5528                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5529                                     SL_TRACE|SL_ERROR,
5530                                     "tl_co_unconnect:icmng: "
5531                                     "allocb failure"));
5532                         }
5533                         tl_freetip(tep, tip);
5534                 }
5535         } else if (srv_tep != NULL) {
5536                 /*
5537                  * If outgoing request pending, change state
5538                  * of server on discon ind event
5539                  */
5540 
5541                 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5542                     IS_COTSORD(srv_tep) &&
5543                     !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5544                         /*
5545                          * Queue ordrel_ind for server to be picked up
5546                          * when the connection is accepted.
5547                          */
5548                         d_mp = tl_ordrel_ind_alloc();
5549                 } else {
5550                         /*
5551                          * send discon_ind to server
5552                          */
5553                         d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5554                 }
5555                 if (d_mp == NULL) {
5556                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5557                             SL_TRACE|SL_ERROR,
5558                             "tl_co_unconnect:outgoing:allocb failure"));
5559                         TL_UNCONNECT(tep->te_oconp);
5560                         goto discon_peer;
5561                 }
5562 
5563                 /*
5564                  * If this is a socket the T_DISCON_IND is queued with
5565                  * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5566                  * from the list of pending connections.
5567                  * Note that when te_oconp is set the peer better have
5568                  * a t_connind_t for the client.
5569                  */
5570                 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5571                         /*
5572                          * Queue the disconnection message.
5573                          */
5574                         tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5575                 } else {
5576                         tip = tl_icon_find(srv_tep, tep->te_seqno);
5577                         if (tip == NULL) {
5578                                 freemsg(d_mp);
5579                         } else {
5580                                 ASSERT(tep == tip->ti_tep);
5581                                 ASSERT(tep->te_ser == srv_tep->te_ser);
5582                                 /*
5583                                  * Delete tip from the server list.
5584                                  */
5585                                 if (srv_tep->te_nicon == 1) {
5586                                         srv_tep->te_state =
5587                                             NEXTSTATE(TE_DISCON_IND2,
5588                                             srv_tep->te_state);
5589                                 } else {
5590                                         srv_tep->te_state =
5591                                             NEXTSTATE(TE_DISCON_IND3,
5592                                             srv_tep->te_state);
5593                                 }
5594                                 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5595                                     T_DISCON_IND);
5596                                 putnext(srv_tep->te_rq, d_mp);
5597                                 tl_freetip(srv_tep, tip);
5598                         }
5599                         TL_UNCONNECT(tep->te_oconp);
5600                         srv_tep = NULL;
5601                 }
5602         } else if (peer_tep != NULL) {
5603                 /*
5604                  * unconnect existing connection
5605                  * If connected, change state of peer on
5606                  * discon ind event and send discon ind pdu
5607                  * to module above it
5608                  */
5609 
5610                 ASSERT(tep->te_ser == peer_tep->te_ser);
5611                 if (IS_COTSORD(peer_tep) &&
5612                     (peer_tep->te_state == TS_WIND_ORDREL ||
5613                     peer_tep->te_state == TS_DATA_XFER)) {
5614                         /*
5615                          * send ordrel ind
5616                          */
5617                         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5618                         "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5619                             peer_tep->te_state,
5620                             NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5621                         d_mp = tl_ordrel_ind_alloc();
5622                         if (! d_mp) {
5623                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5624                                     SL_TRACE|SL_ERROR,
5625                                     "tl_co_unconnect:connected:"
5626                                     "allocb failure"));
5627                                 /*
5628                                  * Continue with cleaning up peer as
5629                                  * this side may go away with the close
5630                                  */
5631                                 TL_QENABLE(peer_tep);
5632                                 goto discon_peer;
5633                         }
5634                         peer_tep->te_state =
5635                             NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5636 
5637                         putnext(peer_tep->te_rq, d_mp);
5638                         /*
5639                          * Handle flow control case.  This will generate
5640                          * a t_discon_ind message with reason 0 if there
5641                          * is data queued on the write side.
5642                          */
5643                         TL_QENABLE(peer_tep);
5644                 } else if (IS_COTSORD(peer_tep) &&
5645                     peer_tep->te_state == TS_WREQ_ORDREL) {
5646                         /*
5647                          * Sent an ordrel_ind. We send a discon with
5648                          * with error 0 to inform that the peer is gone.
5649                          */
5650                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5651                             SL_TRACE|SL_ERROR,
5652                             "tl_co_unconnect: discon in state %d",
5653                             tep->te_state));
5654                         tl_discon_ind(peer_tep, 0);
5655                 } else {
5656                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5657                             SL_TRACE|SL_ERROR,
5658                             "tl_co_unconnect: state %d", tep->te_state));
5659                         tl_discon_ind(peer_tep, ECONNRESET);
5660                 }
5661 
5662 discon_peer:
5663                 /*
5664                  * Disconnect cross-pointers only for close
5665                  */
5666                 if (tep->te_closing) {
5667                         peer_tep = tep->te_conp;
5668                         TL_REMOVE_PEER(peer_tep->te_conp);
5669                         TL_REMOVE_PEER(tep->te_conp);
5670                 }
5671         }
5672 }
5673 
5674 /*
5675  * Note: The following routine does not recover from allocb()
5676  * failures
5677  * The reason should be from the <sys/errno.h> space.
5678  */
5679 static void
5680 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5681 {
5682         mblk_t *d_mp;
5683 
5684         if (tep->te_closing)
5685                 return;
5686 
5687         /*
5688          * flush the queues.
5689          */
5690         flushq(tep->te_rq, FLUSHDATA);
5691         (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5692 
5693         /*
5694          * send discon ind
5695          */
5696         d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5697         if (! d_mp) {
5698                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5699                     "tl_discon_ind:allocb failure"));
5700                 return;
5701         }
5702         tep->te_state = TS_IDLE;
5703         putnext(tep->te_rq, d_mp);
5704 }
5705 
5706 /*
5707  * Note: The following routine does not recover from allocb()
5708  * failures
5709  * The reason should be from the <sys/errno.h> space.
5710  */
5711 static mblk_t *
5712 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5713 {
5714         mblk_t *mp;
5715         struct T_discon_ind *tdi;
5716 
5717         if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5718                 DB_TYPE(mp) = M_PROTO;
5719                 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5720                 tdi = (struct T_discon_ind *)mp->b_rptr;
5721                 tdi->PRIM_type = T_DISCON_IND;
5722                 tdi->DISCON_reason = reason;
5723                 tdi->SEQ_number = seqnum;
5724         }
5725         return (mp);
5726 }
5727 
5728 
5729 /*
5730  * Note: The following routine does not recover from allocb()
5731  * failures
5732  */
5733 static mblk_t *
5734 tl_ordrel_ind_alloc(void)
5735 {
5736         mblk_t *mp;
5737         struct T_ordrel_ind *toi;
5738 
5739         if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5740                 DB_TYPE(mp) = M_PROTO;
5741                 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5742                 toi = (struct T_ordrel_ind *)mp->b_rptr;
5743                 toi->PRIM_type = T_ORDREL_IND;
5744         }
5745         return (mp);
5746 }
5747 
5748 
5749 /*
5750  * Lookup the seqno in the list of queued connections.
5751  */
5752 static tl_icon_t *
5753 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5754 {
5755         list_t *l = &tep->te_iconp;
5756         tl_icon_t *tip = list_head(l);
5757 
5758         ASSERT(seqno != 0);
5759 
5760         for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5761                 ;
5762 
5763         return (tip);
5764 }
5765 
5766 /*
5767  * Queue data for a given T_CONN_IND while verifying that redundant
5768  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5769  * Used when the originator of the connection closes.
5770  */
5771 static void
5772 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5773 {
5774         tl_icon_t               *tip;
5775         mblk_t                  **mpp, *mp;
5776         int                     prim, nprim;
5777 
5778         if (nmp->b_datap->db_type == M_PROTO)
5779                 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5780         else
5781                 nprim = -1;     /* M_DATA */
5782 
5783         tip = tl_icon_find(tep, seqno);
5784         if (tip == NULL) {
5785                 freemsg(nmp);
5786                 return;
5787         }
5788 
5789         ASSERT(tip->ti_seqno != 0);
5790         mpp = &tip->ti_mp;
5791         while (*mpp != NULL) {
5792                 mp = *mpp;
5793 
5794                 if (mp->b_datap->db_type == M_PROTO)
5795                         prim = ((union T_primitives *)mp->b_rptr)->type;
5796                 else
5797                         prim = -1;      /* M_DATA */
5798 
5799                 /*
5800                  * Allow nothing after a T_DISCON_IND
5801                  */
5802                 if (prim == T_DISCON_IND) {
5803                         freemsg(nmp);
5804                         return;
5805                 }
5806                 /*
5807                  * Only allow a T_DISCON_IND after an T_ORDREL_IND
5808                  */
5809                 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5810                         freemsg(nmp);
5811                         return;
5812                 }
5813                 mpp = &(mp->b_next);
5814         }
5815         *mpp = nmp;
5816 }
5817 
5818 /*
5819  * Verify if a certain TPI primitive exists on the connind queue.
5820  * Use prim -1 for M_DATA.
5821  * Return non-zero if found.
5822  */
5823 static boolean_t
5824 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5825 {
5826         tl_icon_t *tip = tl_icon_find(tep, seqno);
5827         boolean_t found = B_FALSE;
5828 
5829         if (tip != NULL) {
5830                 mblk_t *mp;
5831                 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5832                         found = (DB_TYPE(mp) == M_PROTO &&
5833                             ((union T_primitives *)mp->b_rptr)->type == prim);
5834                 }
5835         }
5836         return (found);
5837 }
5838 
5839 /*
5840  * Send the b_next mblk chain that has accumulated before the connection
5841  * was accepted. Perform the necessary state transitions.
5842  */
5843 static void
5844 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5845 {
5846         mblk_t                  *mp;
5847         union T_primitives      *primp;
5848 
5849         if (tep->te_closing) {
5850                 tl_icon_freemsgs(mpp);
5851                 return;
5852         }
5853 
5854         ASSERT(tep->te_state == TS_DATA_XFER);
5855         ASSERT(tep->te_rq->q_first == NULL);
5856 
5857         while ((mp = *mpp) != NULL) {
5858                 *mpp = mp->b_next;
5859                 mp->b_next = NULL;
5860 
5861                 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5862                 switch (DB_TYPE(mp)) {
5863                 default:
5864                         freemsg(mp);
5865                         break;
5866                 case M_DATA:
5867                         putnext(tep->te_rq, mp);
5868                         break;
5869                 case M_PROTO:
5870                         primp = (union T_primitives *)mp->b_rptr;
5871                         switch (primp->type) {
5872                         case T_UNITDATA_IND:
5873                         case T_DATA_IND:
5874                         case T_OPTDATA_IND:
5875                         case T_EXDATA_IND:
5876                                 putnext(tep->te_rq, mp);
5877                                 break;
5878                         case T_ORDREL_IND:
5879                                 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5880                                     tep->te_state);
5881                                 putnext(tep->te_rq, mp);
5882                                 break;
5883                         case T_DISCON_IND:
5884                                 tep->te_state = TS_IDLE;
5885                                 putnext(tep->te_rq, mp);
5886                                 break;
5887                         default:
5888 #ifdef DEBUG
5889                                 cmn_err(CE_PANIC,
5890                                     "tl_icon_sendmsgs: unknown primitive");
5891 #endif /* DEBUG */
5892                                 freemsg(mp);
5893                                 break;
5894                         }
5895                         break;
5896                 }
5897         }
5898 }
5899 
5900 /*
5901  * Free the b_next mblk chain that has accumulated before the connection
5902  * was accepted.
5903  */
5904 static void
5905 tl_icon_freemsgs(mblk_t **mpp)
5906 {
5907         mblk_t *mp;
5908 
5909         while ((mp = *mpp) != NULL) {
5910                 *mpp = mp->b_next;
5911                 mp->b_next = NULL;
5912                 freemsg(mp);
5913         }
5914 }
5915 
5916 /*
5917  * Send M_ERROR
5918  * Note: assumes caller ensured enough space in mp or enough
5919  *      memory available. Does not attempt recovery from allocb()
5920  *      failures
5921  */
5922 
5923 static void
5924 tl_merror(queue_t *wq, mblk_t *mp, int error)
5925 {
5926         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5927 
5928         if (tep->te_closing) {
5929                 freemsg(mp);
5930                 return;
5931         }
5932 
5933         (void) (STRLOG(TL_ID, tep->te_minor, 1,
5934             SL_TRACE|SL_ERROR,
5935             "tl_merror: tep=%p, err=%d", (void *)tep, error));
5936 
5937         /*
5938          * flush all messages on queue. we are shutting
5939          * the stream down on fatal error
5940          */
5941         flushq(wq, FLUSHALL);
5942         if (IS_COTS(tep)) {
5943                 /* connection oriented - unconnect endpoints */
5944                 tl_co_unconnect(tep);
5945         }
5946         if (mp->b_cont) {
5947                 freemsg(mp->b_cont);
5948                 mp->b_cont = NULL;
5949         }
5950 
5951         if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5952                 freemsg(mp);
5953                 mp = allocb(1, BPRI_HI);
5954                 if (!mp) {
5955                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
5956                             SL_TRACE|SL_ERROR,
5957                             "tl_merror:M_PROTO: out of memory"));
5958                         return;
5959                 }
5960         }
5961         if (mp) {
5962                 DB_TYPE(mp) = M_ERROR;
5963                 mp->b_rptr = DB_BASE(mp);
5964                 *mp->b_rptr = (char)error;
5965                 mp->b_wptr = mp->b_rptr + sizeof (char);
5966                 qreply(wq, mp);
5967         } else {
5968                 (void) putnextctl1(tep->te_rq, M_ERROR, error);
5969         }
5970 }
5971 
5972 static void
5973 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5974 {
5975         ASSERT(cr != NULL);
5976 
5977         if (flag & TL_SETCRED) {
5978                 struct opthdr *opt = (struct opthdr *)buf;
5979                 tl_credopt_t *tlcred;
5980 
5981                 opt->level = TL_PROT_LEVEL;
5982                 opt->name = TL_OPT_PEER_CRED;
5983                 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5984 
5985                 tlcred = (tl_credopt_t *)(opt + 1);
5986                 tlcred->tc_uid = crgetuid(cr);
5987                 tlcred->tc_gid = crgetgid(cr);
5988                 tlcred->tc_ruid = crgetruid(cr);
5989                 tlcred->tc_rgid = crgetrgid(cr);
5990                 tlcred->tc_suid = crgetsuid(cr);
5991                 tlcred->tc_sgid = crgetsgid(cr);
5992                 tlcred->tc_ngroups = crgetngroups(cr);
5993         } else if (flag & TL_SETUCRED) {
5994                 struct opthdr *opt = (struct opthdr *)buf;
5995 
5996                 opt->level = TL_PROT_LEVEL;
5997                 opt->name = TL_OPT_PEER_UCRED;
5998                 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
5999 
6000                 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6001         } else {
6002                 struct T_opthdr *topt = (struct T_opthdr *)buf;
6003                 ASSERT(flag & TL_SOCKUCRED);
6004 
6005                 topt->level = SOL_SOCKET;
6006                 topt->name = SCM_UCRED;
6007                 topt->len = ucredminsize(cr) + sizeof (*topt);
6008                 topt->status = 0;
6009                 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6010         }
6011 }
6012 
6013 /* ARGSUSED */
6014 static int
6015 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6016 {
6017         /* no default value processed in protocol specific code currently */
6018         return (-1);
6019 }
6020 
6021 /* ARGSUSED */
6022 static int
6023 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6024 {
6025         int len;
6026         tl_endpt_t *tep;
6027         int *valp;
6028 
6029         tep = (tl_endpt_t *)wq->q_ptr;
6030 
6031         len = 0;
6032 
6033         /*
6034          * Assumes: option level and name sanity check done elsewhere
6035          */
6036 
6037         switch (level) {
6038         case SOL_SOCKET:
6039                 if (! IS_SOCKET(tep))
6040                         break;
6041                 switch (name) {
6042                 case SO_RECVUCRED:
6043                         len = sizeof (int);
6044                         valp = (int *)ptr;
6045                         *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6046                         break;
6047                 default:
6048                         break;
6049                 }
6050                 break;
6051         case TL_PROT_LEVEL:
6052                 switch (name) {
6053                 case TL_OPT_PEER_CRED:
6054                 case TL_OPT_PEER_UCRED:
6055                         /*
6056                          * option not supposed to retrieved directly
6057                          * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6058                          * when some internal flags set by other options
6059                          * Direct retrieval always designed to fail(ignored)
6060                          * for this option.
6061                          */
6062                         break;
6063                 }
6064         }
6065         return (len);
6066 }
6067 
6068 /* ARGSUSED */
6069 static int
6070 tl_set_opt(
6071         queue_t         *wq,
6072         uint_t          mgmt_flags,
6073         int             level,
6074         int             name,
6075         uint_t          inlen,
6076         uchar_t         *invalp,
6077         uint_t          *outlenp,
6078         uchar_t         *outvalp,
6079         void            *thisdg_attrs,
6080         cred_t          *cr)
6081 {
6082         int error;
6083         tl_endpt_t *tep;
6084 
6085         tep = (tl_endpt_t *)wq->q_ptr;
6086 
6087         error = 0;              /* NOERROR */
6088 
6089         /*
6090          * Assumes: option level and name sanity checks done elsewhere
6091          */
6092 
6093         switch (level) {
6094         case SOL_SOCKET:
6095                 if (! IS_SOCKET(tep)) {
6096                         error = EINVAL;
6097                         break;
6098                 }
6099                 /*
6100                  * TBD: fill in other AF_UNIX socket options and then stop
6101                  * returning error.
6102                  */
6103                 switch (name) {
6104                 case SO_RECVUCRED:
6105                         /*
6106                          * We only support this for datagram sockets;
6107                          * getpeerucred handles the connection oriented
6108                          * transports.
6109                          */
6110                         if (! IS_CLTS(tep)) {
6111                                 error = EINVAL;
6112                                 break;
6113                         }
6114                         if (*(int *)invalp == 0)
6115                                 tep->te_flag &= ~TL_SOCKUCRED;
6116                         else
6117                                 tep->te_flag |= TL_SOCKUCRED;
6118                         break;
6119                 default:
6120                         error = EINVAL;
6121                         break;
6122                 }
6123                 break;
6124         case TL_PROT_LEVEL:
6125                 switch (name) {
6126                 case TL_OPT_PEER_CRED:
6127                 case TL_OPT_PEER_UCRED:
6128                         /*
6129                          * option not supposed to be set directly
6130                          * Its value in initialized for each endpoint at
6131                          * driver open time.
6132                          * Direct setting always designed to fail for this
6133                          * option.
6134                          */
6135                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
6136                             SL_TRACE|SL_ERROR,
6137                             "tl_set_opt: option is not supported"));
6138                         error = EPROTO;
6139                         break;
6140                 }
6141         }
6142         return (error);
6143 }
6144 
6145 
6146 static void
6147 tl_timer(void *arg)
6148 {
6149         queue_t *wq = arg;
6150         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6151 
6152         ASSERT(tep);
6153 
6154         tep->te_timoutid = 0;
6155 
6156         enableok(wq);
6157         /*
6158          * Note: can call wsrv directly here and save context switch
6159          * Consider change when qtimeout (not timeout) is active
6160          */
6161         qenable(wq);
6162 }
6163 
6164 static void
6165 tl_buffer(void *arg)
6166 {
6167         queue_t *wq = arg;
6168         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6169 
6170         ASSERT(tep);
6171 
6172         tep->te_bufcid = 0;
6173         tep->te_nowsrv = B_FALSE;
6174 
6175         enableok(wq);
6176         /*
6177          *  Note: can call wsrv directly here and save context switch
6178          * Consider change when qbufcall (not bufcall) is active
6179          */
6180         qenable(wq);
6181 }
6182 
6183 static void
6184 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6185 {
6186         tl_endpt_t *tep;
6187 
6188         tep = (tl_endpt_t *)wq->q_ptr;
6189 
6190         if (tep->te_closing) {
6191                 freemsg(mp);
6192                 return;
6193         }
6194         noenable(wq);
6195 
6196         (void) insq(wq, wq->q_first, mp);
6197 
6198         if (tep->te_bufcid || tep->te_timoutid) {
6199                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6200                     "tl_memrecover:recover %p pending", (void *)wq));
6201                 return;
6202         }
6203 
6204         if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6205                 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6206                     drv_usectohz(TL_BUFWAIT));
6207         }
6208 }
6209 
6210 static void
6211 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6212 {
6213         ASSERT(tip->ti_seqno != 0);
6214 
6215         if (tip->ti_mp != NULL) {
6216                 tl_icon_freemsgs(&tip->ti_mp);
6217                 tip->ti_mp = NULL;
6218         }
6219         if (tip->ti_tep != NULL) {
6220                 tl_refrele(tip->ti_tep);
6221                 tip->ti_tep = NULL;
6222         }
6223         list_remove(&tep->te_iconp, tip);
6224         kmem_free(tip, sizeof (tl_icon_t));
6225         tep->te_nicon--;
6226 }
6227 
6228 /*
6229  * Remove address from address hash.
6230  */
6231 static void
6232 tl_addr_unbind(tl_endpt_t *tep)
6233 {
6234         tl_endpt_t *elp;
6235 
6236         if (tep->te_flag & TL_ADDRHASHED) {
6237                 if (IS_SOCKET(tep)) {
6238                         (void) mod_hash_remove(tep->te_addrhash,
6239                             (mod_hash_key_t)tep->te_vp,
6240                             (mod_hash_val_t *)&elp);
6241                         tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6242                         tep->te_magic = SOU_MAGIC_IMPLICIT;
6243                 } else {
6244                         (void) mod_hash_remove(tep->te_addrhash,
6245                             (mod_hash_key_t)&tep->te_ap,
6246                             (mod_hash_val_t *)&elp);
6247                         (void) kmem_free(tep->te_abuf, tep->te_alen);
6248                         tep->te_alen = -1;
6249                         tep->te_abuf = NULL;
6250                 }
6251                 tep->te_flag &= ~TL_ADDRHASHED;
6252         }
6253 }