1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  27  * Copyright (c) 2012 by Delphix. All rights reserved.
  28  * Copyright 2015 Joyent, Inc.
  29  */
  30 
  31 /*
  32  * Multithreaded STREAMS Local Transport Provider.
  33  *
  34  * OVERVIEW
  35  * ========
  36  *
  37  * This driver provides TLI as well as socket semantics.  It provides
  38  * connectionless, connection oriented, and connection oriented with orderly
  39  * release transports for TLI and sockets. Each transport type has separate name
  40  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
  41  * this removes any name space conflicts when binding to socket style transport
  42  * addresses.
  43  *
  44  * NOTE: There is one exception: Socket ticots and ticotsord transports share
  45  * the same namespace. In fact, sockets always use ticotsord type transport.
  46  *
  47  * The driver mode is specified during open() by the minor number used for
  48  * open.
  49  *
  50  *  The sockets in addition have the following semantic differences:
  51  *  No support for passing up credentials (TL_SET[U]CRED).
  52  *
  53  *      Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
  54  *      from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
  55  *      T_OPTDATA_IND.
  56  *
  57  *      The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
  58  *      a T_CONN_RES is received from the acceptor. This means that a socket
  59  *      connect will complete before the peer has called accept.
  60  *
  61  *
  62  * MULTITHREADING
  63  * ==============
  64  *
  65  * The driver does not use STREAMS protection mechanisms. Instead it uses a
  66  * generic "serializer" abstraction. Most of the operations are executed behind
  67  * the serializer and are, essentially single-threaded. All functions executed
  68  * behind the same serializer are strictly serialized. So if one thread calls
  69  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
  70  * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
  71  * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
  72  * or bar(mp2, arg1); foo(mp1, arg1). But foo() and bar() will never run at the
  73  * same time.
  74  *
  75  * Connectionless transports use a single serializer per transport type (one for
  76  * TLI and one for sockets). Connection-oriented transports use finer-grained
  77  * serializers.
  78  *
  79  * All COTS-type endpoints start their life with private serializers. During
  80  * connection request processing the endpoint serializer is switched to the
  81  * listener's serializer and the rest of T_CONN_REQ processing is done on the
  82  * listener serializer. During T_CONN_RES processing the eager serializer is
  83  * switched from listener to acceptor serializer and after that point all
  84  * processing for eager and acceptor happens on this serializer. To avoid races
  85  * with endpoint closes while its serializer may be changing closes are blocked
  86  * while serializers are manipulated.
  87  *
  88  * References accounting
  89  * ---------------------
  90  *
  91  * Endpoints are reference counted and freed when the last reference is
  92  * dropped. Functions within the serializer may access an endpoint state even
  93  * after an endpoint closed. The te_closing being set on the endpoint indicates
  94  * that the endpoint entered its close routine.
  95  *
  96  * One reference is held for each opened endpoint instance. The reference
  97  * counter is incremented when the endpoint is linked to another endpoint and
  98  * decremented when the link disappears. It is also incremented when the
  99  * endpoint is found by the hash table lookup. This increment is atomic with the
 100  * lookup itself and happens while the hash table read lock is held.
 101  *
 102  * Close synchronization
 103  * ---------------------
 104  *
 105  * During close the endpoint is marked as closing using te_closing flag. It is
 106  * usually enough to check for te_closing flag since all other state changes
 107  * happen after this flag is set and the close entered serializer. Immediately
 108  * after setting te_closing flag tl_close() enters serializer and waits until
 109  * the callback finishes. This allows all functions called within serializer to
 110  * simply check te_closing without any locks.
 111  *
 112  * Serializer management.
 113  * ---------------------
 114  *
 115  * For COTS transports serializers are created when the endpoint is constructed
 116  * and destroyed when the endpoint is destructed. CLTS transports use global
 117  * serializers - one for sockets and one for TLI.
 118  *
 119  * COTS serializers have separate reference counts to deal with several
 120  * endpoints sharing the same serializer. There is a subtle problem related to
 121  * the serializer destruction. The serializer should never be destroyed by any
 122  * function executed inside serializer. This means that close has to wait till
 123  * all serializer activity for this endpoint is finished before it can drop the
 124  * last reference on the endpoint (which may as well free the serializer).  This
 125  * is only relevant for COTS transports which manage serializers
 126  * dynamically. For CLTS transports close may complete without waiting for all
 127  * serializer activity to finish since serializer is only destroyed at driver
 128  * detach time.
 129  *
 130  * COTS endpoints keep track of the number of outstanding requests on the
 131  * serializer for the endpoint. The code handling accept() avoids changing
 132  * client serializer if it has any pending messages on the serializer and
 133  * instead moves acceptor to listener's serializer.
 134  *
 135  *
 136  * Use of hash tables
 137  * ------------------
 138  *
 139  * The driver uses modhash hash table implementation. Each transport uses two
 140  * hash tables - one for finding endpoints by acceptor ID and another one for
 141  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
 142  * pair of hash tables since sockets only use TICOTSORD.
 143  *
 144  * All hash tables lookups increment a reference count for returned endpoints,
 145  * so we may safely check the endpoint state even when the endpoint is removed
 146  * from the hash by another thread immediately after it is found.
 147  *
 148  *
 149  * CLOSE processing
 150  * ================
 151  *
 152  * The driver enters serializer twice on close(). The close sequence is the
 153  * following:
 154  *
 155  * 1) Wait until closing is safe (te_closewait becomes zero)
 156  *      This step is needed to prevent close during serializer switches. In most
 157  *      cases (close happening after connection establishment) te_closewait is
 158  *      zero.
 159  * 2) Set te_closing.
 160  * 3) Call tl_close_ser() within serializer and wait for it to complete.
 161  *
 162  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 163  *      It also needs to clear write-side q_next pointers - this should be done
 164  *      before qprocsoff().
 165  *
 166  *    This synchronous serializer entry during close is needed to ensure that
 167  *    the queue is valid everywhere inside the serializer.
 168  *
 169  *    Note that in many cases close will execute tl_close_ser() synchronously,
 170  *    so it will not wait at all.
 171  *
 172  * 4) Calls qprocsoff().
 173  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
 174  *      complete (for COTS transports). For CLTS transport there is no wait.
 175  *
 176  *      tl_close_finish_ser() finishes the close process and wakes up waiting
 177  *      close if there is any.
 178  *
 179  *    Note that in most cases close will enter tl_close_finish_ser()
 180  *    synchronously and will not wait at all.
 181  *
 182  *
 183  * Flow Control
 184  * ============
 185  *
 186  * The driver implements both read and write side service routines. No one calls
 187  * putq() on the read queue. The read side service routine tl_rsrv() is called
 188  * when the read side stream is back-enabled. It enters serializer synchronously
 189  * (waits till serializer processing is complete). Within serializer it
 190  * back-enables all endpoints blocked by the queue for connection-less
 191  * transports and enables write side service processing for the peer for
 192  * connection-oriented transports.
 193  *
 194  * Read and write side service routines use special mblk_sized space in the
 195  * endpoint structure to enter perimeter.
 196  *
 197  * Write-side flow control
 198  * -----------------------
 199  *
 200  * Write side flow control is a bit tricky. The driver needs to deal with two
 201  * message queues - the explicit STREAMS message queue maintained by
 202  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
 203  * queues should be synchronized to preserve message ordering and should
 204  * maintain a single order determined by the order in which messages enter
 205  * tl_wput(). In order to maintain the ordering between these two queues the
 206  * STREAMS queue is only manipulated within the serializer, so the ordering is
 207  * provided by the serializer.
 208  *
 209  * Functions called from the tl_wsrv() sometimes may call putbq(). To
 210  * immediately stop any further processing of the STREAMS message queues the
 211  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
 212  * side service processing stops when the flag is set.
 213  *
 214  * The tl_wsrv() function enters serializer synchronously and waits for it to
 215  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
 216  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
 217  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
 218  * always bounded by the amount of messages on the STREAMS queue at the time
 219  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
 220  * queue from another serialized entry which can't happen in parallel. This
 221  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
 222  * of it draining forever while writer places new messages on the STREAMS
 223  * queue).
 224  *
 225  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
 226  *
 227  *
 228  * Unix Domain Sockets
 229  * ===================
 230  *
 231  * The driver knows the structure of Unix Domain sockets addresses and treats
 232  * them differently from generic TLI addresses. For sockets implicit binds are
 233  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
 234  * instead of using address length of zero. Explicit binds specify
 235  * SOU_MAGIC_EXPLICIT as magic.
 236  *
 237  * For implicit binds we always use minor number as soua_vp part of the address
 238  * and avoid any hash table lookups. This saves two hash tables lookups per
 239  * anonymous bind.
 240  *
 241  * For explicit address we hash the vnode pointer instead of hashing the
 242  * full-scale address+zone+length. Hashing by pointer is more efficient than
 243  * hashing by the full address.
 244  *
 245  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 246  * tep structure, so it should never be freed.
 247  *
 248  * Also for sockets the driver always uses minor number as acceptor id.
 249  *
 250  * TPI VIOLATIONS
 251  * --------------
 252  *
 253  * This driver violates TPI in several respects for Unix Domain Sockets:
 254  *
 255  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
 256  *      is requested and the endpoint is already in use. There is no point in
 257  *      generating an unused address since this address will be rejected by
 258  *      sockfs anyway. For implicit binds it always generates a new address
 259  *      (sets soua_vp to its minor number).
 260  *
 261  * 2) It always uses minor number as acceptor ID and never uses queue
 262  *      pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
 263  *      message and they do not use the queue pointer.
 264  *
 265  * 3) For Listener sockets the usual sequence is to issue bind() with zero
 266  *      backlog followed by listen(). The listen() should be issued with
 267  *      non-zero backlog, so sotpi_listen() issues unbind request followed by
 268  *      bind request to the same address but with a non-zero qlen value. Both
 269  *      tl_bind() and tl_unbind() require write lock on the hash table to
 270  *      insert/remove the address. The driver does not remove the address from
 271  *      the hash for endpoints that are bound to the explicit address and have
 272  *      backlog of zero. During T_BIND_REQ processing if the address requested
 273  *      is equal to the address the endpoint already has it updates the backlog
 274  *      without reinserting the address in the hash table. This optimization
 275  *      avoids two hash table updates for each listener created. It also
 276  *      avoids the problem of a "stolen" address when another listener may use
 277  *      the same address between the unbind and bind and suddenly listen() fails
 278  *      because address is in use even though the bind() succeeded.
 279  *
 280  *
 281  * CONNECTIONLESS TRANSPORTS
 282  * =========================
 283  *
 284  * Connectionless transports all share the same serializer (one for TLI and one
 285  * for Sockets). Functions executing behind serializer can check or modify state
 286  * of any endpoint.
 287  *
 288  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
 289  * te_lastep field. The next time X talks to some address A it checks whether A
 290  * is the same as Y's address and if it is there is no need to lookup Y. If the
 291  * address is different or the state of Y is not appropriate (e.g. closed or not
 292  * idle) X does a lookup using tl_find_peer() and caches the new address.
 293  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
 294  * on the endpoint found.
 295  *
 296  * During close of endpoint Y it doesn't try to remove itself from other
 297  * endpoints caches. They will detect that Y is gone and will search the peer
 298  * endpoint again.
 299  *
 300  * Flow Control Handling.
 301  * ----------------------
 302  *
 303  * Each connectionless endpoint keeps a list of endpoints which are
 304  * flow-controlled by its queue. It also keeps a pointer to the queue which
 305  * flow-controls itself.  Whenever flow control releases for endpoint X it
 306  * enables all queues from the list. During close it also back-enables everyone
 307  * in the list. If X is flow-controlled when it is closing it removes itself
 308  * from the peer's list.
 309  *
 310  * DATA STRUCTURES
 311  * ===============
 312  *
 313  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 314  * endpoint state. For connection-oriented transports it keeps a list
 315  * of pending connections (tl_icon_t). For connectionless transports it keeps a
 316  * list of endpoints flow controlled by this one.
 317  *
 318  * Each transport type is represented by a per-transport data structure
 319  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
 320  * endpoint address hash tables for each transport. It also contains a pointer
 321  * to the transport serializer for connectionless transports.
 322  *
 323  * Each endpoint keeps a link to its transport structure, so the code can find
 324  * all per-transport information quickly.
 325  */
 326 
 327 #include        <sys/types.h>
 328 #include        <sys/inttypes.h>
 329 #include        <sys/stream.h>
 330 #include        <sys/stropts.h>
 331 #define _SUN_TPI_VERSION 2
 332 #include        <sys/tihdr.h>
 333 #include        <sys/strlog.h>
 334 #include        <sys/debug.h>
 335 #include        <sys/cred.h>
 336 #include        <sys/errno.h>
 337 #include        <sys/kmem.h>
 338 #include        <sys/id_space.h>
 339 #include        <sys/modhash.h>
 340 #include        <sys/mkdev.h>
 341 #include        <sys/tl.h>
 342 #include        <sys/stat.h>
 343 #include        <sys/conf.h>
 344 #include        <sys/modctl.h>
 345 #include        <sys/strsun.h>
 346 #include        <sys/socket.h>
 347 #include        <sys/socketvar.h>
 348 #include        <sys/sysmacros.h>
 349 #include        <sys/xti_xtiopt.h>
 350 #include        <sys/ddi.h>
 351 #include        <sys/sunddi.h>
 352 #include        <sys/zone.h>
 353 #include        <inet/common.h>   /* typedef int (*pfi_t)() for inet/optcom.h */
 354 #include        <inet/optcom.h>
 355 #include        <sys/strsubr.h>
 356 #include        <sys/ucred.h>
 357 #include        <sys/suntpi.h>
 358 #include        <sys/list.h>
 359 #include        <sys/serializer.h>
 360 
 361 /*
 362  * TBD List
 363  * 14 Eliminate state changes through table
 364  * 16. AF_UNIX socket options
 365  * 17. connect() for ticlts
 366  * 18. support for "netstat" to show AF_UNIX plus TLI local
 367  *      transport connections
 368  * 21. sanity check to flushing on sending M_ERROR
 369  */
 370 
 371 /*
 372  * CONSTANT DECLARATIONS
 373  * --------------------
 374  */
 375 
 376 /*
 377  * Local declarations
 378  */
     /* Entry of the external state table ti_statetbl for event EV in state ST. */
 379 #define NEXTSTATE(EV, ST)       ti_statetbl[EV][ST]
 380
 381 #define BADSEQNUM       (-1)    /* initial seq number used by T_DISCON_IND */
 382 #define TL_BUFWAIT      (10000) /* usecs to wait for allocb buffer timeout */
 383 #define TL_TIDUSZ (64*1024)     /* tidu size when "strmsgz" is unlimited (0) */
 384 /*
 385  * Hash tables size.
      * NOTE(review): presumably the size handed to the hash table creation
      * routine for the per-transport tables; that code is not visible in this
      * part of the file - confirm.
 386  */
 387 #define TL_HASH_SIZE 311
 388 
 389 /*
 390  * Definitions for module_info
      * These are the values the tl_minfo initializer is built from, in the same
      * order as its fields.
 391  */
 392 #define         TL_ID           (104)           /* module ID number */
 393 #define         TL_NAME         "tl"            /* module name */
 394 #define         TL_MINPSZ       (0)             /* min packet size */
 395 #define         TL_MAXPSZ       INFPSZ          /* max packet size ZZZ */
 396 #define         TL_HIWAT        (16*1024)       /* hi water mark */
 397 #define         TL_LOWAT        (256)           /* lo water mark */
 398 /*
 399  * Definition of minor numbers/modes for new transport provider modes.
 400  * We view the socket use as a separate mode to get a separate name space.
      *
      * The low two bits select the transport type and TL_SOCKET (0x4) is OR-ed in
      * for the socket flavour of each type, so all mode values fit in
      * TL_MINOR_MASK.
 401  */
 402 #define         TL_TICOTS       0       /* connection oriented transport */
 403 #define         TL_TICOTSORD    1       /* COTS w/ orderly release */
 404 #define         TL_TICLTS       2       /* connectionless transport */
 405 #define         TL_UNUSED       3       /* value 3 is not a transport mode */
 406 #define         TL_SOCKET       4       /* Socket */
 407 #define         TL_SOCK_COTS    (TL_SOCKET|TL_TICOTS)    /* 4 */
 408 #define         TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) /* 5 */
 409 #define         TL_SOCK_CLTS    (TL_SOCKET|TL_TICLTS)    /* 6 */
 410
 411 #define         TL_MINOR_MASK   0x7     /* covers every mode value above */
     /* Evaluates to 3. NOTE(review): its use is not visible here - confirm. */
 412 #define         TL_MINOR_START  (TL_TICLTS + 1)
 413 
 414 /*
 415  * LOCAL MACROS
 416  */
     /* Applies P2ROUNDUP to p with an alignment of sizeof (t_scalar_t). */
 417 #define T_ALIGN(p)      P2ROUNDUP((p), sizeof (t_scalar_t))
 418 
 419 /*
 420  * EXTERNAL VARIABLE DECLARATIONS
 421  * -----------------------------
 422  */
 423 /*
 424  * state table defined in the OS space.c
      * Indexed as [event][state]; this file reads it through NEXTSTATE().
 425  */
 426 extern  char    ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
 427 
 428 /*
 429  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
      * The first group is wired into the qinit structures (tl_rinit/tl_winit);
      * the second group is passed to DDI_DEFINE_STREAM_OPS().
 430  */
 431 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);     /* qi_qopen */
 432 static int tl_close(queue_t *, int, cred_t *);         /* qi_qclose */
 433 static void tl_wput(queue_t *, mblk_t *);      /* write side qi_putp */
 434 static void tl_wsrv(queue_t *);                        /* write side qi_srvp */
 435 static void tl_rsrv(queue_t *);                        /* read side qi_srvp */
 436
 437 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
 438 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
 439 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 440 
 441 
 442 /*
 443  * GLOBAL DATA STRUCTURES AND VARIABLES
 444  * -----------------------------------
 445  */
 446 
 447 /*
 448  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
 449  * For now, we only manage the SO_RECVUCRED option but we also have
 450  * harmless dummy options to make things work with some common code we access.
      *
      * Each entry is a positional opdes_t initializer; the field order comes from
      * the opdes_t declaration, which is not part of this file.
      * TL_OPT_ARR_CNT and TL_MAX_OPT_BUF_LEN are derived from this array's size.
 451  */
 452 opdes_t tl_opt_arr[] = {
 453         /* The SO_TYPE is needed for the hack below */
 454         {
 455                 SO_TYPE,                /* option */
 456                 SOL_SOCKET,             /* level */
 457                 OA_R,
 458                 OA_R,
 459                 OP_NP,
 460                 0,
 461                 sizeof (t_scalar_t),
 462                 { 0 }
 463         },
 464         {
 465                 SO_RECVUCRED,           /* option */
 466                 SOL_SOCKET,             /* level */
 467                 OA_RW,
 468                 OA_RW,
 469                 OP_NP,
 470                 0,
 471                 sizeof (int),
 472                 { 0 }
 473         }
 474 };
 475 
 476 /*
 477  * Table of all supported levels
 478  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 479  * any supported options so we need this info separately.
 480  *
 481  * This is needed only for topmost tpi providers.
 482  */
 483 optlevel_t      tl_valid_levels_arr[] = {
 484         XTI_GENERIC,
 485         SOL_SOCKET,
 486         TL_PROT_LEVEL
 487 };
 488
     /* Number of entries in tl_valid_levels_arr. */
 489 #define TL_VALID_LEVELS_CNT     A_CNT(tl_valid_levels_arr)
 490 /*
 491  * Current upper bound on the amount of space needed to return all options.
 492  * Additional options with data size of sizeof(long) are handled automatically.
 493  * Others must be accounted for by hand.
      *
      * The bound is the sum of: 4 bytes per tl_opt_arr entry (the << 2 term), one
      * struct opthdr per entry, a fixed 64 bytes, and one struct T_optmgmt_ack.
 494  */
 495 #define TL_MAX_OPT_BUF_LEN                                              \
 496                 ((A_CNT(tl_opt_arr) << 2) +                               \
 497                 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +          \
 498                 64 + sizeof (struct T_optmgmt_ack))
 499
     /* Number of entries in tl_opt_arr. */
 500 #define TL_OPT_ARR_CNT  A_CNT(tl_opt_arr)
 501 
 502 /*
 503  *      transport addr structure
      *      Pairs an address buffer and its length with the zone it is scoped
      *      to. Embedded in each endpoint as te_ap.
 504  */
 505 typedef struct tl_addr {
 506         zoneid_t        ta_zoneid;              /* Zone scope of address */
 507         t_scalar_t      ta_alen;                /* length of abuf */
 508         void            *ta_abuf;               /* the addr itself */
 509 } tl_addr_t;
 510 
 511 /*
 512  * Refcounted version of serializer.
      * Wraps a serializer_t with a count so that it can be shared; endpoints
      * point at one through te_ser.
 513  */
 514 typedef struct tl_serializer {
 515         uint_t          ts_refcnt;      /* reference count on this wrapper */
 516         serializer_t    *ts_serializer; /* the underlying serializer */
 517 } tl_serializer_t;
 518 
 519 /*
 520  * Each transport type has a separate state.
 521  * Per-transport state.
      * Instances live in the tl_transports[] table; endpoints reach theirs
      * through te_transport.
 522  */
 523 typedef struct tl_transport_state {
 524         char            *tr_name;       /* transport name, e.g. "ticots" */
 525         minor_t         tr_minor;       /* mode value (TL_TICOTS etc.) */
 526         uint32_t        tr_defaddr;     /* initialized to TL_DFADDR */
 527         mod_hash_t      *tr_ai_hash;    /* aliased as te_aihash */
 528         mod_hash_t      *tr_addr_hash;  /* aliased as te_addrhash */
 529         tl_serializer_t *tr_serializer; /* refcounted serializer */
 530 } tl_transport_state_t;
 531 
 532 #define TL_DFADDR 0x1000        /* initial tr_defaddr of every transport */
 533
     /*
      * Per-transport state table. Entries are ordered so that the array index
      * equals the entry's tr_minor value (TL_TICOTS == 0 through
      * TL_SOCK_CLTS == 6); the "undefined" entry occupies slot 3 (TL_UNUSED).
      * Both hash table pointers and the serializer pointer start out NULL in
      * every row.
      * NOTE(review): presumably they are filled in when the driver attaches -
      * that code is not visible here; confirm.
      */
 534 static tl_transport_state_t tl_transports[] = {
 535         { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
 536         { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
 537         { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
 538         { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
 539         { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
 540         { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
 541         { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
 542 };
 543
     /* Number of entries in tl_transports. */
 544 #define TL_MAXTRANSPORT A_CNT(tl_transports)
 545 
 546 struct tl_endpt;        /* defined below; forward declared for the typedefs */
 547 typedef struct tl_endpt tl_endpt_t;
 548
     /* Function type taking a message block and an endpoint, returning nothing. */
 549 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
 550 
 551 /*
 552  * Data structure used to represent pending connects.
 553  * Records enough information so that the connecting peer can close
 554  * before the connection gets accepted.
      * Instances are linked through ti_node; a connection-oriented endpoint
      * keeps its list of them in te_iconp.
 555  */
 556 typedef struct tl_icon {
 557         list_node_t     ti_node;        /* list linkage */
 558         struct tl_endpt *ti_tep;        /* NULL if peer has already closed */
 559         mblk_t          *ti_mp;         /* b_next list of data + ordrel_ind */
 560         t_scalar_t      ti_seqno;       /* Sequence number */
 561 } tl_icon_t;
 562 
 563 typedef struct so_ux_addr soux_addr_t;  /* short name for struct so_ux_addr */
 564 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t)    /* its size in bytes */
 565 
 566 /*
 567  * Maximum number of unaccepted connection indications allowed per listener.
      * TL_MAXQLEN is the compiled-in default; tl_maxqlen is the variable that
      * holds it.
      * NOTE(review): tl_maxqlen is a non-static global, presumably so the limit
      * can be tuned - confirm.
 568  */
 569 #define TL_MAXQLEN      4096
 570 int tl_maxqlen = TL_MAXQLEN;
 571 
 572 /*
 573  *      transport endpoint structure
      *
      *      Holds all state of one endpoint. State that only applies to
      *      connection-oriented or only to connectionless transports shares
      *      storage in the _te_transport_state union. The te_* macros defined
      *      inside the structure are shorthand for nested members and are the
      *      names used to access them.
 574  */
 575 struct tl_endpt {
 576         queue_t         *te_rq;         /* stream read queue */
 577         queue_t         *te_wq;         /* stream write queue */
 578         uint32_t        te_refcnt;      /* reference count */
 579         int32_t         te_state;       /* TPI state of endpoint */
 580         minor_t         te_minor;       /* minor number */
 581 #define te_seqno        te_minor        /* te_seqno is an alias of te_minor */
 582         uint_t          te_flag;        /* flag field */
 583         boolean_t       te_nowsrv;      /* set by TL_PUTBQ, cleared by */
                                             /* TL_QENABLE and TL_PUTQ */
 584         tl_serializer_t *te_ser;        /* Serializer to use */
 585 #define te_serializer   te_ser->ts_serializer
 586
 587         soux_addr_t     te_uxaddr;      /* Socket address */
 588 #define te_magic        te_uxaddr.soua_magic
 589 #define te_vp           te_uxaddr.soua_vp
 590         tl_addr_t       te_ap;          /* addr bound to this endpt */
 591 #define te_zoneid te_ap.ta_zoneid
 592 #define te_alen te_ap.ta_alen
 593 #define te_abuf te_ap.ta_abuf
 594
 595         tl_transport_state_t *te_transport;     /* per-transport state */
 596 #define te_addrhash     te_transport->tr_addr_hash
 597 #define te_aihash       te_transport->tr_ai_hash
 598 #define te_defaddr      te_transport->tr_defaddr
 599         cred_t          *te_credp;      /* endpoint user credentials */
 600         mod_hash_hndl_t te_hash_hndl;   /* Handle for address hash */
 601
 602         /*
 603          * State specific for connection-oriented and connectionless transports.
 604          */
 605         union {
 606                 /* Connection-oriented state. */
 607                 struct {
 608                         t_uscalar_t _te_nicon;  /* count of conn requests */
 609                         t_uscalar_t _te_qlen;   /* max conn requests */
 610                         tl_endpt_t  *_te_oconp; /* conn request pending */
 611                         tl_endpt_t  *_te_conp;  /* connected endpt */
 612 #ifndef _ILP32
                             /*
                              * NOTE(review): padding present only when
                              * _ILP32 is not defined; its purpose is not
                              * stated here - confirm before changing.
                              */
 613                         void        *_te_pad;
 614 #endif
 615                         list_t  _te_iconp;      /* list of conn ind. pending */
 616                 } _te_cots_state;
 617                 /* Connection-less state. */
 618                 struct {
 619                         tl_endpt_t *_te_lastep; /* last dest. endpoint */
 620                         tl_endpt_t *_te_flowq;  /* flow controlled on whom */
 621                         list_node_t _te_flows;  /* lists of connections */
 622                         list_t  _te_flowlist;   /* Who flowcontrols on me */
 623                 } _te_clts_state;
 624         } _te_transport_state;
 625 #define te_nicon        _te_transport_state._te_cots_state._te_nicon
 626 #define te_qlen         _te_transport_state._te_cots_state._te_qlen
 627 #define te_oconp        _te_transport_state._te_cots_state._te_oconp
 628 #define te_conp         _te_transport_state._te_cots_state._te_conp
 629 #define te_iconp        _te_transport_state._te_cots_state._te_iconp
 630 #define te_lastep       _te_transport_state._te_clts_state._te_lastep
 631 #define te_flowq        _te_transport_state._te_clts_state._te_flowq
 632 #define te_flowlist     _te_transport_state._te_clts_state._te_flowlist
 633 #define te_flows        _te_transport_state._te_clts_state._te_flows
 634
 635         bufcall_id_t    te_bufcid;      /* outstanding bufcall id */
 636         timeout_id_t    te_timoutid;    /* outstanding timeout id */
 637         pid_t           te_cpid;        /* cached pid of endpoint */
 638         t_uscalar_t     te_acceptor_id; /* acceptor id for T_CONN_RES */
 639         /*
 640          * Pieces of the endpoint state needed for closing.
 641          */
 642         kmutex_t        te_closelock;
 643         kcondvar_t      te_closecv;
 644         uint8_t         te_closing;     /* The endpoint started closing */
 645         uint8_t         te_closewait;   /* Wait in close until zero */
 646         mblk_t          te_closemp;     /* for entering serializer on close */
 647         mblk_t          te_rsrvmp;      /* for entering serializer on rsrv */
 648         mblk_t          te_wsrvmp;      /* for entering serializer on wsrv */
             /*
              * NOTE(review): te_srv_lock and te_srv_cv presumably protect and
              * signal the two *_active flags below - confirm against the
              * service routines.
              */
 649         kmutex_t        te_srv_lock;
 650         kcondvar_t      te_srv_cv;
 651         uint8_t         te_rsrv_active; /* Running in tl_rsrv() */
 652         uint8_t         te_wsrv_active; /* Running in tl_wsrv() */
 653         /*
 654          * Pieces of the endpoint state needed for serializer transitions.
 655          */
 656         kmutex_t        te_ser_lock;    /* Protects the count below */
 657         uint_t          te_ser_count;   /* Number of messages on serializer */
 658 };
 659 
 660 /*
 661  * Flag values. Lower 4 bits specify the transport used.
 662  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
 663  * they allow the endpoint to be identified more easily.
 664  */
 665 #define TL_LISTENER     0x00010 /* the listener endpoint */
 666 #define TL_ACCEPTOR     0x00020 /* the accepting endpoint */
 667 #define TL_EAGER        0x00040 /* connecting endpoint */
 668 #define TL_ACCEPTED     0x00080 /* accepted connection */
 669 #define TL_SETCRED      0x00100 /* flag to indicate sending of credentials */
 670 #define TL_SETUCRED     0x00200 /* flag to indicate sending of ucred */
 671 #define TL_SOCKUCRED    0x00400 /* flag to indicate sending of SCM_UCRED */
 672 #define TL_ADDRHASHED   0x01000 /* Endpoint address is stored in te_addrhash */
 673 #define TL_CLOSE_SER    0x10000 /* Endpoint close has entered the serializer */
 674 /*
 675  * Boolean checks for the endpoint type.
      * Each one tests a single mode bit of te_flag: TL_TICLTS (0x2) for
      * connectionless, TL_TICOTSORD (0x1) for orderly release and TL_SOCKET (0x4)
      * for sockets. IS_COTS() is the exact complement of IS_CLTS().
 676  */
 677 #define         IS_CLTS(x)      (((x)->te_flag & TL_TICLTS) != 0)
 678 #define         IS_COTS(x)      (((x)->te_flag & TL_TICLTS) == 0)
 679 #define         IS_COTSORD(x)   (((x)->te_flag & TL_TICOTSORD) != 0)
 680 #define         IS_SOCKET(x)    (((x)->te_flag & TL_SOCKET) != 0)
 681 
 682 /*
 683  * Certain operations are always used together. These macros reduce the chance
 684  * of missing a part of a combination.
      *
      * Every macro here expands to a bare { } block, not do { } while (0), so
      * "if (c) TL_xxx(...); else ..." without braces around the macro does not
      * parse.
      *
      * TL_UNCONNECT(x)   calls tl_refrele(x) and then sets x to NULL. x is
      *                   expanded twice and assigned to, so it must be a
      *                   modifiable lvalue without side effects.
      * TL_REMOVE_PEER(x) does TL_UNCONNECT(x) only when x is not NULL.
 685  */
 686 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
 687 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
 688
     /*
      * Put mp back on the endpoint's write queue and set te_nowsrv. Asserts that
      * the TL_CLOSE_SER flag is not set on the endpoint.
      */
 689 #define TL_PUTBQ(x, mp) {               \
 690         ASSERT(!((x)->te_flag & TL_CLOSE_SER));  \
 691         (x)->te_nowsrv = B_TRUE;     \
 692         (void) putbq((x)->te_wq, mp);        \
 693 }
 694
     /* Both clear te_nowsrv, then qenable() or putq() on the write queue. */
 695 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
 696 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
 697 
/*
 * STREAMS driver glue data structures.
 */

/*
 * Module identification, packet size limits and flow-control water marks.
 * Referenced by both the read-side and write-side qinit structures.
 */
static  struct  module_info     tl_minfo = {
        TL_ID,                  /* mi_idnum */
        TL_NAME,                /* mi_idname */
        TL_MINPSZ,              /* mi_minpsz */
        TL_MAXPSZ,              /* mi_maxpsz */
        TL_HIWAT,               /* mi_hiwat */
        TL_LOWAT                /* mi_lowat */
};
 709 
/*
 * Read-side queue procedures: no put procedure, tl_rsrv() as the service
 * procedure, and the open/close entry points of the driver.
 */
static  struct  qinit   tl_rinit = {
        NULL,                   /* qi_putp */
        (int (*)())tl_rsrv,     /* qi_srvp */
        tl_open,                /* qi_qopen */
        tl_close,               /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &tl_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 719 
/*
 * Write-side queue procedures: tl_wput() and tl_wsrv(). The open and close
 * slots are NULL here; those entry points are supplied by the read side.
 */
static  struct  qinit   tl_winit = {
        (int (*)())tl_wput,     /* qi_putp */
        (int (*)())tl_wsrv,     /* qi_srvp */
        NULL,                   /* qi_qopen */
        NULL,                   /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &tl_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 729 
/*
 * Stream table tying together the read and write qinit structures. The
 * multiplexor entries are unused and left NULL.
 */
static  struct streamtab        tlinfo = {
        &tl_rinit,          /* st_rdinit */
        &tl_winit,          /* st_wrinit */
        NULL,                   /* st_muxrinit */
        NULL                    /* st_muxwrinit */
};
 736 
/*
 * Device operations for the driver (tl_devops). Wires in tl_attach(),
 * tl_detach() and tl_info(), the tlinfo stream table and the D_MP flag; the
 * remaining entry points are nulldev and quiesce is declared as not supported.
 */
DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
 739 
/*
 * Loadable driver linkage: identifies this module as a driver and points at
 * its device operations.
 */
static struct modldrv modldrv = {
        &mod_driverops,             /* Type of module -- pseudo driver here */
        "TPI Local Transport (tl)",
        &tl_devops,         /* driver ops */
};
 745 
/*
 * Module linkage information for the kernel. This is the object handed to
 * mod_install(), mod_remove() and mod_info() by _init(), _fini() and _info().
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        { &modldrv, NULL }
};
 753 
/*
 * Templates for response to info request
 * Check sanity of unlimited connect data etc.
 *
 * The two macros below are the PROVIDER_flag values placed in the
 * connectionless and connection-oriented T_info_ack templates respectively;
 * both currently carry the same flags.
 */

#define         TL_CLTS_PROVIDER_FLAG   (XPG4_1|SENDZERO)
#define         TL_COTS_PROVIDER_FLAG   (XPG4_1|SENDZERO)
 761 
/*
 * T_INFO_ACK template for connection-oriented transports. All size limits are
 * T_INFINITE except the TIDU size, which is left as 0 here and filled in at
 * run time.
 * NOTE(review): CURRENT_state is -1 in the template; presumably the real
 * endpoint state is substituted when the ack is built - confirm at the users.
 */
static struct T_info_ack tl_cots_info_ack =
        {
                T_INFO_ACK,     /* PRIM_type -always T_INFO_ACK */
                T_INFINITE,     /* TSDU size */
                T_INFINITE,     /* ETSDU size */
                T_INFINITE,     /* CDATA_size */
                T_INFINITE,     /* DDATA_size */
                T_INFINITE,     /* ADDR_size  */
                T_INFINITE,     /* OPT_size */
                0,              /* TIDU_size - fill at run time */
                T_COTS,         /* SERV_type */
                -1,             /* CURRENT_state */
                TL_COTS_PROVIDER_FLAG   /* PROVIDER_flag */
        };
 776 
/*
 * T_INFO_ACK template for connectionless transports. Expedited data, connect
 * data and disconnect data are marked as not supported (-2); address and
 * option sizes are unlimited (-1). The TSDU and TIDU sizes are left as 0 here
 * and filled in at run time.
 * NOTE(review): CURRENT_state is -1 in the template; presumably the real
 * endpoint state is substituted when the ack is built - confirm at the users.
 */
static struct T_info_ack tl_clts_info_ack =
        {
                T_INFO_ACK,     /* PRIM_type - always T_INFO_ACK */
                0,              /* TSDU_size - fill at run time */
                -2,             /* ETSDU_size -2 => not supported */
                -2,             /* CDATA_size -2 => not supported */
                -2,             /* DDATA_size  -2 => not supported */
                -1,             /* ADDR_size -1 => infinite */
                -1,             /* OPT_size */
                0,              /* TIDU_size - fill at run time */
                T_CLTS,         /* SERV_type */
                -1,             /* CURRENT_state */
                TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
        };
 791 
/*
 * Private copy of the devinfo pointer, recorded by tl_attach() and returned
 * by tl_info().
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache. Created in tl_attach() and destroyed in tl_detach();
 * endpoints are allocated from it in tl_open() and returned in tl_free().
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space. Each open endpoint takes a unique minor from here in
 * tl_open() and gives it back in tl_free().
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size. Set in tl_attach() from strmsgsz, or to TL_TIDUSZ
 * when strmsgsz is 0.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables. Used as the size argument when the per-transport hash
 * tables are created in tl_attach().
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
/* NOTE(review): not used in this part of the file; purpose to be confirmed. */
static int tl_client_closing_when_accepting;

/* NOTE(review): not used in this part of the file; purpose to be confirmed. */
static int tl_serializer_noswitch;
 824 
 825 /*
 826  * LOCAL FUNCTION PROTOTYPES
 827  * -------------------------
 828  */
 829 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
 830 static void tl_do_proto(mblk_t *, tl_endpt_t *);
 831 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
 832 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
 833 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
 834         t_scalar_t);
 835 static void tl_bind(mblk_t *, tl_endpt_t *);
 836 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
 837 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
 838 static void tl_unbind(mblk_t *, tl_endpt_t *);
 839 static void tl_optmgmt(queue_t *, mblk_t *);
 840 static void tl_conn_req(queue_t *, mblk_t *);
 841 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
 842 static void tl_conn_res(mblk_t *, tl_endpt_t *);
 843 static void tl_discon_req(mblk_t *, tl_endpt_t *);
 844 static void tl_capability_req(mblk_t *, tl_endpt_t *);
 845 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
 846 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
 847 static void tl_info_req(mblk_t *, tl_endpt_t *);
 848 static void tl_addr_req(mblk_t *, tl_endpt_t *);
 849 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
 850 static void tl_data(mblk_t  *, tl_endpt_t *);
 851 static void tl_exdata(mblk_t *, tl_endpt_t *);
 852 static void tl_ordrel(mblk_t *, tl_endpt_t *);
 853 static void tl_unitdata(mblk_t *, tl_endpt_t *);
 854 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
 855 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
 856 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
 857 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
 858 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
 859 static void tl_cl_backenable(tl_endpt_t *);
 860 static void tl_co_unconnect(tl_endpt_t *);
 861 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
 862 static void tl_discon_ind(tl_endpt_t *, uint32_t);
 863 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
 864 static mblk_t *tl_ordrel_ind_alloc(void);
 865 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
 866 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
 867 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
 868 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
 869 static void tl_icon_freemsgs(mblk_t **);
 870 static void tl_merror(queue_t *, mblk_t *, int);
 871 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
 872 static int tl_default_opt(queue_t *, int, int, uchar_t *);
 873 static int tl_get_opt(queue_t *, int, int, uchar_t *);
 874 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
 875     uchar_t *, void *, cred_t *);
 876 static void tl_memrecover(queue_t *, mblk_t *, size_t);
 877 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
 878 static void tl_free(tl_endpt_t *);
 879 static int  tl_constructor(void *, void *, int);
 880 static void tl_destructor(void *, void *);
 881 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
 882 static tl_serializer_t *tl_serializer_alloc(int);
 883 static void tl_serializer_refhold(tl_serializer_t *);
 884 static void tl_serializer_refrele(tl_serializer_t *);
 885 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
 886 static void tl_serializer_exit(tl_endpt_t *);
 887 static boolean_t tl_noclose(tl_endpt_t *);
 888 static void tl_closeok(tl_endpt_t *);
 889 static void tl_refhold(tl_endpt_t *);
 890 static void tl_refrele(tl_endpt_t *);
 891 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
 892 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
 893 static void tl_close_ser(mblk_t *, tl_endpt_t *);
 894 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
 895 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
 896 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
 897 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
 898 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
 899 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
 900 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
 901 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
 902 static void tl_addr_unbind(tl_endpt_t *);
 903 
/*
 * Initialize the option database object for TL. It bundles the option
 * default/get/set callbacks with the option table and the table of valid
 * option levels.
 */

optdb_obj_t tl_opt_obj = {
        tl_default_opt,         /* TL default value function pointer */
        tl_get_opt,             /* TL get function pointer */
        tl_set_opt,             /* TL set function pointer */
        TL_OPT_ARR_CNT,         /* TL option database count of entries */
        tl_opt_arr,             /* TL option database */
        TL_VALID_LEVELS_CNT,    /* TL valid level count of entries */
        tl_valid_levels_arr     /* TL valid level array */
};
 917 
 918 /*
 919  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
 920  * ---------------------------------------
 921  */
 922 
 923 /*
 924  * Loadable module routines
 925  */
 926 int
 927 _init(void)
 928 {
 929         return (mod_install(&modlinkage));
 930 }
 931 
 932 int
 933 _fini(void)
 934 {
 935         return (mod_remove(&modlinkage));
 936 }
 937 
 938 int
 939 _info(struct modinfo *modinfop)
 940 {
 941         return (mod_info(&modlinkage, modinfop));
 942 }
 943 
/*
 * Driver Entry Points and Other routines
 */

/*
 * Driver attach entry point.
 *
 * DDI_RESUME succeeds without doing any work and every command other than
 * DDI_ATTACH fails. For DDI_ATTACH the routine picks the TIDU size, creates
 * one minor node per transport, the endpoint cache and the minor number ID
 * space, then sets up the hash tables (and, for connectionless transports,
 * the shared serializer) of every transport before recording devi in tl_dip.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
        int i;
        char name[32];  /* scratch buffer for hash table names */

        /*
         * Resume from a checkpoint state.
         */
        if (cmd == DDI_RESUME)
                return (DDI_SUCCESS);

        if (cmd != DDI_ATTACH)
                return (DDI_FAILURE);

        /*
         * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
         * streams message sizes can be unlimited. We use a defined constant
         * instead.
         */
        tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

        /*
         * Create subdevices for each transport. On failure remove every minor
         * node created so far.
         */
        for (i = 0; i < TL_UNUSED; i++) {
                if (ddi_create_minor_node(devi,
                    tl_transports[i].tr_name,
                    S_IFCHR, tl_transports[i].tr_minor,
                    DDI_PSEUDO, NULL) == DDI_FAILURE) {
                        ddi_remove_minor_node(devi, NULL);
                        return (DDI_FAILURE);
                }
        }

        /*
         * Create the cache that endpoints are allocated from.
         */
        tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
            0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

        if (tl_cache == NULL) {
                ddi_remove_minor_node(devi, NULL);
                return (DDI_FAILURE);
        }

        /*
         * Create ID space for minor numbers
         */
        tl_minors = id_space_create("tl_minor_space",
            TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

        /*
         * Set up the per-transport state: acceptor ID hash, address hash and
         * (for connectionless transports) the shared serializer.
         */
        for (i = 0; i < TL_MAXTRANSPORT; i++) {
                tl_transport_state_t *t = &tl_transports[i];

                if (i == TL_UNUSED)
                        continue;

                /*
                 * Socket COTSORD shares namespace with COTS. The assertions
                 * check that the TL_SOCK_COTS tables were already created by
                 * an earlier iteration.
                 */
                if (i == TL_SOCK_COTSORD) {
                        t->tr_ai_hash =
                            tl_transports[TL_SOCK_COTS].tr_ai_hash;
                        ASSERT(t->tr_ai_hash != NULL);
                        t->tr_addr_hash =
                            tl_transports[TL_SOCK_COTS].tr_addr_hash;
                        ASSERT(t->tr_addr_hash != NULL);
                        continue;
                }

                /*
                 * Create hash tables.
                 *
                 * The acceptor ID hash comes first. On _ILP32 non-socket
                 * transports use a pointer hash (tl_open() uses the read
                 * queue pointer as the acceptor ID there); in every other
                 * case the acceptor ID is the minor number and an ID hash is
                 * used.
                 */
                (void) snprintf(name, sizeof (name), "%s_ai_hash",
                    t->tr_name);
#ifdef _ILP32
                if (i & TL_SOCKET)
                        t->tr_ai_hash =
                            mod_hash_create_idhash(name, tl_hash_size - 1,
                            mod_hash_null_valdtor);
                else
                        t->tr_ai_hash =
                            mod_hash_create_ptrhash(name, tl_hash_size,
                            mod_hash_null_valdtor, sizeof (queue_t));
#else
                t->tr_ai_hash =
                    mod_hash_create_idhash(name, tl_hash_size - 1,
                    mod_hash_null_valdtor);
#endif /* _ILP32 */

                /*
                 * Address hash: a pointer hash for socket transports, and a
                 * hash keyed by tl_addr_t (tl_hash_by_addr/tl_hash_cmp_addr)
                 * for the others.
                 */
                if (i & TL_SOCKET) {
                        (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
                            t->tr_name);
                        t->tr_addr_hash = mod_hash_create_ptrhash(name,
                            tl_hash_size, mod_hash_null_valdtor,
                            sizeof (uintptr_t));
                } else {
                        (void) snprintf(name, sizeof (name), "%s_addr_hash",
                            t->tr_name);
                        t->tr_addr_hash = mod_hash_create_extended(name,
                            tl_hash_size, mod_hash_null_keydtor,
                            mod_hash_null_valdtor,
                            tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
                }

                /* Create serializer for connectionless transports. */
                if (i & TL_TICLTS)
                        t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
        }

        /* Remember the devinfo node for tl_info(). */
        tl_dip = devi;

        return (DDI_SUCCESS);
}
1057 
1058 static int
1059 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1060 {
1061         int i;
1062 
1063         if (cmd == DDI_SUSPEND)
1064                 return (DDI_SUCCESS);
1065 
1066         if (cmd != DDI_DETACH)
1067                 return (DDI_FAILURE);
1068 
1069         /*
1070          * Destroy arenas and hash tables.
1071          */
1072         for (i = 0; i < TL_MAXTRANSPORT; i++) {
1073                 tl_transport_state_t *t = &tl_transports[i];
1074 
1075                 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1076                         continue;
1077 
1078                 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1079                 if (t->tr_serializer != NULL) {
1080                         tl_serializer_refrele(t->tr_serializer);
1081                         t->tr_serializer = NULL;
1082                 }
1083 
1084 #ifdef _ILP32
1085                 if (i & TL_SOCKET)
1086                         mod_hash_destroy_idhash(t->tr_ai_hash);
1087                 else
1088                         mod_hash_destroy_ptrhash(t->tr_ai_hash);
1089 #else
1090                 mod_hash_destroy_idhash(t->tr_ai_hash);
1091 #endif /* _ILP32 */
1092                 t->tr_ai_hash = NULL;
1093                 if (i & TL_SOCKET)
1094                         mod_hash_destroy_ptrhash(t->tr_addr_hash);
1095                 else
1096                         mod_hash_destroy_hash(t->tr_addr_hash);
1097                 t->tr_addr_hash = NULL;
1098         }
1099 
1100         kmem_cache_destroy(tl_cache);
1101         tl_cache = NULL;
1102         id_space_destroy(tl_minors);
1103         tl_minors = NULL;
1104         ddi_remove_minor_node(devi, NULL);
1105         return (DDI_SUCCESS);
1106 }
1107 
1108 /* ARGSUSED */
1109 static int
1110 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1111 {
1112 
1113         int retcode = DDI_FAILURE;
1114 
1115         switch (infocmd) {
1116 
1117         case DDI_INFO_DEVT2DEVINFO:
1118                 if (tl_dip != NULL) {
1119                         *result = (void *)tl_dip;
1120                         retcode = DDI_SUCCESS;
1121                 }
1122                 break;
1123 
1124         case DDI_INFO_DEVT2INSTANCE:
1125                 *result = (void *)0;
1126                 retcode = DDI_SUCCESS;
1127                 break;
1128 
1129         default:
1130                 break;
1131         }
1132         return (retcode);
1133 }
1134 
/*
 * Endpoint reference management.
 */

/*
 * Take an additional reference on the endpoint by atomically incrementing
 * te_refcnt. Each hold is expected to be paired with a tl_refrele().
 */
static void
tl_refhold(tl_endpt_t *tep)
{
        atomic_inc_32(&tep->te_refcnt);
}
1143 
1144 static void
1145 tl_refrele(tl_endpt_t *tep)
1146 {
1147         ASSERT(tep->te_refcnt != 0);
1148 
1149         if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
1150                 tl_free(tep);
1151 }
1152 
1153 /*ARGSUSED*/
1154 static int
1155 tl_constructor(void *buf, void *cdrarg, int kmflags)
1156 {
1157         tl_endpt_t *tep = buf;
1158 
1159         bzero(tep, sizeof (tl_endpt_t));
1160         mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1161         cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1162         mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1163         cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1164         mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1165 
1166         return (0);
1167 }
1168 
1169 /*ARGSUSED*/
1170 static void
1171 tl_destructor(void *buf, void *cdrarg)
1172 {
1173         tl_endpt_t *tep = buf;
1174 
1175         mutex_destroy(&tep->te_closelock);
1176         cv_destroy(&tep->te_closecv);
1177         mutex_destroy(&tep->te_srv_lock);
1178         cv_destroy(&tep->te_srv_cv);
1179         mutex_destroy(&tep->te_ser_lock);
1180 }
1181 
/*
 * Release an endpoint whose last reference has been dropped (called from
 * tl_refrele()). Frees the address buffer and minor number, cancels an unused
 * hash reservation, drops peer and serializer references for
 * connection-oriented endpoints, resets per-open state and returns the
 * endpoint to tl_cache.
 *
 * The assertions spell out the state the endpoint must already be in: closed
 * (no queues, no credentials), unbound, and with no pending serializer,
 * bufcall or timeout activity.
 */
static void
tl_free(tl_endpt_t *tep)
{
        ASSERT(tep->te_refcnt == 0);
        ASSERT(tep->te_transport != NULL);
        ASSERT(tep->te_rq == NULL);
        ASSERT(tep->te_wq == NULL);
        ASSERT(tep->te_ser != NULL);
        ASSERT(tep->te_ser_count == 0);
        ASSERT(! (tep->te_flag & TL_ADDRHASHED));

        if (IS_SOCKET(tep)) {
                /*
                 * Socket endpoints use the embedded te_uxaddr as address
                 * buffer (see tl_open()), so there is nothing to free.
                 */
                ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
                ASSERT(tep->te_abuf == &tep->te_uxaddr);
                ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
                ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
        } else if (tep->te_abuf != NULL) {
                kmem_free(tep->te_abuf, tep->te_alen);
                tep->te_alen = -1; /* uninitialized */
                tep->te_abuf = NULL;
        } else {
                ASSERT(tep->te_alen == -1);
        }

        /* Give back the minor number allocated in tl_open(). */
        id_free(tl_minors, tep->te_minor);
        ASSERT(tep->te_credp == NULL);

        /* Cancel the hash reservation made in tl_open() if it is still held. */
        if (tep->te_hash_hndl != NULL)
                mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

        if (IS_COTS(tep)) {
                TL_REMOVE_PEER(tep->te_conp);
                TL_REMOVE_PEER(tep->te_oconp);
                /*
                 * Connection-oriented endpoints own a private serializer
                 * allocated in tl_open(); drop that reference here.
                 */
                tl_serializer_refrele(tep->te_ser);
                tep->te_ser = NULL;
                ASSERT(tep->te_nicon == 0);
                ASSERT(list_head(&tep->te_iconp) == NULL);
        } else {
                /*
                 * Connectionless endpoints use the transport's shared
                 * serializer, which is not released here.
                 */
                ASSERT(tep->te_lastep == NULL);
                ASSERT(list_head(&tep->te_flowlist) == NULL);
                ASSERT(tep->te_flowq == NULL);
        }

        ASSERT(tep->te_bufcid == 0);
        ASSERT(tep->te_timoutid == 0);
        bzero(&tep->te_ap, sizeof (tep->te_ap));
        tep->te_acceptor_id = 0;

        ASSERT(tep->te_closewait == 0);
        ASSERT(!tep->te_rsrv_active);
        ASSERT(!tep->te_wsrv_active);
        /* Reset per-open state before the buffer goes back to the cache. */
        tep->te_closing = 0;
        tep->te_nowsrv = B_FALSE;
        tep->te_flag = 0;

        kmem_cache_free(tl_cache, tep);
}
1239 
1240 /*
1241  * Allocate/free reference-counted wrappers for serializers.
1242  */
1243 static tl_serializer_t *
1244 tl_serializer_alloc(int flags)
1245 {
1246         tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1247         serializer_t *ser;
1248 
1249         if (s == NULL)
1250                 return (NULL);
1251 
1252         ser = serializer_create(flags);
1253 
1254         if (ser == NULL) {
1255                 kmem_free(s, sizeof (tl_serializer_t));
1256                 return (NULL);
1257         }
1258 
1259         s->ts_refcnt = 1;
1260         s->ts_serializer = ser;
1261         return (s);
1262 }
1263 
/*
 * Take an additional reference on a serializer wrapper by atomically
 * incrementing ts_refcnt. Paired with tl_serializer_refrele().
 */
static void
tl_serializer_refhold(tl_serializer_t *s)
{
        atomic_inc_32(&s->ts_refcnt);
}
1269 
1270 static void
1271 tl_serializer_refrele(tl_serializer_t *s)
1272 {
1273         if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
1274                 serializer_destroy(s->ts_serializer);
1275                 kmem_free(s, sizeof (tl_serializer_t));
1276         }
1277 }
1278 
1279 /*
1280  * Post a request on the endpoint serializer. For COTS transports keep track of
1281  * the number of pending requests.
1282  */
1283 static void
1284 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1285 {
1286         if (IS_COTS(tep)) {
1287                 mutex_enter(&tep->te_ser_lock);
1288                 tep->te_ser_count++;
1289                 mutex_exit(&tep->te_ser_lock);
1290         }
1291         serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1292 }
1293 
1294 /*
1295  * Complete processing the request on the serializer. Decrement the counter for
1296  * pending requests for COTS transports.
1297  */
1298 static void
1299 tl_serializer_exit(tl_endpt_t *tep)
1300 {
1301         if (IS_COTS(tep)) {
1302                 mutex_enter(&tep->te_ser_lock);
1303                 ASSERT(tep->te_ser_count != 0);
1304                 tep->te_ser_count--;
1305                 mutex_exit(&tep->te_ser_lock);
1306         }
1307 }
1308 
1309 /*
1310  * Hash management functions.
1311  */
1312 
1313 /*
1314  * Return TRUE if two addresses are equal, false otherwise.
1315  */
1316 static boolean_t
1317 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1318 {
1319         return ((ap1->ta_alen > 0) &&
1320             (ap1->ta_alen == ap2->ta_alen) &&
1321             (ap1->ta_zoneid == ap2->ta_zoneid) &&
1322             (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1323 }
1324 
1325 /*
1326  * This function is called whenever an endpoint is found in the hash table.
1327  */
1328 /* ARGSUSED0 */
1329 static void
1330 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1331 {
1332         tl_refhold((tl_endpt_t *)val);
1333 }
1334 
1335 /*
1336  * Address hash function.
1337  */
1338 /* ARGSUSED */
1339 static uint_t
1340 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1341 {
1342         tl_addr_t *ap = (tl_addr_t *)key;
1343         size_t  len = ap->ta_alen;
1344         uchar_t *p = ap->ta_abuf;
1345         uint_t i, g;
1346 
1347         ASSERT((len > 0) && (p != NULL));
1348 
1349         for (i = ap->ta_zoneid; len -- != 0; p++) {
1350                 i = (i << 4) + (*p);
1351                 if ((g = (i & 0xf0000000U)) != 0) {
1352                         i ^= (g >> 24);
1353                         i ^= g;
1354                 }
1355         }
1356         return (i);
1357 }
1358 
1359 /*
1360  * This function is used by hash lookups. It compares two generic addresses.
1361  */
1362 static int
1363 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1364 {
1365 #ifdef  DEBUG
1366         tl_addr_t *ap1 = (tl_addr_t *)key1;
1367         tl_addr_t *ap2 = (tl_addr_t *)key2;
1368 
1369         ASSERT(key1 != NULL);
1370         ASSERT(key2 != NULL);
1371 
1372         ASSERT(ap1->ta_abuf != NULL);
1373         ASSERT(ap2->ta_abuf != NULL);
1374         ASSERT(ap1->ta_alen > 0);
1375         ASSERT(ap2->ta_alen > 0);
1376 #endif
1377 
1378         return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1379 }
1380 
1381 /*
1382  * Prevent endpoint from closing if possible.
1383  * Return B_TRUE on success, B_FALSE on failure.
1384  */
1385 static boolean_t
1386 tl_noclose(tl_endpt_t *tep)
1387 {
1388         boolean_t rc = B_FALSE;
1389 
1390         mutex_enter(&tep->te_closelock);
1391         if (! tep->te_closing) {
1392                 ASSERT(tep->te_closewait == 0);
1393                 tep->te_closewait++;
1394                 rc = B_TRUE;
1395         }
1396         mutex_exit(&tep->te_closelock);
1397         return (rc);
1398 }
1399 
/*
 * Allow endpoint to close if needed.
 *
 * Drops the te_closewait hold (taken by tl_noclose(), or set directly by
 * tl_close() while it waits for serializer work) and signals te_closecv so
 * that a waiting tl_close() can continue.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
        /* Quick sanity check made before the lock is taken. */
        ASSERT(tep->te_closewait > 0);
        mutex_enter(&tep->te_closelock);
        /* At most one hold is ever outstanding. */
        ASSERT(tep->te_closewait == 1);
        tep->te_closewait--;
        cv_signal(&tep->te_closecv);
        mutex_exit(&tep->te_closelock);
}
1413 
/*
 * STREAMS open entry point.
 *
 * The minor number of *devp selects the transport. A new endpoint is
 * allocated from tl_cache, initialized for that transport, given a unique
 * minor number (returned through *devp) and entered into the transport's
 * acceptor ID hash.
 *
 * Returns 0 on success (including a re-open of a queue that already has an
 * endpoint) and ENXIO for clone/module opens or an invalid minor number.
 */
/* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t  *credp)
{
        tl_endpt_t *tep;
        minor_t     minor = getminor(*devp);

        /*
         * Driver is called directly. Both CLONEOPEN and MODOPEN
         * are illegal
         */
        if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
                return (ENXIO);

        /* The queue already has an endpoint attached: nothing to do. */
        if (rq->q_ptr != NULL)
                return (0);

        /* Minor number should specify the mode used for the driver. */
        if ((minor >= TL_UNUSED))
                return (ENXIO);

        /* Opened as a socket: select the socket flavour of the transport. */
        if (oflag & SO_SOCKSTR) {
                minor |= TL_SOCKET;
        }

        tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
        /* This initial reference is dropped at the end of tl_close(). */
        tep->te_refcnt = 1;
        tep->te_cpid = curproc->p_pid;
        rq->q_ptr = WR(rq)->q_ptr = tep;
        tep->te_state = TS_UNBND;
        /* Keep a held reference to the opener's credentials. */
        tep->te_credp = credp;
        crhold(credp);
        tep->te_zoneid = getzoneid();

        /* "minor" (with TL_SOCKET folded in) indexes the transport table. */
        tep->te_flag = minor & TL_MINOR_MASK;
        tep->te_transport = &tl_transports[minor];

        /* Allocate a unique minor number for this instance. */
        tep->te_minor = (minor_t)id_alloc(tl_minors);

        /* Reserve hash handle for bind(). */
        (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

        /* Transport-specific initialization */
        if (IS_COTS(tep)) {
                /* Use private serializer */
                tep->te_ser = tl_serializer_alloc(KM_SLEEP);

                /* Create list for pending connections */
                list_create(&tep->te_iconp, sizeof (tl_icon_t),
                    offsetof(tl_icon_t, ti_node));
                tep->te_qlen = 0;
                tep->te_nicon = 0;
                tep->te_oconp = NULL;
                tep->te_conp = NULL;
        } else {
                /*
                 * Use shared serializer. No extra reference is taken on it
                 * here, and tl_free() does not release it either.
                 */
                tep->te_ser = tep->te_transport->tr_serializer;
                bzero(&tep->te_flows, sizeof (list_node_t));
                /* Create list for flow control */
                list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
                    offsetof(tl_endpt_t, te_flows));
                tep->te_flowq = NULL;
                tep->te_lastep = NULL;

        }

        /* Initialize endpoint address */
        if (IS_SOCKET(tep)) {
                /*
                 * Socket-specific address handling: the address lives in the
                 * embedded te_uxaddr and is derived from the minor number.
                 */
                tep->te_alen = TL_SOUX_ADDRLEN;
                tep->te_abuf = &tep->te_uxaddr;
                tep->te_vp = (void *)(uintptr_t)tep->te_minor;
                tep->te_magic = SOU_MAGIC_IMPLICIT;
        } else {
                /* No address yet; -1 marks the length as uninitialized. */
                tep->te_alen = -1;
                tep->te_abuf = NULL;
        }

        /* clone the driver */
        *devp = makedevice(getmajor(*devp), tep->te_minor);

        tep->te_rq = rq;
        tep->te_wq = WR(rq);

        /*
         * Pick the acceptor ID: the minor number, except for non-socket
         * endpoints on _ILP32 where the read queue pointer is used.
         */
#ifdef  _ILP32
        if (IS_SOCKET(tep))
                tep->te_acceptor_id = tep->te_minor;
        else
                tep->te_acceptor_id = (t_uscalar_t)rq;
#else
        tep->te_acceptor_id = tep->te_minor;
#endif  /* _ILP32 */


        qprocson(rq);

        /*
         * Insert acceptor ID in the hash. The AI hash always sleeps on
         * insertion so insertion can't fail.
         */
        (void) mod_hash_insert(tep->te_transport->tr_ai_hash,
            (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
            (mod_hash_val_t)tep);

        return (0);
}
1524 
1525 /* ARGSUSED1 */
1526 static int
1527 tl_close(queue_t *rq, int flag, cred_t *credp)
1528 {
1529         tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1530         tl_endpt_t *elp = NULL;
1531         queue_t *wq = tep->te_wq;
1532         int rc;
1533 
1534         ASSERT(wq == WR(rq));
1535 
1536         /*
1537          * Remove the endpoint from acceptor hash.
1538          */
1539         rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1540             (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1541             (mod_hash_val_t *)&elp);
1542         ASSERT(rc == 0 && tep == elp);
1543         if ((rc != 0) || (tep != elp)) {
1544                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1545                     SL_TRACE|SL_ERROR,
1546                     "tl_close:inconsistency in AI hash"));
1547         }
1548 
1549         /*
1550          * Wait till close is safe, then mark endpoint as closing.
1551          */
1552         mutex_enter(&tep->te_closelock);
1553         while (tep->te_closewait)
1554                 cv_wait(&tep->te_closecv, &tep->te_closelock);
1555         tep->te_closing = B_TRUE;
1556         /*
1557          * Will wait for the serializer part of the close to finish, so set
1558          * te_closewait now.
1559          */
1560         tep->te_closewait = 1;
1561         tep->te_nowsrv = B_FALSE;
1562         mutex_exit(&tep->te_closelock);
1563 
1564         /*
1565          * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1566          * It is safe because close will wait for tl_close_ser to finish.
1567          */
1568         tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1569 
1570         /*
1571          * Wait for the first phase of close to complete before qprocsoff().
1572          */
1573         mutex_enter(&tep->te_closelock);
1574         while (tep->te_closewait)
1575                 cv_wait(&tep->te_closecv, &tep->te_closelock);
1576         mutex_exit(&tep->te_closelock);
1577 
1578         qprocsoff(rq);
1579 
1580         if (tep->te_bufcid) {
1581                 qunbufcall(rq, tep->te_bufcid);
1582                 tep->te_bufcid = 0;
1583         }
1584         if (tep->te_timoutid) {
1585                 (void) quntimeout(rq, tep->te_timoutid);
1586                 tep->te_timoutid = 0;
1587         }
1588 
1589         /*
1590          * Finish close behind serializer.
1591          *
1592          * For a CLTS endpoint increase a refcount and continue close processing
1593          * with serializer protection. This processing may happen asynchronously
1594          * with the completion of tl_close().
1595          *
         * For a COTS endpoint wait before destroying tep since the serializer
1597          * may go away together with tep and we need to destroy serializer
1598          * outside of serializer context.
1599          */
1600         ASSERT(tep->te_closewait == 0);
1601         if (IS_COTS(tep))
1602                 tep->te_closewait = 1;
1603         else
1604                 tl_refhold(tep);
1605 
1606         tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1607 
1608         /*
1609          * For connection-oriented transports wait for all serializer activity
1610          * to settle down.
1611          */
1612         if (IS_COTS(tep)) {
1613                 mutex_enter(&tep->te_closelock);
1614                 while (tep->te_closewait)
1615                         cv_wait(&tep->te_closecv, &tep->te_closelock);
1616                 mutex_exit(&tep->te_closelock);
1617         }
1618 
1619         crfree(tep->te_credp);
1620         tep->te_credp = NULL;
1621         tep->te_wq = NULL;
1622         tl_refrele(tep);
1623         /*
1624          * tep is likely to be destroyed now, so can't reference it any more.
1625          */
1626 
1627         rq->q_ptr = wq->q_ptr = NULL;
1628         return (0);
1629 }
1630 
/*
 * First phase of close processing done behind the serializer.
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 *
 * Arguments:
 *	mp  - serializer message block; not used here (hence ARGSUSED0).
 *	tep - endpoint being closed.
 *
 * tl_close() sets te_closing and te_closewait before scheduling this
 * function and then blocks on te_closecv until te_closewait is cleared; the
 * tl_closeok() call at the end is what lets it proceed to qprocsoff().
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
        /* Preconditions established by tl_close() before entering here. */
        ASSERT(tep->te_closing);
        ASSERT(tep->te_closewait == 1);
        ASSERT(!(tep->te_flag & TL_CLOSE_SER));

        /* Record that the serialized part of close has run for tep. */
        tep->te_flag |= TL_CLOSE_SER;

        /*
         * Drain out all messages on queue except for TL_TICOTS where the
         * abortive release semantics permit discarding of data on close
         *
         * tl_wsrv_ser() is passed a NULL ser_mp so that it neither signals
         * te_srv_cv nor exits the serializer on our behalf.
         */
        if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
                tl_wsrv_ser(NULL, tep);
        }

        /* Remove address from hash table. */
        tl_addr_unbind(tep);
        /*
         * qprocsoff() gets confused when q->q_next is not NULL on the write
         * queue of the driver, so clear these before qprocsoff() is called.
         * Also clear q_next for the peer since this queue is going away.
         */
        if (IS_COTS(tep) && !IS_SOCKET(tep)) {
                tl_endpt_t *peer_tep = tep->te_conp;

                tep->te_wq->q_next = NULL;
                /* Leave a peer that is itself closing alone. */
                if ((peer_tep != NULL) && !peer_tep->te_closing)
                        peer_tep->te_wq->q_next = NULL;
        }

        /* The read queue must not be used through tep from here on. */
        tep->te_rq = NULL;

        /* wake up tl_close() */
        tl_closeok(tep);
        tl_serializer_exit(tep);
}
1676 
/*
 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
 * the reference for CLTS.
 *
 * Called from serializer. Should drop reference count for CLTS only.
 *
 * Arguments:
 *	mp  - serializer message block; not used here (hence ARGSUSED0).
 *	tep - endpoint being closed.
 *
 * Before scheduling this function tl_close() either sets te_closewait (COTS)
 * and then waits for it to clear, or takes an extra reference with
 * tl_refhold() (CLTS) and does not wait. The IMPLY() checks below encode
 * exactly that contract.
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
        ASSERT(tep->te_closing);
        IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
        IMPLY(IS_COTS(tep), tep->te_closewait == 1);

        tep->te_state = -1;  /* Uninitialized */
        if (IS_COTS(tep)) {
                /* Connection-oriented specific cleanup. */
                tl_co_unconnect(tep);
        } else {
                /* Connectionless specific cleanup */
                TL_REMOVE_PEER(tep->te_lastep);
                /*
                 * Backenable anybody that is flow controlled waiting for
                 * this endpoint.
                 */
                tl_cl_backenable(tep);
                /* Take tep off the flow list of the endpoint it waits on. */
                if (tep->te_flowq != NULL) {
                        list_remove(&(tep->te_flowq->te_flowlist), tep);
                        tep->te_flowq = NULL;
                }
        }

        /*
         * Leave the serializer before waking tl_close() or dropping the
         * reference: either action may lead to tep being destroyed, and
         * tl_close() notes that the serializer has to be destroyed outside of
         * serializer context.
         */
        tl_serializer_exit(tep);
        if (IS_COTS(tep))
                tl_closeok(tep);
        else
                tl_refrele(tep);
}
1714 
1715 /*
1716  * STREAMS write-side put procedure.
1717  * Enter serializer for most of the processing.
1718  *
1719  * The T_CONN_REQ is processed outside of serializer.
1720  */
1721 static void
1722 tl_wput(queue_t *wq, mblk_t *mp)
1723 {
1724         tl_endpt_t              *tep = (tl_endpt_t *)wq->q_ptr;
1725         ssize_t                 msz = MBLKL(mp);
1726         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
1727         tlproc_t                *tl_proc = NULL;
1728 
1729         switch (DB_TYPE(mp)) {
1730         case M_DATA:
1731                 /* Only valid for connection-oriented transports */
1732                 if (IS_CLTS(tep)) {
1733                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1734                             SL_TRACE|SL_ERROR,
1735                             "tl_wput:M_DATA invalid for ticlts driver"));
1736                         tl_merror(wq, mp, EPROTO);
1737                         return;
1738                 }
1739                 tl_proc = tl_wput_data_ser;
1740                 break;
1741 
1742         case M_IOCTL:
1743                 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1744                 case TL_IOC_CREDOPT:
1745                         /* FALLTHROUGH */
1746                 case TL_IOC_UCREDOPT:
1747                         /*
1748                          * Serialize endpoint state change.
1749                          */
1750                         tl_proc = tl_do_ioctl_ser;
1751                         break;
1752 
1753                 default:
1754                         miocnak(wq, mp, 0, EINVAL);
1755                         return;
1756                 }
1757                 break;
1758 
1759         case M_FLUSH:
1760                 /*
1761                  * do canonical M_FLUSH processing
1762                  */
1763                 if (*mp->b_rptr & FLUSHW) {
1764                         flushq(wq, FLUSHALL);
1765                         *mp->b_rptr &= ~FLUSHW;
1766                 }
1767                 if (*mp->b_rptr & FLUSHR) {
1768                         flushq(RD(wq), FLUSHALL);
1769                         qreply(wq, mp);
1770                 } else {
1771                         freemsg(mp);
1772                 }
1773                 return;
1774 
1775         case M_PROTO:
1776                 if (msz < sizeof (prim->type)) {
1777                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1778                             SL_TRACE|SL_ERROR,
1779                             "tl_wput:M_PROTO data too short"));
1780                         tl_merror(wq, mp, EPROTO);
1781                         return;
1782                 }
1783                 switch (prim->type) {
1784                 case T_OPTMGMT_REQ:
1785                 case T_SVR4_OPTMGMT_REQ:
1786                         /*
1787                          * Process TPI option management requests immediately
1788                          * in put procedure regardless of in-order processing
1789                          * of already queued messages.
1790                          * (Note: This driver supports AF_UNIX socket
1791                          * implementation.  Unless we implement this processing,
1792                          * setsockopt() on socket endpoint will block on flow
1793                          * controlled endpoints which it should not. That is
1794                          * required for successful execution of VSU socket tests
1795                          * and is consistent with BSD socket behavior).
1796                          */
1797                         tl_optmgmt(wq, mp);
1798                         return;
1799                 case O_T_BIND_REQ:
1800                 case T_BIND_REQ:
1801                         tl_proc = tl_bind_ser;
1802                         break;
1803                 case T_CONN_REQ:
1804                         if (IS_CLTS(tep)) {
1805                                 tl_merror(wq, mp, EPROTO);
1806                                 return;
1807                         }
1808                         tl_conn_req(wq, mp);
1809                         return;
1810                 case T_DATA_REQ:
1811                 case T_OPTDATA_REQ:
1812                 case T_EXDATA_REQ:
1813                 case T_ORDREL_REQ:
1814                         tl_proc = tl_putq_ser;
1815                         break;
1816                 case T_UNITDATA_REQ:
1817                         if (IS_COTS(tep) ||
1818                             (msz < sizeof (struct T_unitdata_req))) {
1819                                 tl_merror(wq, mp, EPROTO);
1820                                 return;
1821                         }
1822                         if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1823                                 tl_proc = tl_unitdata_ser;
1824                         } else {
1825                                 tl_proc = tl_putq_ser;
1826                         }
1827                         break;
1828                 default:
1829                         /*
1830                          * process in service procedure if message already
1831                          * queued (maintain in-order processing)
1832                          */
1833                         if (wq->q_first != NULL) {
1834                                 tl_proc = tl_putq_ser;
1835                         } else {
1836                                 tl_proc = tl_wput_ser;
1837                         }
1838                         break;
1839                 }
1840                 break;
1841 
1842         case M_PCPROTO:
1843                 /*
1844                  * Check that the message has enough data to figure out TPI
1845                  * primitive.
1846                  */
1847                 if (msz < sizeof (prim->type)) {
1848                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1849                             SL_TRACE|SL_ERROR,
1850                             "tl_wput:M_PCROTO data too short"));
1851                         tl_merror(wq, mp, EPROTO);
1852                         return;
1853                 }
1854                 switch (prim->type) {
1855                 case T_CAPABILITY_REQ:
1856                         tl_capability_req(mp, tep);
1857                         return;
1858                 case T_INFO_REQ:
1859                         tl_proc = tl_info_req_ser;
1860                         break;
1861                 case T_ADDR_REQ:
1862                         tl_proc = tl_addr_req_ser;
1863                         break;
1864 
1865                 default:
1866                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1867                             SL_TRACE|SL_ERROR,
1868                             "tl_wput:unknown TPI msg primitive"));
1869                         tl_merror(wq, mp, EPROTO);
1870                         return;
1871                 }
1872                 break;
1873         default:
1874                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1875                     "tl_wput:default:unexpected Streams message"));
1876                 freemsg(mp);
1877                 return;
1878         }
1879 
1880         /*
1881          * Continue processing via serializer.
1882          */
1883         ASSERT(tl_proc != NULL);
1884         tl_refhold(tep);
1885         tl_serializer_enter(tep, tl_proc, mp);
1886 }
1887 
1888 /*
1889  * Place message on the queue while preserving order.
1890  */
1891 static void
1892 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1893 {
1894         if (tep->te_closing) {
1895                 tl_wput_ser(mp, tep);
1896         } else {
1897                 TL_PUTQ(tep, mp);
1898                 tl_serializer_exit(tep);
1899                 tl_refrele(tep);
1900         }
1901 
1902 }
1903 
1904 static void
1905 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1906 {
1907         ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1908 
1909         switch (DB_TYPE(mp)) {
1910         case M_DATA:
1911                 tl_data(mp, tep);
1912                 break;
1913         case M_PROTO:
1914                 tl_do_proto(mp, tep);
1915                 break;
1916         default:
1917                 freemsg(mp);
1918                 break;
1919         }
1920 }
1921 
/*
 * Write side put procedure called from serializer.
 *
 * Dispatches mp through tl_wput_common_ser(), then exits the serializer and
 * releases the endpoint reference that tl_wput() took before scheduling this
 * callback. Also called directly by tl_putq_ser() when the endpoint is
 * closing.
 */
static void
tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
{
        tl_wput_common_ser(mp, tep);
        tl_serializer_exit(tep);
        tl_refrele(tep);
}
1932 
/*
 * M_DATA processing. Called from serializer.
 *
 * Selected by tl_wput() for M_DATA on connection-oriented endpoints. The
 * message is either passed straight to the peer's read side (fast path),
 * queued on our own write queue, or freed. On return the serializer has been
 * exited and the reference taken by tl_wput() has been released.
 */
static void
tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
{
        tl_endpt_t      *peer_tep = tep->te_conp;
        queue_t         *peer_rq;

        ASSERT(DB_TYPE(mp) == M_DATA);
        ASSERT(IS_COTS(tep));

        /* A connected peer is expected to share our serializer. */
        IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);

        /*
         * fastpath for data. Ignore flow control if tep is closing.
         *
         * All of the following must hold: there is a peer that is not
         * closing, both ends are in TS_DATA_XFER or TS_WREQ_ORDREL, our
         * write queue still exists and is empty (so ordering is preserved),
         * the peer still has a read queue, and the peer can accept the
         * message (or we are closing and do not care).
         */
        if ((peer_tep != NULL) &&
            !peer_tep->te_closing &&
            ((tep->te_state == TS_DATA_XFER) ||
            (tep->te_state == TS_WREQ_ORDREL)) &&
            (tep->te_wq != NULL) &&
            (tep->te_wq->q_first == NULL) &&
            ((peer_tep->te_state == TS_DATA_XFER) ||
            (peer_tep->te_state == TS_WREQ_ORDREL))  &&
            ((peer_rq = peer_tep->te_rq) != NULL) &&
            (canputnext(peer_rq) || tep->te_closing)) {
                putnext(peer_rq, mp);
        } else if (tep->te_closing) {
                /*
                 * It is possible that by the time we got here tep started to
                 * close. If the write queue is not empty, and the state is
                 * TS_DATA_XFER the data should be delivered in order, so we
                 * call putq() instead of freeing the data.
                 */
                if ((tep->te_wq != NULL) &&
                    ((tep->te_state == TS_DATA_XFER) ||
                    (tep->te_state == TS_WREQ_ORDREL))) {
                        TL_PUTQ(tep, mp);
                } else {
                        freemsg(mp);
                }
        } else {
                /* Slow path: queue the data on our write queue. */
                TL_PUTQ(tep, mp);
        }

        tl_serializer_exit(tep);
        tl_refrele(tep);
}
1982 
/*
 * Write side service routine.
 *
 * All actual processing happens within serializer which is entered
 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
 * messages that need processing may have arrived, so tl_wsrv repeats until
 * queue is empty or te_nowsrv is set.
 *
 * "Synchronously" here means that after scheduling tl_wsrv_ser() this routine
 * blocks on te_srv_cv until tl_wsrv_ser() clears te_wsrv_active. The
 * endpoint's embedded te_wsrvmp is used as the serializer message, so only
 * one such request may be outstanding at a time.
 */
static void
tl_wsrv(queue_t *wq)
{
        tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;

        while ((wq->q_first != NULL) && !tep->te_nowsrv) {
                /* Mark the request outstanding before scheduling it. */
                mutex_enter(&tep->te_srv_lock);
                ASSERT(tep->te_wsrv_active == B_FALSE);
                tep->te_wsrv_active = B_TRUE;
                mutex_exit(&tep->te_srv_lock);

                tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);

                /*
                 * Wait for serializer job to complete.
                 */
                mutex_enter(&tep->te_srv_lock);
                while (tep->te_wsrv_active) {
                        cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
                }
                /*
                 * te_srv_cv is also waited on by tl_rsrv(). NOTE(review):
                 * presumably this signal passes the wakeup on to such a
                 * waiter, since the serializer side uses cv_signal() which
                 * wakes a single thread - confirm.
                 */
                cv_signal(&tep->te_srv_cv);
                mutex_exit(&tep->te_srv_lock);
        }
}
2015 
2016 /*
2017  * Serialized write side processing of the STREAMS queue.
2018  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2019  * is NULL.
2020  */
2021 static void
2022 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2023 {
2024         mblk_t *mp;
2025         queue_t *wq = tep->te_wq;
2026 
2027         ASSERT(wq != NULL);
2028         while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2029                 tl_wput_common_ser(mp, tep);
2030         }
2031 
2032         /*
2033          * Wakeup service routine unless called from close.
2034          * If ser_mp is specified, the caller is tl_wsrv().
2035          * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2036          * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2037          * be no matching tl_serializer_exit() in this case.
2038          * Also, there is no need to wakeup anyone since tl_close_ser() is not
2039          * waiting on te_srv_cv.
2040          */
2041         if (ser_mp != NULL) {
2042                 /*
2043                  * We are called from tl_wsrv.
2044                  */
2045                 mutex_enter(&tep->te_srv_lock);
2046                 ASSERT(tep->te_wsrv_active);
2047                 tep->te_wsrv_active = B_FALSE;
2048                 cv_signal(&tep->te_srv_cv);
2049                 mutex_exit(&tep->te_srv_lock);
2050                 tl_serializer_exit(tep);
2051         }
2052 }
2053 
/*
 * Called when the stream is backenabled. Enter serializer and qenable everyone
 * flow controlled by tep.
 *
 * NOTE: The service routine should enter serializer synchronously. Otherwise it
 * is possible that two instances of tl_rsrv will be running reusing the same
 * rsrv mblk.
 *
 * The work itself is done by tl_rsrv_ser(); this routine schedules it using
 * the endpoint's embedded te_rsrvmp and then blocks on te_srv_cv until
 * tl_rsrv_ser() clears te_rsrv_active.
 */
static void
tl_rsrv(queue_t *rq)
{
        tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;

        /* Nothing is expected to be queued on the read side. */
        ASSERT(rq->q_first == NULL);
        ASSERT(tep->te_rsrv_active == 0);

        /*
         * Mark the request outstanding before scheduling it. Unlike
         * tl_wsrv(), the flag is set here without holding te_srv_lock.
         */
        tep->te_rsrv_active = B_TRUE;
        tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
        /*
         * Wait for serializer job to complete.
         */
        mutex_enter(&tep->te_srv_lock);
        while (tep->te_rsrv_active) {
                cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
        }
        /*
         * te_srv_cv is also waited on by tl_wsrv(). NOTE(review): presumably
         * this signal passes the wakeup on to such a waiter - confirm.
         */
        cv_signal(&tep->te_srv_cv);
        mutex_exit(&tep->te_srv_lock);
}
2082 
2083 /* ARGSUSED */
2084 static void
2085 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2086 {
2087         tl_endpt_t *peer_tep;
2088 
2089         if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2090                 tl_cl_backenable(tep);
2091         } else if (
2092             IS_COTS(tep) &&
2093             ((peer_tep = tep->te_conp) != NULL) &&
2094             !peer_tep->te_closing &&
2095             ((tep->te_state == TS_DATA_XFER) ||
2096             (tep->te_state == TS_WIND_ORDREL)||
2097             (tep->te_state == TS_WREQ_ORDREL))) {
2098                 TL_QENABLE(peer_tep);
2099         }
2100 
2101         /*
2102          * Wakeup read side service routine.
2103          */
2104         mutex_enter(&tep->te_srv_lock);
2105         ASSERT(tep->te_rsrv_active);
2106         tep->te_rsrv_active = B_FALSE;
2107         cv_signal(&tep->te_srv_cv);
2108         mutex_exit(&tep->te_srv_lock);
2109         tl_serializer_exit(tep);
2110 }
2111 
2112 /*
2113  * process M_PROTO messages. Always called from serializer.
2114  */
2115 static void
2116 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2117 {
2118         ssize_t                 msz = MBLKL(mp);
2119         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
2120 
2121         /* Message size was validated by tl_wput(). */
2122         ASSERT(msz >= sizeof (prim->type));
2123 
2124         switch (prim->type) {
2125         case T_UNBIND_REQ:
2126                 tl_unbind(mp, tep);
2127                 break;
2128 
2129         case T_ADDR_REQ:
2130                 tl_addr_req(mp, tep);
2131                 break;
2132 
2133         case O_T_CONN_RES:
2134         case T_CONN_RES:
2135                 if (IS_CLTS(tep)) {
2136                         tl_merror(tep->te_wq, mp, EPROTO);
2137                         break;
2138                 }
2139                 tl_conn_res(mp, tep);
2140                 break;
2141 
2142         case T_DISCON_REQ:
2143                 if (IS_CLTS(tep)) {
2144                         tl_merror(tep->te_wq, mp, EPROTO);
2145                         break;
2146                 }
2147                 tl_discon_req(mp, tep);
2148                 break;
2149 
2150         case T_DATA_REQ:
2151                 if (IS_CLTS(tep)) {
2152                         tl_merror(tep->te_wq, mp, EPROTO);
2153                         break;
2154                 }
2155                 tl_data(mp, tep);
2156                 break;
2157 
2158         case T_OPTDATA_REQ:
2159                 if (IS_CLTS(tep)) {
2160                         tl_merror(tep->te_wq, mp, EPROTO);
2161                         break;
2162                 }
2163                 tl_data(mp, tep);
2164                 break;
2165 
2166         case T_EXDATA_REQ:
2167                 if (IS_CLTS(tep)) {
2168                         tl_merror(tep->te_wq, mp, EPROTO);
2169                         break;
2170                 }
2171                 tl_exdata(mp, tep);
2172                 break;
2173 
2174         case T_ORDREL_REQ:
2175                 if (! IS_COTSORD(tep)) {
2176                         tl_merror(tep->te_wq, mp, EPROTO);
2177                         break;
2178                 }
2179                 tl_ordrel(mp, tep);
2180                 break;
2181 
2182         case T_UNITDATA_REQ:
2183                 if (IS_COTS(tep)) {
2184                         tl_merror(tep->te_wq, mp, EPROTO);
2185                         break;
2186                 }
2187                 tl_unitdata(mp, tep);
2188                 break;
2189 
2190         default:
2191                 tl_merror(tep->te_wq, mp, EPROTO);
2192                 break;
2193         }
2194 }
2195 
2196 /*
2197  * Process ioctl from serializer.
2198  * This is a wrapper around tl_do_ioctl().
2199  */
2200 static void
2201 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2202 {
2203         if (! tep->te_closing)
2204                 tl_do_ioctl(mp, tep);
2205         else
2206                 freemsg(mp);
2207 
2208         tl_serializer_exit(tep);
2209         tl_refrele(tep);
2210 }
2211 
2212 static void
2213 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2214 {
2215         struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2216         int cmd = iocbp->ioc_cmd;
2217         queue_t *wq = tep->te_wq;
2218         int error;
2219         int thisopt, otheropt;
2220 
2221         ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2222 
2223         switch (cmd) {
2224         case TL_IOC_CREDOPT:
2225                 if (cmd == TL_IOC_CREDOPT) {
2226                         thisopt = TL_SETCRED;
2227                         otheropt = TL_SETUCRED;
2228                 } else {
2229                         /* FALLTHROUGH */
2230         case TL_IOC_UCREDOPT:
2231                         thisopt = TL_SETUCRED;
2232                         otheropt = TL_SETCRED;
2233                 }
2234                 /*
2235                  * The credentials passing does not apply to sockets.
2236                  * Only one of the cred options can be set at a given time.
2237                  */
2238                 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2239                         miocnak(wq, mp, 0, EINVAL);
2240                         return;
2241                 }
2242 
2243                 /*
2244                  * Turn on generation of credential options for
2245                  * T_conn_req, T_conn_con, T_unidata_ind.
2246                  */
2247                 error = miocpullup(mp, sizeof (uint32_t));
2248                 if (error != 0) {
2249                         miocnak(wq, mp, 0, error);
2250                         return;
2251                 }
2252                 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2253                         miocnak(wq, mp, 0, EINVAL);
2254                         return;
2255                 }
2256 
2257                 if (*(uint32_t *)mp->b_cont->b_rptr)
2258                         tep->te_flag |= thisopt;
2259                 else
2260                         tep->te_flag &= ~thisopt;
2261 
2262                 miocack(wq, mp, 0, 0);
2263                 break;
2264 
2265         default:
2266                 /* Should not be here */
2267                 miocnak(wq, mp, 0, EINVAL);
2268                 break;
2269         }
2270 }
2271 
2272 
2273 /*
2274  * send T_ERROR_ACK
2275  * Note: assumes enough memory or caller passed big enough mp
2276  *      - no recovery from allocb failures
2277  */
2278 
2279 static void
2280 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2281     t_scalar_t unix_err, t_scalar_t type)
2282 {
2283         struct T_error_ack *err_ack;
2284         mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2285             M_PCPROTO, T_ERROR_ACK);
2286 
2287         if (ackmp == NULL) {
2288                 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2289                     "tl_error_ack:out of mblk memory"));
2290                 tl_merror(wq, NULL, ENOSR);
2291                 return;
2292         }
2293         err_ack = (struct T_error_ack *)ackmp->b_rptr;
2294         err_ack->ERROR_prim = type;
2295         err_ack->TLI_error = tli_err;
2296         err_ack->UNIX_error = unix_err;
2297 
2298         /*
2299          * send error ack message
2300          */
2301         qreply(wq, ackmp);
2302 }
2303 
2304 
2305 
2306 /*
2307  * send T_OK_ACK
2308  * Note: assumes enough memory or caller passed big enough mp
2309  *      - no recovery from allocb failures
2310  */
2311 static void
2312 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2313 {
2314         struct T_ok_ack *ok_ack;
2315         mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2316             M_PCPROTO, T_OK_ACK);
2317 
2318         if (ackmp == NULL) {
2319                 tl_merror(wq, NULL, ENOMEM);
2320                 return;
2321         }
2322 
2323         ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2324         ok_ack->CORRECT_prim = type;
2325 
2326         (void) qreply(wq, ackmp);
2327 }
2328 
2329 /*
2330  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2331  * This is a wrapper around tl_bind().
2332  */
2333 static void
2334 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2335 {
2336         if (! tep->te_closing)
2337                 tl_bind(mp, tep);
2338         else
2339                 freemsg(mp);
2340 
2341         tl_serializer_exit(tep);
2342         tl_refrele(tep);
2343 }
2344 
2345 /*
2346  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
 * Expects the endpoint to be in the unbound state (TS_UNBND); a request in
 * any other state is rejected with TOUTSTATE.
2348  */
2349 static void
2350 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2351 {
2352         queue_t                 *wq = tep->te_wq;
2353         struct T_bind_ack       *b_ack;
2354         struct T_bind_req       *bind = (struct T_bind_req *)mp->b_rptr;
2355         mblk_t                  *ackmp, *bamp;
2356         soux_addr_t             ux_addr;
2357         t_uscalar_t             qlen = 0;
2358         t_scalar_t              alen, aoff;
2359         tl_addr_t               addr_req;
2360         void                    *addr_startp;
2361         ssize_t                 msz = MBLKL(mp), basize;
2362         t_scalar_t              tli_err = 0, unix_err = 0;
2363         t_scalar_t              save_prim_type = bind->PRIM_type;
2364         t_scalar_t              save_state = tep->te_state;
2365 
2366         if (tep->te_state != TS_UNBND) {
2367                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2368                     SL_TRACE|SL_ERROR,
2369                     "tl_wput:bind_request:out of state, state=%d",
2370                     tep->te_state));
2371                 tli_err = TOUTSTATE;
2372                 goto error;
2373         }
2374 
2375         if (msz < sizeof (struct T_bind_req)) {
2376                 tli_err = TSYSERR; unix_err = EINVAL;
2377                 goto error;
2378         }
2379 
2380         tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2381 
2382         ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2383             (bind->PRIM_type == T_BIND_REQ));
2384 
2385         alen = bind->ADDR_length;
2386         aoff = bind->ADDR_offset;
2387 
2388         /* negotiate max conn req pending */
2389         if (IS_COTS(tep)) {
2390                 qlen = bind->CONIND_number;
2391                 if (qlen > tl_maxqlen)
2392                         qlen = tl_maxqlen;
2393         }
2394 
2395         /*
2396          * Reserve hash handle. It can only be NULL if the endpoint is unbound
2397          * and bound again.
2398          */
2399         if ((tep->te_hash_hndl == NULL) &&
2400             ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2401             mod_hash_reserve_nosleep(tep->te_addrhash,
2402             &tep->te_hash_hndl) != 0) {
2403                 tli_err = TSYSERR; unix_err = ENOSR;
2404                 goto error;
2405         }
2406 
2407         /*
2408          * Verify address correctness.
2409          */
2410         if (IS_SOCKET(tep)) {
2411                 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2412 
2413                 if ((alen != TL_SOUX_ADDRLEN) ||
2414                     (aoff < 0) ||
2415                     (aoff + alen > msz)) {
2416                         (void) (STRLOG(TL_ID, tep->te_minor,
2417                             1, SL_TRACE|SL_ERROR,
2418                             "tl_bind: invalid socket addr"));
2419                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2420                         tli_err = TSYSERR; unix_err = EINVAL;
2421                         goto error;
2422                 }
2423                 /* Copy address from message to local buffer. */
2424                 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2425                 /*
2426                  * Check that we got correct address from sockets
2427                  */
2428                 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2429                     (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2430                         (void) (STRLOG(TL_ID, tep->te_minor,
2431                             1, SL_TRACE|SL_ERROR,
2432                             "tl_bind: invalid socket magic"));
2433                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2434                         tli_err = TSYSERR; unix_err = EINVAL;
2435                         goto error;
2436                 }
2437                 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2438                     (ux_addr.soua_vp != NULL)) {
2439                         (void) (STRLOG(TL_ID, tep->te_minor,
2440                             1, SL_TRACE|SL_ERROR,
2441                             "tl_bind: implicit addr non-empty"));
2442                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2443                         tli_err = TSYSERR; unix_err = EINVAL;
2444                         goto error;
2445                 }
2446                 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2447                     (ux_addr.soua_vp == NULL)) {
2448                         (void) (STRLOG(TL_ID, tep->te_minor,
2449                             1, SL_TRACE|SL_ERROR,
2450                             "tl_bind: explicit addr empty"));
2451                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2452                         tli_err = TSYSERR; unix_err = EINVAL;
2453                         goto error;
2454                 }
2455         } else {
2456                 if ((alen > 0) && ((aoff < 0) ||
2457                     ((ssize_t)(aoff + alen) > msz) ||
2458                     ((aoff + alen) < 0))) {
2459                         (void) (STRLOG(TL_ID, tep->te_minor,
2460                             1, SL_TRACE|SL_ERROR,
2461                             "tl_bind: invalid message"));
2462                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2463                         tli_err = TSYSERR; unix_err = EINVAL;
2464                         goto error;
2465                 }
2466                 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2467                         (void) (STRLOG(TL_ID, tep->te_minor,
2468                             1, SL_TRACE|SL_ERROR,
2469                             "tl_bind: bad addr in  message"));
2470                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2471                         tli_err = TBADADDR;
2472                         goto error;
2473                 }
2474 #ifdef DEBUG
2475                 /*
2476                  * Mild form of ASSERT()ion to detect broken TPI apps.
2477                  * if (! assertion)
2478                  *      log warning;
2479                  */
2480                 if (! ((alen == 0 && aoff == 0) ||
2481                         (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2482                         (void) (STRLOG(TL_ID, tep->te_minor,
2483                                     3, SL_TRACE|SL_ERROR,
2484                                     "tl_bind: addr overlaps TPI message"));
2485                 }
2486 #endif
2487         }
2488 
2489         /*
2490          * Bind the address provided or allocate one if requested.
2491          * Allow rebinds with a new qlen value.
2492          */
2493         if (IS_SOCKET(tep)) {
2494                 /*
2495                  * For anonymous requests the te_ap is already set up properly
2496                  * so use minor number as an address.
2497                  * For explicit requests need to check whether the address is
2498                  * already in use.
2499                  */
2500                 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2501                         int rc;
2502 
2503                         if (tep->te_flag & TL_ADDRHASHED) {
2504                                 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2505                                 if (tep->te_vp == ux_addr.soua_vp)
2506                                         goto skip_addr_bind;
2507                                 else /* Rebind to a new address. */
2508                                         tl_addr_unbind(tep);
2509                         }
2510                         /*
2511                          * Insert address in the hash if it is not already
2512                          * there.  Since we use preallocated handle, the insert
2513                          * can fail only if the key is already present.
2514                          */
2515                         rc = mod_hash_insert_reserve(tep->te_addrhash,
2516                             (mod_hash_key_t)ux_addr.soua_vp,
2517                             (mod_hash_val_t)tep, tep->te_hash_hndl);
2518 
2519                         if (rc != 0) {
2520                                 ASSERT(rc == MH_ERR_DUPLICATE);
2521                                 /*
2522                                  * Violate O_T_BIND_REQ semantics and fail with
2523                                  * TADDRBUSY - sockets will not use any address
2524                                  * other than supplied one for explicit binds.
2525                                  */
2526                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2527                                     SL_TRACE|SL_ERROR,
2528                                     "tl_bind:requested addr %p is busy",
2529                                     ux_addr.soua_vp));
2530                                 tli_err = TADDRBUSY; unix_err = 0;
2531                                 goto error;
2532                         }
2533                         tep->te_uxaddr = ux_addr;
2534                         tep->te_flag |= TL_ADDRHASHED;
2535                         tep->te_hash_hndl = NULL;
2536                 }
2537         } else if (alen == 0) {
2538                 /*
2539                  * assign any free address
2540                  */
2541                 if (! tl_get_any_addr(tep, NULL)) {
2542                         (void) (STRLOG(TL_ID, tep->te_minor,
2543                             1, SL_TRACE|SL_ERROR,
2544                             "tl_bind:failed to get buffer for any "
2545                             "address"));
2546                         tli_err = TSYSERR; unix_err = ENOSR;
2547                         goto error;
2548                 }
2549         } else {
2550                 addr_req.ta_alen = alen;
2551                 addr_req.ta_abuf = (mp->b_rptr + aoff);
2552                 addr_req.ta_zoneid = tep->te_zoneid;
2553 
2554                 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2555                 if (tep->te_abuf == NULL) {
2556                         tli_err = TSYSERR; unix_err = ENOSR;
2557                         goto error;
2558                 }
2559                 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2560                 tep->te_alen = alen;
2561 
2562                 if (mod_hash_insert_reserve(tep->te_addrhash,
2563                     (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2564                     tep->te_hash_hndl) != 0) {
2565                         if (save_prim_type == T_BIND_REQ) {
2566                                 /*
2567                                  * The bind semantics for this primitive
2568                                  * require a failure if the exact address
2569                                  * requested is busy
2570                                  */
2571                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2572                                     SL_TRACE|SL_ERROR,
2573                                     "tl_bind:requested addr is busy"));
2574                                 tli_err = TADDRBUSY; unix_err = 0;
2575                                 goto error;
2576                         }
2577 
                        /*
                         * O_T_BIND_REQ semantics say that if the requested
                         * address is busy, bind to any available free address
                         */
2582                         if (! tl_get_any_addr(tep, &addr_req)) {
2583                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2584                                     SL_TRACE|SL_ERROR,
2585                                     "tl_bind:unable to get any addr buf"));
2586                                 tli_err = TSYSERR; unix_err = ENOMEM;
2587                                 goto error;
2588                         }
2589                 } else {
2590                         tep->te_flag |= TL_ADDRHASHED;
2591                         tep->te_hash_hndl = NULL;
2592                 }
2593         }
2594 
2595         ASSERT(tep->te_alen >= 0);
2596 
2597 skip_addr_bind:
2598         /*
2599          * prepare T_BIND_ACK TPI message
2600          */
2601         basize = sizeof (struct T_bind_ack) + tep->te_alen;
2602         bamp = reallocb(mp, basize, 0);
2603         if (bamp == NULL) {
2604                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2605                     "tl_wput:tl_bind: allocb failed"));
2606                 /*
2607                  * roll back state changes
2608                  */
2609                 tl_addr_unbind(tep);
2610                 tep->te_state = TS_UNBND;
2611                 tl_memrecover(wq, mp, basize);
2612                 return;
2613         }
2614 
2615         DB_TYPE(bamp) = M_PCPROTO;
2616         bamp->b_wptr = bamp->b_rptr + basize;
2617         b_ack = (struct T_bind_ack *)bamp->b_rptr;
2618         b_ack->PRIM_type = T_BIND_ACK;
2619         b_ack->CONIND_number = qlen;
2620         b_ack->ADDR_length = tep->te_alen;
2621         b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2622         addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2623         bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2624 
2625         if (IS_COTS(tep)) {
2626                 tep->te_qlen = qlen;
2627                 if (qlen > 0)
2628                         tep->te_flag |= TL_LISTENER;
2629         }
2630 
2631         tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2632         /*
2633          * send T_BIND_ACK message
2634          */
2635         (void) qreply(wq, bamp);
2636         return;
2637 
2638 error:
2639         ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2640         if (ackmp == NULL) {
2641                 /*
2642                  * roll back state changes
2643                  */
2644                 tep->te_state = save_state;
2645                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2646                 return;
2647         }
2648         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2649         tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2650 }
2651 
2652 /*
2653  * Process T_UNBIND_REQ.
2654  * Called from serializer.
2655  */
2656 static void
2657 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2658 {
2659         queue_t *wq;
2660         mblk_t *ackmp;
2661 
2662         if (tep->te_closing) {
2663                 freemsg(mp);
2664                 return;
2665         }
2666 
2667         wq = tep->te_wq;
2668 
2669         /*
2670          * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2671          * ==> allocate for T_ERROR_ACK (known max)
2672          */
2673         if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2674                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2675                 return;
2676         }
2677         /*
2678          * memory resources committed
2679          * Note: no message validation. T_UNBIND_REQ message is
2680          * same size as PRIM_type field so already verified earlier.
2681          */
2682 
2683         /*
2684          * validate state
2685          */
2686         if (tep->te_state != TS_IDLE) {
2687                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2688                     SL_TRACE|SL_ERROR,
2689                     "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2690                     tep->te_state));
2691                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2692                 return;
2693         }
2694         tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2695 
2696         /*
2697          * TPI says on T_UNBIND_REQ:
2698          *    send up a M_FLUSH to flush both
2699          *    read and write queues
2700          */
2701         (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2702 
2703         if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2704             tep->te_magic != SOU_MAGIC_EXPLICIT) {
2705 
2706                 /*
2707                  * Sockets use bind with qlen==0 followed by bind() to
2708                  * the same address with qlen > 0 for listeners.
2709                  * We allow rebind with a new qlen value.
2710                  */
2711                 tl_addr_unbind(tep);
2712         }
2713 
2714         tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2715         /*
2716          * send  T_OK_ACK
2717          */
2718         tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2719 }
2720 
2721 
2722 /*
2723  * Option management code from drv/ip is used here
2724  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2725  *      database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2726  *      However, that is what we want as that option is 'unorthodox'
2727  *      and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2728  *      and not in T_SVR4_OPTMGMT_REQ/ACK
2729  * Note2: use of optcom_req means this routine is an exception to
2730  *       recovery from allocb() failures.
2731  */
2732 
2733 static void
2734 tl_optmgmt(queue_t *wq, mblk_t *mp)
2735 {
2736         tl_endpt_t *tep;
2737         mblk_t *ackmp;
2738         union T_primitives *prim;
2739         cred_t *cr;
2740 
2741         tep = (tl_endpt_t *)wq->q_ptr;
2742         prim = (union T_primitives *)mp->b_rptr;
2743 
2744         /*
2745          * All Solaris components should pass a db_credp
2746          * for this TPI message, hence we ASSERT.
2747          * But in case there is some other M_PROTO that looks
2748          * like a TPI message sent by some other kernel
2749          * component, we check and return an error.
2750          */
2751         cr = msg_getcred(mp, NULL);
2752         ASSERT(cr != NULL);
2753         if (cr == NULL) {
2754                 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2755                 return;
2756         }
2757 
2758         /*  all states OK for AF_UNIX options ? */
2759         if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2760             prim->type == T_SVR4_OPTMGMT_REQ) {
2761                 /*
2762                  * Broken TLI semantics that options can only be managed
2763                  * in TS_IDLE state. Needed for Sparc ABI test suite that
2764                  * tests this TLI (mis)feature using this device driver.
2765                  */
2766                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2767                     SL_TRACE|SL_ERROR,
2768                     "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2769                     tep->te_state));
2770                 /*
2771                  * preallocate memory for T_ERROR_ACK
2772                  */
2773                 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2774                 if (! ackmp) {
2775                         tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2776                         return;
2777                 }
2778 
2779                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2780                 freemsg(mp);
2781                 return;
2782         }
2783 
2784         /*
2785          * call common option management routine from drv/ip
2786          */
2787         if (prim->type == T_SVR4_OPTMGMT_REQ) {
2788                 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2789         } else {
2790                 ASSERT(prim->type == T_OPTMGMT_REQ);
2791                 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2792         }
2793 }
2794 
2795 /*
2796  * Handle T_conn_req - the driver part of accept().
2797  * If TL_SET[U]CRED generate the credentials options.
2798  * If this is a socket pass through options unmodified.
2799  * For sockets generate the T_CONN_CON here instead of
2800  * waiting for the T_CONN_RES.
2801  */
2802 static void
2803 tl_conn_req(queue_t *wq, mblk_t *mp)
2804 {
2805         tl_endpt_t              *tep = (tl_endpt_t *)wq->q_ptr;
2806         struct T_conn_req       *creq = (struct T_conn_req *)mp->b_rptr;
2807         ssize_t                 msz = MBLKL(mp);
2808         t_scalar_t              alen, aoff, olen, ooff, err = 0;
2809         tl_endpt_t              *peer_tep = NULL;
2810         mblk_t                  *ackmp;
2811         mblk_t                  *dimp;
2812         struct T_discon_ind     *di;
2813         soux_addr_t             ux_addr;
2814         tl_addr_t               dst;
2815 
2816         ASSERT(IS_COTS(tep));
2817 
2818         if (tep->te_closing) {
2819                 freemsg(mp);
2820                 return;
2821         }
2822 
2823         /*
2824          * preallocate memory for:
2825          * 1. max of T_ERROR_ACK and T_OK_ACK
2826          *      ==> known max T_ERROR_ACK
2827          * 2. max of T_DISCON_IND and T_CONN_IND
2828          */
2829         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2830         if (! ackmp) {
2831                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2832                 return;
2833         }
2834         /*
2835          * memory committed for T_OK_ACK/T_ERROR_ACK now
2836          * will be committed for T_DISCON_IND/T_CONN_IND later
2837          */
2838 
2839         if (tep->te_state != TS_IDLE) {
2840                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2841                     SL_TRACE|SL_ERROR,
2842                     "tl_wput:T_CONN_REQ:out of state, state=%d",
2843                     tep->te_state));
2844                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2845                 freemsg(mp);
2846                 return;
2847         }
2848 
2849         /*
2850          * validate the message
2851          * Note: dereference fields in struct inside message only
2852          * after validating the message length.
2853          */
2854         if (msz < sizeof (struct T_conn_req)) {
2855                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2856                     "tl_conn_req:invalid message length"));
2857                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2858                 freemsg(mp);
2859                 return;
2860         }
2861         alen = creq->DEST_length;
2862         aoff = creq->DEST_offset;
2863         olen = creq->OPT_length;
2864         ooff = creq->OPT_offset;
2865         if (olen == 0)
2866                 ooff = 0;
2867 
2868         if (IS_SOCKET(tep)) {
2869                 if ((alen != TL_SOUX_ADDRLEN) ||
2870                     (aoff < 0) ||
2871                     (aoff + alen > msz) ||
2872                     (alen > msz - sizeof (struct T_conn_req))) {
2873                         (void) (STRLOG(TL_ID, tep->te_minor,
2874                                     1, SL_TRACE|SL_ERROR,
2875                                     "tl_conn_req: invalid socket addr"));
2876                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2877                         freemsg(mp);
2878                         return;
2879                 }
2880                 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2881                 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2882                     (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2883                         (void) (STRLOG(TL_ID, tep->te_minor,
2884                             1, SL_TRACE|SL_ERROR,
2885                             "tl_conn_req: invalid socket magic"));
2886                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2887                         freemsg(mp);
2888                         return;
2889                 }
2890         } else {
2891                 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2892                     (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2893                     ooff + olen < 0)) ||
2894                     olen < 0 || ooff < 0) {
2895                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2896                             SL_TRACE|SL_ERROR,
2897                             "tl_conn_req:invalid message"));
2898                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2899                         freemsg(mp);
2900                         return;
2901                 }
2902 
2903                 if (alen <= 0 || aoff < 0 ||
2904                     (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2905                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2906                                     SL_TRACE|SL_ERROR,
2907                                     "tl_conn_req:bad addr in message, "
2908                                     "alen=%d, msz=%ld",
2909                                     alen, msz));
2910                         tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2911                         freemsg(mp);
2912                         return;
2913                 }
2914 #ifdef DEBUG
2915                 /*
2916                  * Mild form of ASSERT()ion to detect broken TPI apps.
2917                  * if (! assertion)
2918                  *      log warning;
2919                  */
2920                 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2921                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
2922                             SL_TRACE|SL_ERROR,
2923                             "tl_conn_req: addr overlaps TPI message"));
2924                 }
2925 #endif
2926                 if (olen) {
2927                         /*
2928                          * no opts in connect req
2929                          * supported in this provider except for sockets.
2930                          */
2931                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2932                             SL_TRACE|SL_ERROR,
2933                             "tl_conn_req:options not supported "
2934                             "in message"));
2935                         tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2936                         freemsg(mp);
2937                         return;
2938                 }
2939         }
2940 
2941         /*
2942          * Prevent tep from closing on us.
2943          */
2944         if (! tl_noclose(tep)) {
2945                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2946                     "tl_conn_req:endpoint is closing"));
2947                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2948                 freemsg(mp);
2949                 return;
2950         }
2951 
2952         tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2953         /*
2954          * get endpoint to connect to
2955          * check that peer with DEST addr is bound to addr
2956          * and has CONIND_number > 0
2957          */
2958         dst.ta_alen = alen;
2959         dst.ta_abuf = mp->b_rptr + aoff;
2960         dst.ta_zoneid = tep->te_zoneid;
2961 
2962         /*
2963          * Verify if remote addr is in use
2964          */
2965         peer_tep = (IS_SOCKET(tep) ?
2966             tl_sock_find_peer(tep, &ux_addr) :
2967             tl_find_peer(tep, &dst));
2968 
2969         if (peer_tep == NULL) {
2970                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2971                     "tl_conn_req:no one at connect address"));
2972                 err = ECONNREFUSED;
2973         } else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2974                 /*
2975                  * validate that number of incoming connection is
2976                  * not to capacity on destination endpoint
2977                  */
2978                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2979                     "tl_conn_req: qlen overflow connection refused"));
2980                         err = ECONNREFUSED;
2981         }
2982 
2983         /*
2984          * Send T_DISCON_IND in case of error
2985          */
2986         if (err != 0) {
2987                 if (peer_tep != NULL)
2988                         tl_refrele(peer_tep);
2989                 /* We are still expected to send T_OK_ACK */
2990                 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2991                 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2992                 tl_closeok(tep);
2993                 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2994                     M_PROTO, T_DISCON_IND);
2995                 if (dimp == NULL) {
2996                         tl_merror(wq, NULL, ENOSR);
2997                         return;
2998                 }
2999                 di = (struct T_discon_ind *)dimp->b_rptr;
3000                 di->DISCON_reason = err;
3001                 di->SEQ_number = BADSEQNUM;
3002 
3003                 tep->te_state = TS_IDLE;
3004                 /*
3005                  * send T_DISCON_IND message
3006                  */
3007                 putnext(tep->te_rq, dimp);
3008                 return;
3009         }
3010 
3011         ASSERT(IS_COTS(peer_tep));
3012 
3013         /*
3014          * Found the listener. At this point processing will continue on
3015          * listener serializer. Close of the endpoint should be blocked while we
3016          * switch serializers.
3017          */
3018         tl_serializer_refhold(peer_tep->te_ser);
3019         tl_serializer_refrele(tep->te_ser);
3020         tep->te_ser = peer_tep->te_ser;
3021         ASSERT(tep->te_oconp == NULL);
3022         tep->te_oconp = peer_tep;
3023 
3024         /*
3025          * It is safe to close now. Close may continue on listener serializer.
3026          */
3027         tl_closeok(tep);
3028 
3029         /*
3030          * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3031          * data, so we link mp to ackmp.
3032          */
3033         ackmp->b_cont = mp;
3034         mp = ackmp;
3035 
3036         tl_refhold(tep);
3037         tl_serializer_enter(tep, tl_conn_req_ser, mp);
3038 }
3039 
3040 /*
3041  * Finish T_CONN_REQ processing on listener serializer.
3042  */
3043 static void
3044 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3045 {
3046         queue_t         *wq;
3047         tl_endpt_t      *peer_tep = tep->te_oconp;
3048         mblk_t          *confmp, *cimp, *indmp;
3049         void            *opts = NULL;
3050         mblk_t          *ackmp = mp;
3051         struct T_conn_req       *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3052         struct T_conn_ind       *ci;
3053         tl_icon_t       *tip;
3054         void            *addr_startp;
3055         t_scalar_t      olen = creq->OPT_length;
3056         t_scalar_t      ooff = creq->OPT_offset;
3057         size_t          ci_msz;
3058         size_t          size;
3059         cred_t          *cr = NULL;
3060         pid_t           cpid;
3061 
3062         if (tep->te_closing) {
3063                 TL_UNCONNECT(tep->te_oconp);
3064                 tl_serializer_exit(tep);
3065                 tl_refrele(tep);
3066                 freemsg(mp);
3067                 return;
3068         }
3069 
3070         wq = tep->te_wq;
3071         tep->te_flag |= TL_EAGER;
3072 
3073         /*
3074          * Extract preallocated ackmp from mp.
3075          */
3076         mp = mp->b_cont;
3077         ackmp->b_cont = NULL;
3078 
3079         if (olen == 0)
3080                 ooff = 0;
3081 
3082         if (peer_tep->te_closing ||
3083             !((peer_tep->te_state == TS_IDLE) ||
3084             (peer_tep->te_state == TS_WRES_CIND))) {
3085                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3086                     "tl_conn_req:peer in bad state (%d)",
3087                     peer_tep->te_state));
3088                 TL_UNCONNECT(tep->te_oconp);
3089                 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3090                 freemsg(ackmp);
3091                 tl_serializer_exit(tep);
3092                 tl_refrele(tep);
3093                 return;
3094         }
3095 
3096         /*
3097          * preallocate now for T_DISCON_IND or T_CONN_IND
3098          */
3099         /*
3100          * calculate length of T_CONN_IND message
3101          */
3102         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3103                 cr = msg_getcred(mp, &cpid);
3104                 ASSERT(cr != NULL);
3105                 if (peer_tep->te_flag & TL_SETCRED) {
3106                         ooff = 0;
3107                         olen = (t_scalar_t) sizeof (struct opthdr) +
3108                             OPTLEN(sizeof (tl_credopt_t));
3109                         /* 1 option only */
3110                 } else {
3111                         ooff = 0;
3112                         olen = (t_scalar_t)sizeof (struct opthdr) +
3113                             OPTLEN(ucredminsize(cr));
3114                         /* 1 option only */
3115                 }
3116         }
3117         ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3118         ci_msz = T_ALIGN(ci_msz) + olen;
3119         size = max(ci_msz, sizeof (struct T_discon_ind));
3120 
3121         /*
3122          * Save options from mp - we'll need them for T_CONN_IND.
3123          */
3124         if (ooff != 0) {
3125                 opts = kmem_alloc(olen, KM_NOSLEEP);
3126                 if (opts == NULL) {
3127                         /*
3128                          * roll back state changes
3129                          */
3130                         tep->te_state = TS_IDLE;
3131                         tl_memrecover(wq, mp, size);
3132                         freemsg(ackmp);
3133                         TL_UNCONNECT(tep->te_oconp);
3134                         tl_serializer_exit(tep);
3135                         tl_refrele(tep);
3136                         return;
3137                 }
3138                 /* Copy options to a temp buffer */
3139                 bcopy(mp->b_rptr + ooff, opts, olen);
3140         }
3141 
3142         if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3143                 /*
3144                  * Generate a T_CONN_CON that has the identical address
3145                  * (and options) as the T_CONN_REQ.
3146                  * NOTE: assumes that the T_conn_req and T_conn_con structures
3147                  * are isomorphic.
3148                  */
3149                 confmp = copyb(mp);
3150                 if (! confmp) {
3151                         /*
3152                          * roll back state changes
3153                          */
3154                         tep->te_state = TS_IDLE;
3155                         tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3156                         freemsg(ackmp);
3157                         if (opts != NULL)
3158                                 kmem_free(opts, olen);
3159                         TL_UNCONNECT(tep->te_oconp);
3160                         tl_serializer_exit(tep);
3161                         tl_refrele(tep);
3162                         return;
3163                 }
3164                 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3165                     T_CONN_CON;
3166         } else {
3167                 confmp = NULL;
3168         }
3169         if ((indmp = reallocb(mp, size, 0)) == NULL) {
3170                 /*
3171                  * roll back state changes
3172                  */
3173                 tep->te_state = TS_IDLE;
3174                 tl_memrecover(wq, mp, size);
3175                 freemsg(ackmp);
3176                 if (opts != NULL)
3177                         kmem_free(opts, olen);
3178                 freemsg(confmp);
3179                 TL_UNCONNECT(tep->te_oconp);
3180                 tl_serializer_exit(tep);
3181                 tl_refrele(tep);
3182                 return;
3183         }
3184 
3185         tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3186         if (tip == NULL) {
3187                 /*
3188                  * roll back state changes
3189                  */
3190                 tep->te_state = TS_IDLE;
3191                 tl_memrecover(wq, indmp, sizeof (*tip));
3192                 freemsg(ackmp);
3193                 if (opts != NULL)
3194                         kmem_free(opts, olen);
3195                 freemsg(confmp);
3196                 TL_UNCONNECT(tep->te_oconp);
3197                 tl_serializer_exit(tep);
3198                 tl_refrele(tep);
3199                 return;
3200         }
3201         tip->ti_mp = NULL;
3202 
3203         /*
3204          * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3205          * and tl_icon_t cell.
3206          */
3207 
3208         /*
3209          * ack validity of request and send the peer credential in the ACK.
3210          */
3211         tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3212 
3213         if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3214             confmp != NULL) {
3215                 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3216         }
3217 
3218         tl_ok_ack(wq, ackmp, T_CONN_REQ);
3219 
3220         /*
3221          * prepare message to send T_CONN_IND
3222          */
3223         /*
3224          * allocate the message - original data blocks retained
3225          * in the returned mblk
3226          */
3227         cimp = tl_resizemp(indmp, size);
3228         if (! cimp) {
3229                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3230                     "tl_conn_req:con_ind:allocb failure"));
3231                 tl_merror(wq, indmp, ENOMEM);
3232                 TL_UNCONNECT(tep->te_oconp);
3233                 tl_serializer_exit(tep);
3234                 tl_refrele(tep);
3235                 if (opts != NULL)
3236                         kmem_free(opts, olen);
3237                 freemsg(confmp);
3238                 ASSERT(tip->ti_mp == NULL);
3239                 kmem_free(tip, sizeof (*tip));
3240                 return;
3241         }
3242 
3243         DB_TYPE(cimp) = M_PROTO;
3244         ci = (struct T_conn_ind *)cimp->b_rptr;
3245         ci->PRIM_type  = T_CONN_IND;
3246         ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3247         ci->SRC_length = tep->te_alen;
3248         ci->SEQ_number = tep->te_seqno;
3249 
3250         addr_startp = cimp->b_rptr + ci->SRC_offset;
3251         bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3252         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3253 
3254                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3255                     ci->SRC_length);
3256                 ci->OPT_length = olen; /* because only 1 option */
3257                 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3258                     cr, cpid,
3259                     peer_tep->te_flag, peer_tep->te_credp);
3260         } else if (ooff != 0) {
3261                 /* Copy option from T_CONN_REQ */
3262                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3263                     ci->SRC_length);
3264                 ci->OPT_length = olen;
3265                 ASSERT(opts != NULL);
3266                 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3267         } else {
3268                 ci->OPT_offset = 0;
3269                 ci->OPT_length = 0;
3270         }
3271         if (opts != NULL)
3272                 kmem_free(opts, olen);
3273 
3274         /*
3275          * register connection request with server peer
3276          * append to list of incoming connections
3277          * increment references for both peer_tep and tep: peer_tep is placed on
3278          * te_oconp and tep is placed on listeners queue.
3279          */
3280         tip->ti_tep = tep;
3281         tip->ti_seqno = tep->te_seqno;
3282         list_insert_tail(&peer_tep->te_iconp, tip);
3283         peer_tep->te_nicon++;
3284 
3285         peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3286         /*
3287          * send the T_CONN_IND message
3288          */
3289         putnext(peer_tep->te_rq, cimp);
3290 
3291         /*
3292          * Send a T_CONN_CON message for sockets.
3293          * Disable the queues until we have reached the correct state!
3294          */
3295         if (confmp != NULL) {
3296                 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3297                 noenable(wq);
3298                 putnext(tep->te_rq, confmp);
3299         }
3300         /*
3301          * Now we need to increment tep reference because tep is referenced by
3302          * server list of pending connections. We also need to decrement
3303          * reference before exiting serializer. Two operations void each other
3304          * so we don't modify reference at all.
3305          */
3306         ASSERT(tep->te_refcnt >= 2);
3307         ASSERT(peer_tep->te_refcnt >= 2);
3308         tl_serializer_exit(tep);
3309 }
3310 
3311 
3312 
3313 /*
3314  * Handle T_conn_res on listener stream. Called on listener serializer.
3315  * The request is validated, the acceptor named by ACCEPTOR_id is looked up
3316  * and the pending connection matching SEQ_number is linked to it.
3317  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3318  * Switch eager serializer to acceptor's when possible; otherwise the
      * acceptor is moved to the eager's (i.e. the listener's) serializer.
3319  *
3320  * If TL_SET[U]CRED generate the credentials options.
3321  * For sockets tl_conn_req has already generated the T_CONN_CON.
      *
      * Arguments:
      *   mp  - message holding the T_CONN_RES/O_T_CONN_RES primitive. It is
      *         always consumed here: freed, handed to tl_memrecover(), or
      *         reused via reallocb() as the T_DISCON_IND/T_CONN_CON message.
      *   tep - the listening endpoint; must be a COTS endpoint (asserted).
      *
      * Every early return taken after the acceptor has been found and marked
      * no-close undoes that with tl_closeok() and drops the lookup reference
      * with tl_refrele(). The fully successful path only calls tl_closeok():
      * the lookup reference is kept for the te_conp linkage.
3322  */
3323 static void
3324 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3325 {
3326         queue_t                 *wq;
3327         struct T_conn_res       *cres = (struct T_conn_res *)mp->b_rptr;
3328         ssize_t                 msz = MBLKL(mp);
3329         t_scalar_t              olen, ooff, err = 0;
3330         t_scalar_t              prim = cres->PRIM_type;
3331         uchar_t                 *addr_startp;
3332         tl_endpt_t              *acc_ep = NULL, *cl_ep = NULL;
3333         tl_icon_t               *tip;
3334         size_t                  size;
3335         mblk_t                  *ackmp, *respmp;
3336         mblk_t                  *dimp, *ccmp = NULL;
3337         struct T_discon_ind     *di;
3338         struct T_conn_con       *cc;
3339         boolean_t               client_noclose_set = B_FALSE;
3340         boolean_t               switch_client_serializer = B_TRUE;
3341 
3342         ASSERT(IS_COTS(tep));
3343 
             /* A closing listener silently drops the request. */
3344         if (tep->te_closing) {
3345                 freemsg(mp);
3346                 return;
3347         }
3348 
3349         wq = tep->te_wq;
3350 
3351         /*
3352          * preallocate memory for:
3353          * 1. max of T_ERROR_ACK and T_OK_ACK
3354          *      ==> known max T_ERROR_ACK
3355          * 2. max of T_DISCON_IND and T_CONN_CON
3356          */
3357         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3358         if (! ackmp) {
3359                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3360                 return;
3361         }
3362         /*
3363          * memory committed for T_OK_ACK/T_ERROR_ACK now
3364          * will be committed for T_DISCON_IND/T_CONN_CON later
3365          */
3366 
3367 
3368         ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3369 
3370         /*
3371          * validate state
3372          */
3373         if (tep->te_state != TS_WRES_CIND) {
3374                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3375                     SL_TRACE|SL_ERROR,
3376                     "tl_wput:T_CONN_RES:out of state, state=%d",
3377                     tep->te_state));
3378                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3379                 freemsg(mp);
3380                 return;
3381         }
3382 
3383         /*
3384          * validate the message
3385          * Note: dereference fields in struct inside message only
3386          * after validating the message length.
3387          */
3388         if (msz < sizeof (struct T_conn_res)) {
3389                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3390                     "tl_conn_res:invalid message length"));
3391                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3392                 freemsg(mp);
3393                 return;
3394         }
3395         olen = cres->OPT_length;
3396         ooff = cres->OPT_offset;
             /* Options, if any, must lie entirely within the message. */
3397         if (((olen > 0) && ((ooff + olen) > msz))) {
3398                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3399                     "tl_conn_res:invalid message"));
3400                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3401                 freemsg(mp);
3402                 return;
3403         }
3404         if (olen) {
3405                 /*
3406                  * no opts in connect res
3407                  * supported in this provider
3408                  */
3409                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3410                     "tl_conn_res:options not supported in message"));
3411                 tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3412                 freemsg(mp);
3413                 return;
3414         }
3415 
             /*
              * The request is well formed: take the TE_CONN_RES transition.
              * Every error return below takes TE_ERROR_ACK from this state.
              */
3416         tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3417         ASSERT(tep->te_state == TS_WACK_CRES);
3418 
             /*
              * Reject sequence numbers in the range
              * [BADSEQNUM, TL_MINOR_START).
              */
3419         if (cres->SEQ_number < TL_MINOR_START &&
3420             cres->SEQ_number >= BADSEQNUM) {
3421                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3422                     "tl_conn_res:remote endpoint sequence number bad"));
3423                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3424                 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3425                 freemsg(mp);
3426                 return;
3427         }
3428 
3429         /*
3430          * find accepting endpoint. Will have extra reference if found.
3431          */
3432         if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3433             (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3434             (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3435                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3436                     "tl_conn_res:bad accepting endpoint"));
3437                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3438                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3439                 freemsg(mp);
3440                 return;
3441         }
3442 
3443         /*
3444          * Prevent acceptor from closing.
              * If that is refused only the lookup reference has to be dropped.
3445          */
3446         if (! tl_noclose(acc_ep)) {
3447                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3448                     "tl_conn_res:bad accepting endpoint"));
3449                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3450                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3451                 tl_refrele(acc_ep);
3452                 freemsg(mp);
3453                 return;
3454         }
3455 
3456         acc_ep->te_flag |= TL_ACCEPTOR;
3457 
3458         /*
3459          * validate that accepting endpoint, if different from listening
3460          * has address bound => state is TS_IDLE
3461          * TROUBLE in XPG4 !!?
3462          */
3463         if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3464                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3465                     "tl_conn_res:accepting endpoint has no address bound,"
3466                     "state=%d", acc_ep->te_state));
3467                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3468                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3469                 freemsg(mp);
3470                 tl_closeok(acc_ep);
3471                 tl_refrele(acc_ep);
3472                 return;
3473         }
3474 
3475         /*
3476          * validate if accepting endpt same as listening, then
3477          * no other incoming connection should be on the queue
3478          */
3479 
3480         if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3481                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3482                     "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3483                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3484                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3485                 freemsg(mp);
3486                 tl_closeok(acc_ep);
3487                 tl_refrele(acc_ep);
3488                 return;
3489         }
3490 
3491         /*
3492          * Find the entry for the client on the listener's list of
3493          * pending connections, keyed by the sequence number from the
3494          * T_CONN_RES. On the paths that complete the accept the entry
3495          * is released later with tl_freetip().
3496          */
3497         tip = tl_icon_find(tep, cres->SEQ_number);
3498         if (tip == NULL) {
3499                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3500                     "tl_conn_res:no client in listener list"));
3501                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3502                 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3503                 freemsg(mp);
3504                 tl_closeok(acc_ep);
3505                 tl_refrele(acc_ep);
3506                 return;
3507         }
3508 
3509         /*
3510          * If ti_tep is NULL the client has already closed. In this case
3511          * the code below will avoid any action on the client side
3512          * but complete the server and acceptor state transitions.
3513          */
3514         ASSERT(tip->ti_tep == NULL ||
3515             tip->ti_tep->te_seqno == cres->SEQ_number);
3516         cl_ep = tip->ti_tep;
3517 
3518         /*
3519          * If the client is present it is switched from listener's to acceptor's
3520          * serializer. We should block client closes while serializers are
3521          * being switched.
3522          *
3523          * It is possible that the client is present but is currently being
3524          * closed. There are two possible cases:
3525          *
3526          * 1) The client has already entered tl_close_finish_ser() and sent
3527          *    T_ORDREL_IND. In this case we can just ignore the client (but we
3528          *    still need to send all messages from tip->ti_mp to the acceptor).
3529          *
3530          * 2) The client started the close but has not entered
3531          *    tl_close_finish_ser() yet. In this case, the client is already
3532          *    proceeding asynchronously on the listener's serializer, so we're
3533          *    forced to change the acceptor to use the listener's serializer to
3534          *    ensure that any operations on the acceptor are serialized with
3535          *    respect to the close that's in-progress.
3536          */
3537         if (cl_ep != NULL) {
3538                 if (tl_noclose(cl_ep)) {
3539                         client_noclose_set = B_TRUE;
3540                 } else {
3541                         /*
3542                          * Client is closing. If it has sent the
3543                          * T_ORDREL_IND, we can simply ignore it - otherwise,
3544                          * we have to let the client continue until it is
3545                          * sent.
3546                          *
3547                          * If we do continue using the client, acceptor will
3548                          * switch to client's serializer which is used by client
3549                          * for its close.
                              *
                              * The client is dropped (treated as already closed)
                              * when it is not a socket, when early connect is
                              * disabled, or when its state is -1.
3550                          */
3551                         tl_client_closing_when_accepting++;
3552                         switch_client_serializer = B_FALSE;
3553                         if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3554                             cl_ep->te_state == -1)
3555                                 cl_ep = NULL;
3556                 }
3557         }
3558 
3559         if (cl_ep != NULL) {
3560                 /*
3561                  * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3562                  * (latter for sockets only)
                      *
                      * NOTE(review): as written, err is set only for a socket
                      * client whose state is neither TS_WCON_CREQ nor
                      * TS_DATA_XFER; a non-socket client in any other state
                      * passes this check. Confirm that is intended.
3563                  */
3564                 if (cl_ep->te_state != TS_WCON_CREQ &&
3565                     (cl_ep->te_state != TS_DATA_XFER &&
3566                     IS_SOCKET(cl_ep))) {
3567                         err = ECONNREFUSED;
3568                         /*
3569                          * T_DISCON_IND sent later after committing memory
3570                          * and acking validity of request
3571                          */
3572                         (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3573                             "tl_conn_res:peer in bad state"));
3574                 }
3575 
3576                 /*
3577                  * preallocate now for T_DISCON_IND or T_CONN_CON
3578                  * ack validity of request (T_OK_ACK) after memory committed
3579                  */
3580 
3581                 if (err)
3582                         size = sizeof (struct T_discon_ind);
3583                 else {
3584                         /*
3585                          * calculate length of T_CONN_CON message
                              * (header + acceptor address, aligned, followed by
                              * at most one credentials option)
3586                          */
3587                         olen = 0;
3588                         if (cl_ep->te_flag & TL_SETCRED) {
3589                                 olen = (t_scalar_t)sizeof (struct opthdr) +
3590                                     OPTLEN(sizeof (tl_credopt_t));
3591                         } else if (cl_ep->te_flag & TL_SETUCRED) {
3592                                 olen = (t_scalar_t)sizeof (struct opthdr) +
3593                                     OPTLEN(ucredminsize(acc_ep->te_credp));
3594                         }
3595                         size = T_ALIGN(sizeof (struct T_conn_con) +
3596                             acc_ep->te_alen) + olen;
3597                 }
3598                 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3599                         /*
3600                          * roll back state changes
3601                          */
3602                         tep->te_state = TS_WRES_CIND;
3603                         tl_memrecover(wq, mp, size);
3604                         freemsg(ackmp);
3605                         if (client_noclose_set)
3606                                 tl_closeok(cl_ep);
3607                         tl_closeok(acc_ep);
3608                         tl_refrele(acc_ep);
3609                         return;
3610                 }
                     /* respmp supersedes mp from here on */
3611                 mp = NULL;
3612         }
3613 
3614         /*
3615          * Now ack validity of request
              * The OK_ACK event depends on whether this is the only pending
              * connection and on whether the listener is also the acceptor.
3616          */
3617         if (tep->te_nicon == 1) {
3618                 if (tep == acc_ep)
3619                         tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3620                 else
3621                         tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3622         } else
3623                 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3624 
3625         /*
3626          * send T_DISCON_IND now if client state validation failed earlier
              *
              * NOTE(review): this path returns without calling tl_freetip()
              * on tip; confirm the pending-connection entry is cleaned up
              * elsewhere.
3627          */
3628         if (err) {
3629                 tl_ok_ack(wq, ackmp, prim);
3630                 /*
3631                  * flush the queues - why always ?
3632                  */
3633                 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3634 
3635                 dimp = tl_resizemp(respmp, size);
3636                 if (! dimp) {
3637                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
3638                             SL_TRACE|SL_ERROR,
3639                             "tl_conn_res:con_ind:allocb failure"));
3640                         tl_merror(wq, respmp, ENOMEM);
3641                         tl_closeok(acc_ep);
3642                         if (client_noclose_set)
3643                                 tl_closeok(cl_ep);
3644                         tl_refrele(acc_ep);
3645                         return;
3646                 }
3647                 if (dimp->b_cont) {
3648                         /* no user data in provider generated discon ind */
3649                         freemsg(dimp->b_cont);
3650                         dimp->b_cont = NULL;
3651                 }
3652 
3653                 DB_TYPE(dimp) = M_PROTO;
3654                 di = (struct T_discon_ind *)dimp->b_rptr;
3655                 di->PRIM_type  = T_DISCON_IND;
3656                 di->DISCON_reason = err;
3657                 di->SEQ_number = BADSEQNUM;
3658 
3659                 tep->te_state = TS_IDLE;
3660                 /*
3661                  * send T_DISCON_IND message
3662                  */
3663                 putnext(acc_ep->te_rq, dimp);
3664                 if (client_noclose_set)
3665                         tl_closeok(cl_ep);
3666                 tl_closeok(acc_ep);
3667                 tl_refrele(acc_ep);
3668                 return;
3669         }
3670 
3671         /*
3672          * now start connecting the accepting endpoint
3673          */
3674         if (tep != acc_ep)
3675                 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3676 
3677         if (cl_ep == NULL) {
3678                 /*
3679                  * The client has already closed. Send up any queued messages
3680                  * and change the state accordingly.
3681                  */
3682                 tl_ok_ack(wq, ackmp, prim);
3683                 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3684 
3685                 /*
3686                  * remove endpoint from incoming connection
3687                  * delete client from list of incoming connections
3688                  */
3689                 tl_freetip(tep, tip);
3690                 freemsg(mp);
3691                 tl_closeok(acc_ep);
3692                 tl_refrele(acc_ep);
3693                 return;
3694         } else if (tip->ti_mp != NULL) {
3695                 /*
3696                  * The client could have queued a T_DISCON_IND which needs
3697                  * to be sent up.
3698                  * Note that t_discon_req can not operate the same as
3699                  * t_data_req since it is not possible for it to putbq
3700                  * the message and return -1 due to the use of qwriter.
3701                  */
3702                 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3703         }
3704 
3705         /*
3706          * prepare connect confirm T_CONN_CON message
3707          */
3708 
3709         /*
3710          * allocate the message - original data blocks
3711          * retained in the returned mblk
              *
              * The T_CONN_CON is only built here when the client is not a
              * socket or early connect is disabled; otherwise the preallocated
              * response message is simply freed.
3712          */
3713         if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3714                 ccmp = tl_resizemp(respmp, size);
3715                 if (ccmp == NULL) {
3716                         tl_ok_ack(wq, ackmp, prim);
3717                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
3718                             SL_TRACE|SL_ERROR,
3719                             "tl_conn_res:conn_con:allocb failure"));
3720                         tl_merror(wq, respmp, ENOMEM);
3721                         tl_closeok(acc_ep);
3722                         if (client_noclose_set)
3723                                 tl_closeok(cl_ep);
3724                         tl_refrele(acc_ep);
3725                         return;
3726                 }
3727 
                     /*
                      * Fill in the T_CONN_CON: the responding address is the
                      * acceptor's bound address, placed right after the header.
                      */
3728                 DB_TYPE(ccmp) = M_PROTO;
3729                 cc = (struct T_conn_con *)ccmp->b_rptr;
3730                 cc->PRIM_type  = T_CONN_CON;
3731                 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3732                 cc->RES_length = acc_ep->te_alen;
3733                 addr_startp = ccmp->b_rptr + cc->RES_offset;
3734                 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3735                 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3736                         cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3737                             cc->RES_length);
3738                         cc->OPT_length = olen;
3739                         tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3740                             acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3741                             cl_ep->te_credp);
3742                 } else {
3743                         cc->OPT_offset = 0;
3744                         cc->OPT_length = 0;
3745                 }
3746                 /*
3747                  * Forward the credential in the packet so it can be picked up
3748                  * at the higher layers for more complete credential processing
3749                  */
3750                 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3751         } else {
3752                 freemsg(respmp);
3753                 respmp = NULL;
3754         }
3755 
3756         /*
3757          * make connection linking
3758          * accepting and client endpoints
3759          * No need to increment references:
3760          *      on client: it should already have one from tip->ti_tep linkage.
3761          *      on acceptor: it should already have one from the table lookup.
3762          *
3763          * At this point both client and acceptor can't close. Set client
3764          * serializer to acceptor's.
3765          */
3766         ASSERT(cl_ep->te_refcnt >= 2);
3767         ASSERT(acc_ep->te_refcnt >= 2);
3768         ASSERT(cl_ep->te_conp == NULL);
3769         ASSERT(acc_ep->te_conp == NULL);
3770         cl_ep->te_conp = acc_ep;
3771         acc_ep->te_conp = cl_ep;
3772         ASSERT(cl_ep->te_ser == tep->te_ser);
3773         if (switch_client_serializer) {
3774                 mutex_enter(&cl_ep->te_ser_lock);
                     /*
                      * A non-zero te_ser_count prevents the switch; fall back to
                      * moving the acceptor instead (handled below).
                      */
3775                 if (cl_ep->te_ser_count > 0) {
3776                         switch_client_serializer = B_FALSE;
3777                         tl_serializer_noswitch++;
3778                 } else {
3779                         /*
3780                          * Move client to the acceptor's serializer.
3781                          */
3782                         tl_serializer_refhold(acc_ep->te_ser);
3783                         tl_serializer_refrele(cl_ep->te_ser);
3784                         cl_ep->te_ser = acc_ep->te_ser;
3785                 }
3786                 mutex_exit(&cl_ep->te_ser_lock);
3787         }
3788         if (!switch_client_serializer) {
3789                 /*
3790                  * It is not possible to switch client to use acceptor's.
3791                  * Move acceptor to client's serializer (which is the same as
3792                  * listener's).
3793                  */
3794                 tl_serializer_refhold(cl_ep->te_ser);
3795                 tl_serializer_refrele(acc_ep->te_ser);
3796                 acc_ep->te_ser = cl_ep->te_ser;
3797         }
3798 
3799         TL_REMOVE_PEER(cl_ep->te_oconp);
3800         TL_REMOVE_PEER(acc_ep->te_oconp);
3801 
3802         /*
3803          * remove endpoint from incoming connection
3804          * delete client from list of incoming connections
              *
              * ti_tep is cleared before tl_freetip() is called; the client
              * reference it represented is the one relied upon above for the
              * te_conp linkage.
3805          */
3806         tip->ti_tep = NULL;
3807         tl_freetip(tep, tip);
3808         tl_ok_ack(wq, ackmp, prim);
3809 
3810         /*
3811          * data blocks already linked in reallocb()
3812          */
3813 
3814         /*
3815          * link queues so that I_SENDFD will work
3816          */
3817         if (! IS_SOCKET(tep)) {
3818                 acc_ep->te_wq->q_next = cl_ep->te_rq;
3819                 cl_ep->te_wq->q_next = acc_ep->te_rq;
3820         }
3821 
3822         /*
3823          * send T_CONN_CON up on client side unless it was already
3824          * done (for a socket). In cases any data or ordrel req has been
3825          * queued make sure that the service procedure runs.
              *
              * ccmp is only built above under the opposite condition, so the
              * freemsg() in the socket branch appears to be purely defensive.
3826          */
3827         if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3828                 enableok(cl_ep->te_wq);
3829                 TL_QENABLE(cl_ep);
3830                 if (ccmp != NULL)
3831                         freemsg(ccmp);
3832         } else {
3833                 /*
3834                  * change client state on TE_CONN_CON event
3835                  */
3836                 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3837                 putnext(cl_ep->te_rq, ccmp);
3838         }
3839 
3840         /* Mark both endpoints as accepted */
3841         cl_ep->te_flag |= TL_ACCEPTED;
3842         acc_ep->te_flag |= TL_ACCEPTED;
3843 
3844         /*
3845          * Allow client and acceptor to close.
3846          */
3847         tl_closeok(acc_ep);
3848         if (client_noclose_set)
3849                 tl_closeok(cl_ep);
3850 }
3851 
3852 
3853 
3854 
3855 static void
3856 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3857 {
3858         queue_t                 *wq;
3859         struct T_discon_req     *dr;
3860         ssize_t                 msz;
3861         tl_endpt_t              *peer_tep = tep->te_conp;
3862         tl_endpt_t              *srv_tep = tep->te_oconp;
3863         tl_icon_t               *tip;
3864         size_t                  size;
3865         mblk_t                  *ackmp, *dimp, *respmp;
3866         struct T_discon_ind     *di;
3867         t_scalar_t              save_state, new_state;
3868 
3869         if (tep->te_closing) {
3870                 freemsg(mp);
3871                 return;
3872         }
3873 
3874         if ((peer_tep != NULL) && peer_tep->te_closing) {
3875                 TL_UNCONNECT(tep->te_conp);
3876                 peer_tep = NULL;
3877         }
3878         if ((srv_tep != NULL) && srv_tep->te_closing) {
3879                 TL_UNCONNECT(tep->te_oconp);
3880                 srv_tep = NULL;
3881         }
3882 
3883         wq = tep->te_wq;
3884 
3885         /*
3886          * preallocate memory for:
3887          * 1. max of T_ERROR_ACK and T_OK_ACK
3888          *      ==> known max T_ERROR_ACK
3889          * 2. for  T_DISCON_IND
3890          */
3891         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3892         if (! ackmp) {
3893                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3894                 return;
3895         }
3896         /*
3897          * memory committed for T_OK_ACK/T_ERROR_ACK now
3898          * will be committed for T_DISCON_IND  later
3899          */
3900 
3901         dr = (struct T_discon_req *)mp->b_rptr;
3902         msz = MBLKL(mp);
3903 
3904         /*
3905          * validate the state
3906          */
3907         save_state = new_state = tep->te_state;
3908         if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3909             ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3910                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3911                     SL_TRACE|SL_ERROR,
3912                     "tl_wput:T_DISCON_REQ:out of state, state=%d",
3913                     tep->te_state));
3914                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3915                 freemsg(mp);
3916                 return;
3917         }
3918         /*
3919          * Defer committing the state change until it is determined if
3920          * the message will be queued with the tl_icon or not.
3921          */
3922         new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3923 
3924         /* validate the message */
3925         if (msz < sizeof (struct T_discon_req)) {
3926                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3927                     "tl_discon_req:invalid message"));
3928                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3929                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3930                 freemsg(mp);
3931                 return;
3932         }
3933 
3934         /*
3935          * if server, then validate that client exists
3936          * by connection sequence number etc.
3937          */
3938         if (tep->te_nicon > 0) { /* server */
3939 
3940                 /*
3941                  * search server list for disconnect client
3942                  */
3943                 tip = tl_icon_find(tep, dr->SEQ_number);
3944                 if (tip == NULL) {
3945                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
3946                             SL_TRACE|SL_ERROR,
3947                             "tl_discon_req:no disconnect endpoint"));
3948                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3949                         tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3950                         freemsg(mp);
3951                         return;
3952                 }
3953                 /*
3954                  * If ti_tep is NULL the client has already closed. In this case
3955                  * the code below will avoid any action on the client side.
3956                  */
3957 
3958                 IMPLY(tip->ti_tep != NULL,
3959                     tip->ti_tep->te_seqno == dr->SEQ_number);
3960                 peer_tep = tip->ti_tep;
3961         }
3962 
3963         /*
3964          * preallocate now for T_DISCON_IND
3965          * ack validity of request (T_OK_ACK) after memory committed
3966          */
3967         size = sizeof (struct T_discon_ind);
3968         if ((respmp = reallocb(mp, size, 0)) == NULL) {
3969                 tl_memrecover(wq, mp, size);
3970                 freemsg(ackmp);
3971                 return;
3972         }
3973 
3974         /*
3975          * prepare message to ack validity of request
3976          */
3977         if (tep->te_nicon == 0)
3978                 new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3979         else
3980                 if (tep->te_nicon == 1)
3981                         new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3982                 else
3983                         new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3984 
3985         /*
3986          * Flushing queues according to TPI. Using the old state.
3987          */
3988         if ((tep->te_nicon <= 1) &&
3989             ((save_state == TS_DATA_XFER) ||
3990             (save_state == TS_WIND_ORDREL) ||
3991             (save_state == TS_WREQ_ORDREL)))
3992                 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3993 
3994         /* send T_OK_ACK up  */
3995         tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3996 
3997         /*
3998          * now do disconnect business
3999          */
4000         if (tep->te_nicon > 0) { /* listener */
4001                 if (peer_tep != NULL && !peer_tep->te_closing) {
4002                         /*
4003                          * disconnect incoming connect request pending to tep
4004                          */
4005                         if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4006                                 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4007                                     SL_TRACE|SL_ERROR,
4008                                     "tl_discon_req: reallocb failed"));
4009                                 tep->te_state = new_state;
4010                                 tl_merror(wq, respmp, ENOMEM);
4011                                 return;
4012                         }
4013                         di = (struct T_discon_ind *)dimp->b_rptr;
4014                         di->SEQ_number = BADSEQNUM;
4015                         save_state = peer_tep->te_state;
4016                         peer_tep->te_state = TS_IDLE;
4017 
4018                         TL_REMOVE_PEER(peer_tep->te_oconp);
4019                         enableok(peer_tep->te_wq);
4020                         TL_QENABLE(peer_tep);
4021                 } else {
4022                         freemsg(respmp);
4023                         dimp = NULL;
4024                 }
4025 
4026                 /*
4027                  * remove endpoint from incoming connection list
4028                  * - remove disconnect client from list on server
4029                  */
4030                 tl_freetip(tep, tip);
4031         } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4032                 /*
4033                  * disconnect an outgoing request pending from tep
4034                  */
4035 
4036                 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4037                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
4038                             SL_TRACE|SL_ERROR,
4039                             "tl_discon_req: reallocb failed"));
4040                         tep->te_state = new_state;
4041                         tl_merror(wq, respmp, ENOMEM);
4042                         return;
4043                 }
4044                 di = (struct T_discon_ind *)dimp->b_rptr;
4045                 DB_TYPE(dimp) = M_PROTO;
4046                 di->PRIM_type  = T_DISCON_IND;
4047                 di->DISCON_reason = ECONNRESET;
4048                 di->SEQ_number = tep->te_seqno;
4049 
4050                 /*
4051                  * If this is a socket the T_DISCON_IND is queued with
4052                  * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4053                  * from the list of pending connections.
4054                  * Note that when te_oconp is set the peer better have
4055                  * a t_connind_t for the client.
4056                  */
4057                 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4058                         /*
4059                          * No need to check that
4060                          * ti_tep == NULL since the T_DISCON_IND
4061                          * takes precedence over other queued
4062                          * messages.
4063                          */
4064                         tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4065                         peer_tep = NULL;
4066                         dimp = NULL;
4067                         /*
4068                          * Can't clear te_oconp since tl_co_unconnect needs
4069                          * it as a hint not to free the tep.
4070                          * Keep the state unchanged since tl_conn_res inspects
4071                          * it.
4072                          */
4073                         new_state = tep->te_state;
4074                 } else {
4075                         /* Found - delete it */
4076                         tip = tl_icon_find(peer_tep, tep->te_seqno);
4077                         if (tip != NULL) {
4078                                 ASSERT(tep == tip->ti_tep);
4079                                 save_state = peer_tep->te_state;
4080                                 if (peer_tep->te_nicon == 1)
4081                                         peer_tep->te_state =
4082                                             NEXTSTATE(TE_DISCON_IND2,
4083                                             peer_tep->te_state);
4084                                 else
4085                                         peer_tep->te_state =
4086                                             NEXTSTATE(TE_DISCON_IND3,
4087                                             peer_tep->te_state);
4088                                 tl_freetip(peer_tep, tip);
4089                         }
4090                         ASSERT(tep->te_oconp != NULL);
4091                         TL_UNCONNECT(tep->te_oconp);
4092                 }
4093         } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4094                 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4095                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
4096                             SL_TRACE|SL_ERROR,
4097                             "tl_discon_req: reallocb failed"));
4098                         tep->te_state = new_state;
4099                         tl_merror(wq, respmp, ENOMEM);
4100                         return;
4101                 }
4102                 di = (struct T_discon_ind *)dimp->b_rptr;
4103                 di->SEQ_number = BADSEQNUM;
4104 
4105                 save_state = peer_tep->te_state;
4106                 peer_tep->te_state = TS_IDLE;
4107         } else {
4108                 /* Not connected */
4109                 tep->te_state = new_state;
4110                 freemsg(respmp);
4111                 return;
4112         }
4113 
4114         /* Commit state changes */
4115         tep->te_state = new_state;
4116 
4117         if (peer_tep == NULL) {
4118                 ASSERT(dimp == NULL);
4119                 goto done;
4120         }
4121         /*
4122          * Flush queues on peer before sending up
4123          * T_DISCON_IND according to TPI
4124          */
4125 
4126         if ((save_state == TS_DATA_XFER) ||
4127             (save_state == TS_WIND_ORDREL) ||
4128             (save_state == TS_WREQ_ORDREL))
4129                 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4130 
4131         DB_TYPE(dimp) = M_PROTO;
4132         di->PRIM_type  = T_DISCON_IND;
4133         di->DISCON_reason = ECONNRESET;
4134 
4135         /*
4136          * data blocks already linked into dimp by reallocb()
4137          */
4138         /*
4139          * send indication message to peer user module
4140          */
4141         ASSERT(dimp != NULL);
4142         putnext(peer_tep->te_rq, dimp);
4143 done:
4144         if (tep->te_conp) {  /* disconnect pointers if connected */
4145                 ASSERT(! peer_tep->te_closing);
4146 
4147                 /*
4148                  * Messages may be queued on peer's write queue
4149                  * waiting to be processed by its write service
4150                  * procedure. Before the pointer to the peer transport
4151                  * structure is set to NULL, qenable the peer's write
4152                  * queue so that the queued up messages are processed.
4153                  */
4154                 if ((save_state == TS_DATA_XFER) ||
4155                     (save_state == TS_WIND_ORDREL) ||
4156                     (save_state == TS_WREQ_ORDREL))
4157                         TL_QENABLE(peer_tep);
4158                 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4159                 TL_UNCONNECT(peer_tep->te_conp);
4160                 if (! IS_SOCKET(tep)) {
4161                         /*
4162                          * unlink the streams
4163                          */
4164                         tep->te_wq->q_next = NULL;
4165                         peer_tep->te_wq->q_next = NULL;
4166                 }
4167                 TL_UNCONNECT(tep->te_conp);
4168         }
4169 }
4170 
4171 static void
4172 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4173 {
4174         if (!tep->te_closing)
4175                 tl_addr_req(mp, tep);
4176         else
4177                 freemsg(mp);
4178 
4179         tl_serializer_exit(tep);
4180         tl_refrele(tep);
4181 }
4182 
4183 static void
4184 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4185 {
4186         queue_t                 *wq;
4187         size_t                  ack_sz;
4188         mblk_t                  *ackmp;
4189         struct T_addr_ack       *taa;
4190 
4191         if (tep->te_closing) {
4192                 freemsg(mp);
4193                 return;
4194         }
4195 
4196         wq = tep->te_wq;
4197 
4198         /*
4199          * Note: T_ADDR_REQ message has only PRIM_type field
4200          * so it is already validated earlier.
4201          */
4202 
4203         if (IS_CLTS(tep) ||
4204             (tep->te_state > TS_WREQ_ORDREL) ||
4205             (tep->te_state < TS_DATA_XFER)) {
4206                 /*
4207                  * Either connectionless or connection oriented but not
4208                  * in connected data transfer state or half-closed states.
4209                  */
4210                 ack_sz = sizeof (struct T_addr_ack);
4211                 if (tep->te_state >= TS_IDLE)
4212                         /* is bound */
4213                         ack_sz += tep->te_alen;
4214                 ackmp = reallocb(mp, ack_sz, 0);
4215                 if (ackmp == NULL) {
4216                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4217                             SL_TRACE|SL_ERROR,
4218                             "tl_addr_req: reallocb failed"));
4219                         tl_memrecover(wq, mp, ack_sz);
4220                         return;
4221                 }
4222 
4223                 taa = (struct T_addr_ack *)ackmp->b_rptr;
4224 
4225                 bzero(taa, sizeof (struct T_addr_ack));
4226 
4227                 taa->PRIM_type = T_ADDR_ACK;
4228                 ackmp->b_datap->db_type = M_PCPROTO;
4229                 ackmp->b_wptr = (uchar_t *)&taa[1];
4230 
4231                 if (tep->te_state >= TS_IDLE) {
4232                         /* endpoint is bound */
4233                         taa->LOCADDR_length = tep->te_alen;
4234                         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4235 
4236                         bcopy(tep->te_abuf, ackmp->b_wptr,
4237                             tep->te_alen);
4238                         ackmp->b_wptr += tep->te_alen;
4239                         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4240                 }
4241 
4242                 (void) qreply(wq, ackmp);
4243         } else {
4244                 ASSERT(tep->te_state == TS_DATA_XFER ||
4245                     tep->te_state == TS_WIND_ORDREL ||
4246                     tep->te_state == TS_WREQ_ORDREL);
4247                 /* connection oriented in data transfer */
4248                 tl_connected_cots_addr_req(mp, tep);
4249         }
4250 }
4251 
4252 
4253 static void
4254 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4255 {
4256         tl_endpt_t              *peer_tep = tep->te_conp;
4257         size_t                  ack_sz;
4258         mblk_t                  *ackmp;
4259         struct T_addr_ack       *taa;
4260         uchar_t                 *addr_startp;
4261 
4262         if (tep->te_closing) {
4263                 freemsg(mp);
4264                 return;
4265         }
4266 
4267         if (peer_tep == NULL || peer_tep->te_closing) {
4268                 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4269                 return;
4270         }
4271 
4272         ASSERT(tep->te_state >= TS_IDLE);
4273 
4274         ack_sz = sizeof (struct T_addr_ack);
4275         ack_sz += T_ALIGN(tep->te_alen);
4276         ack_sz += peer_tep->te_alen;
4277 
4278         ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4279         if (ackmp == NULL) {
4280                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4281                     "tl_connected_cots_addr_req: reallocb failed"));
4282                 tl_memrecover(tep->te_wq, mp, ack_sz);
4283                 return;
4284         }
4285 
4286         taa = (struct T_addr_ack *)ackmp->b_rptr;
4287 
4288         /* endpoint is bound */
4289         taa->LOCADDR_length = tep->te_alen;
4290         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4291 
4292         addr_startp = (uchar_t *)&taa[1];
4293 
4294         bcopy(tep->te_abuf, addr_startp,
4295             tep->te_alen);
4296 
4297         taa->REMADDR_length = peer_tep->te_alen;
4298         taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4299             taa->LOCADDR_length);
4300         addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4301         bcopy(peer_tep->te_abuf, addr_startp,
4302             peer_tep->te_alen);
4303         ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4304             taa->REMADDR_offset + peer_tep->te_alen;
4305         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4306 
4307         putnext(tep->te_rq, ackmp);
4308 }
4309 
4310 static void
4311 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4312 {
4313         if (IS_CLTS(tep)) {
4314                 *ia = tl_clts_info_ack;
4315                 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4316         } else {
4317                 *ia = tl_cots_info_ack;
4318                 if (IS_COTSORD(tep))
4319                         ia->SERV_type = T_COTS_ORD;
4320         }
4321         ia->TIDU_size = tl_tidusz;
4322         ia->CURRENT_state = tep->te_state;
4323 }
4324 
4325 /*
4326  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4327  * tl_wput.
4328  */
4329 static void
4330 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4331 {
4332         mblk_t                  *ackmp;
4333         t_uscalar_t             cap_bits1;
4334         struct T_capability_ack *tcap;
4335 
4336         if (tep->te_closing) {
4337                 freemsg(mp);
4338                 return;
4339         }
4340 
4341         cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4342 
4343         ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4344             M_PCPROTO, T_CAPABILITY_ACK);
4345         if (ackmp == NULL) {
4346                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4347                     "tl_capability_req: reallocb failed"));
4348                 tl_memrecover(tep->te_wq, mp,
4349                     sizeof (struct T_capability_ack));
4350                 return;
4351         }
4352 
4353         tcap = (struct T_capability_ack *)ackmp->b_rptr;
4354         tcap->CAP_bits1 = 0;
4355 
4356         if (cap_bits1 & TC1_INFO) {
4357                 tl_copy_info(&tcap->INFO_ack, tep);
4358                 tcap->CAP_bits1 |= TC1_INFO;
4359         }
4360 
4361         if (cap_bits1 & TC1_ACCEPTOR_ID) {
4362                 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4363                 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4364         }
4365 
4366         putnext(tep->te_rq, ackmp);
4367 }
4368 
4369 static void
4370 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4371 {
4372         if (! tep->te_closing)
4373                 tl_info_req(mp, tep);
4374         else
4375                 freemsg(mp);
4376 
4377         tl_serializer_exit(tep);
4378         tl_refrele(tep);
4379 }
4380 
4381 static void
4382 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4383 {
4384         mblk_t *ackmp;
4385 
4386         ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4387             M_PCPROTO, T_INFO_ACK);
4388         if (ackmp == NULL) {
4389                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4390                     "tl_info_req: reallocb failed"));
4391                 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4392                 return;
4393         }
4394 
4395         /*
4396          * fill in T_INFO_ACK contents
4397          */
4398         tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4399 
4400         /*
4401          * send ack message
4402          */
4403         putnext(tep->te_rq, ackmp);
4404 }
4405 
4406 /*
4407  * Handle M_DATA, T_data_req and T_optdata_req.
4408  * If this is a socket pass through T_optdata_req options unmodified.
4409  */
4410 static void
4411 tl_data(mblk_t *mp, tl_endpt_t *tep)
4412 {
4413         queue_t                 *wq = tep->te_wq;
4414         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4415         ssize_t                 msz = MBLKL(mp);
4416         tl_endpt_t              *peer_tep;
4417         queue_t                 *peer_rq;
4418         boolean_t               closing = tep->te_closing;
4419 
4420         if (IS_CLTS(tep)) {
4421                 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4422                     SL_TRACE|SL_ERROR,
4423                     "tl_wput:clts:unattached M_DATA"));
4424                 if (!closing) {
4425                         tl_merror(wq, mp, EPROTO);
4426                 } else {
4427                         freemsg(mp);
4428                 }
4429                 return;
4430         }
4431 
4432         /*
4433          * If the endpoint is closing it should still forward any data to the
4434          * peer (if it has one). If it is not allowed to forward it can just
4435          * free the message.
4436          */
4437         if (closing &&
4438             (tep->te_state != TS_DATA_XFER) &&
4439             (tep->te_state != TS_WREQ_ORDREL)) {
4440                 freemsg(mp);
4441                 return;
4442         }
4443 
4444         if (DB_TYPE(mp) == M_PROTO) {
4445                 if (prim->type == T_DATA_REQ &&
4446                     msz < sizeof (struct T_data_req)) {
4447                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4448                                 SL_TRACE|SL_ERROR,
4449                                 "tl_data:T_DATA_REQ:invalid message"));
4450                         if (!closing) {
4451                                 tl_merror(wq, mp, EPROTO);
4452                         } else {
4453                                 freemsg(mp);
4454                         }
4455                         return;
4456                 } else if (prim->type == T_OPTDATA_REQ &&
4457                     (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4458                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4459                             SL_TRACE|SL_ERROR,
4460                             "tl_data:T_OPTDATA_REQ:invalid message"));
4461                         if (!closing) {
4462                                 tl_merror(wq, mp, EPROTO);
4463                         } else {
4464                                 freemsg(mp);
4465                         }
4466                         return;
4467                 }
4468         }
4469 
4470         /*
4471          * connection oriented provider
4472          */
4473         switch (tep->te_state) {
4474         case TS_IDLE:
4475                 /*
4476                  * Other end not here - do nothing.
4477                  */
4478                 freemsg(mp);
4479                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4480                     "tl_data:cots with endpoint idle"));
4481                 return;
4482 
4483         case TS_DATA_XFER:
4484                 /* valid states */
4485                 if (tep->te_conp != NULL)
4486                         break;
4487 
4488                 if (tep->te_oconp == NULL) {
4489                         if (!closing) {
4490                                 tl_merror(wq, mp, EPROTO);
4491                         } else {
4492                                 freemsg(mp);
4493                         }
4494                         return;
4495                 }
4496                 /*
4497                  * For a socket the T_CONN_CON is sent early thus
4498                  * the peer might not yet have accepted the connection.
4499                  * If we are closing queue the packet with the T_CONN_IND.
4500                  * Otherwise defer processing the packet until the peer
4501                  * accepts the connection.
4502                  * Note that the queue is noenabled when we go into this
4503                  * state.
4504                  */
4505                 if (!closing) {
4506                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4507                             SL_TRACE|SL_ERROR,
4508                             "tl_data: ocon"));
4509                         TL_PUTBQ(tep, mp);
4510                         return;
4511                 }
4512                 if (DB_TYPE(mp) == M_PROTO) {
4513                         if (msz < sizeof (t_scalar_t)) {
4514                                 freemsg(mp);
4515                                 return;
4516                         }
4517                         /* reuse message block - just change REQ to IND */
4518                         if (prim->type == T_DATA_REQ)
4519                                 prim->type = T_DATA_IND;
4520                         else
4521                                 prim->type = T_OPTDATA_IND;
4522                 }
4523                 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4524                 return;
4525 
4526         case TS_WREQ_ORDREL:
4527                 if (tep->te_conp == NULL) {
4528                         /*
4529                          * Other end closed - generate discon_ind
4530                          * with reason 0 to cause an EPIPE but no
4531                          * read side error on AF_UNIX sockets.
4532                          */
4533                         freemsg(mp);
4534                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
4535                             SL_TRACE|SL_ERROR,
4536                             "tl_data: WREQ_ORDREL and no peer"));
4537                         tl_discon_ind(tep, 0);
4538                         return;
4539                 }
4540                 break;
4541 
4542         default:
4543                 /* invalid state for event TE_DATA_REQ */
4544                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4545                     "tl_data:cots:out of state"));
4546                 tl_merror(wq, mp, EPROTO);
4547                 return;
4548         }
4549         /*
4550          * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4551          * (State stays same on this event)
4552          */
4553 
4554         /*
4555          * get connected endpoint
4556          */
4557         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4558                 freemsg(mp);
4559                 /* Peer closed */
4560                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4561                     "tl_data: peer gone"));
4562                 return;
4563         }
4564 
4565         ASSERT(tep->te_serializer == peer_tep->te_serializer);
4566         peer_rq = peer_tep->te_rq;
4567 
4568         /*
4569          * Put it back if flow controlled
4570          * Note: Messages already on queue when we are closing is bounded
4571          * so we can ignore flow control.
4572          */
4573         if (!canputnext(peer_rq) && !closing) {
4574                 TL_PUTBQ(tep, mp);
4575                 return;
4576         }
4577 
4578         /*
4579          * validate peer state
4580          */
4581         switch (peer_tep->te_state) {
4582         case TS_DATA_XFER:
4583         case TS_WIND_ORDREL:
4584                 /* valid states */
4585                 break;
4586         default:
4587                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4588                     "tl_data:rx side:invalid state"));
4589                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4590                 return;
4591         }
4592         if (DB_TYPE(mp) == M_PROTO) {
4593                 /* reuse message block - just change REQ to IND */
4594                 if (prim->type == T_DATA_REQ)
4595                         prim->type = T_DATA_IND;
4596                 else
4597                         prim->type = T_OPTDATA_IND;
4598         }
4599         /*
4600          * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4601          * (peer state stays same on this event)
4602          */
4603         /*
4604          * send data to connected peer
4605          */
4606         putnext(peer_rq, mp);
4607 }
4608 
4609 
4610 
4611 static void
4612 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4613 {
4614         queue_t                 *wq = tep->te_wq;
4615         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4616         ssize_t                 msz = MBLKL(mp);
4617         tl_endpt_t              *peer_tep;
4618         queue_t                 *peer_rq;
4619         boolean_t               closing = tep->te_closing;
4620 
4621         if (msz < sizeof (struct T_exdata_req)) {
4622                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4623                     "tl_exdata:invalid message"));
4624                 if (!closing) {
4625                         tl_merror(wq, mp, EPROTO);
4626                 } else {
4627                         freemsg(mp);
4628                 }
4629                 return;
4630         }
4631 
4632         /*
4633          * If the endpoint is closing it should still forward any data to the
4634          * peer (if it has one). If it is not allowed to forward it can just
4635          * free the message.
4636          */
4637         if (closing &&
4638             (tep->te_state != TS_DATA_XFER) &&
4639             (tep->te_state != TS_WREQ_ORDREL)) {
4640                 freemsg(mp);
4641                 return;
4642         }
4643 
4644         /*
4645          * validate state
4646          */
4647         switch (tep->te_state) {
4648         case TS_IDLE:
4649                 /*
4650                  * Other end not here - do nothing.
4651                  */
4652                 freemsg(mp);
4653                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4654                     "tl_exdata:cots with endpoint idle"));
4655                 return;
4656 
4657         case TS_DATA_XFER:
4658                 /* valid states */
4659                 if (tep->te_conp != NULL)
4660                         break;
4661 
4662                 if (tep->te_oconp == NULL) {
4663                         if (!closing) {
4664                                 tl_merror(wq, mp, EPROTO);
4665                         } else {
4666                                 freemsg(mp);
4667                         }
4668                         return;
4669                 }
4670                 /*
4671                  * For a socket the T_CONN_CON is sent early thus
4672                  * the peer might not yet have accepted the connection.
4673                  * If we are closing queue the packet with the T_CONN_IND.
4674                  * Otherwise defer processing the packet until the peer
4675                  * accepts the connection.
4676                  * Note that the queue is noenabled when we go into this
4677                  * state.
4678                  */
4679                 if (!closing) {
4680                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4681                             SL_TRACE|SL_ERROR,
4682                             "tl_exdata: ocon"));
4683                         TL_PUTBQ(tep, mp);
4684                         return;
4685                 }
4686                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4687                     "tl_exdata: closing socket ocon"));
4688                 prim->type = T_EXDATA_IND;
4689                 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4690                 return;
4691 
4692         case TS_WREQ_ORDREL:
4693                 if (tep->te_conp == NULL) {
4694                         /*
4695                          * Other end closed - generate discon_ind
4696                          * with reason 0 to cause an EPIPE but no
4697                          * read side error on AF_UNIX sockets.
4698                          */
4699                         freemsg(mp);
4700                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
4701                             SL_TRACE|SL_ERROR,
4702                             "tl_exdata: WREQ_ORDREL and no peer"));
4703                         tl_discon_ind(tep, 0);
4704                         return;
4705                 }
4706                 break;
4707 
4708         default:
4709                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4710                     SL_TRACE|SL_ERROR,
4711                     "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4712                     tep->te_state));
4713                 tl_merror(wq, mp, EPROTO);
4714                 return;
4715         }
4716         /*
4717          * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4718          * (state stays same on this event)
4719          */
4720 
4721         /*
4722          * get connected endpoint
4723          */
4724         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4725                 freemsg(mp);
4726                 /* Peer closed */
4727                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4728                     "tl_exdata: peer gone"));
4729                 return;
4730         }
4731 
4732         peer_rq = peer_tep->te_rq;
4733 
4734         /*
4735          * Put it back if flow controlled
4736          * Note: Messages already on queue when we are closing is bounded
4737          * so we can ignore flow control.
4738          */
4739         if (!canputnext(peer_rq) && !closing) {
4740                 TL_PUTBQ(tep, mp);
4741                 return;
4742         }
4743 
4744         /*
4745          * validate state on peer
4746          */
4747         switch (peer_tep->te_state) {
4748         case TS_DATA_XFER:
4749         case TS_WIND_ORDREL:
4750                 /* valid states */
4751                 break;
4752         default:
4753                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4754                     "tl_exdata:rx side:invalid state"));
4755                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4756                 return;
4757         }
4758         /*
4759          * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4760          * (peer state stays same on this event)
4761          */
4762         /*
4763          * reuse message block
4764          */
4765         prim->type = T_EXDATA_IND;
4766 
4767         /*
4768          * send data to connected peer
4769          */
4770         putnext(peer_rq, mp);
4771 }
4772 
4773 
4774 
4775 static void
4776 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4777 {
4778         queue_t                 *wq =  tep->te_wq;
4779         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4780         ssize_t                 msz = MBLKL(mp);
4781         tl_endpt_t              *peer_tep;
4782         queue_t                 *peer_rq;
4783         boolean_t               closing = tep->te_closing;
4784 
4785         if (msz < sizeof (struct T_ordrel_req)) {
4786                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4787                     "tl_ordrel:invalid message"));
4788                 if (!closing) {
4789                         tl_merror(wq, mp, EPROTO);
4790                 } else {
4791                         freemsg(mp);
4792                 }
4793                 return;
4794         }
4795 
4796         /*
4797          * validate state
4798          */
4799         switch (tep->te_state) {
4800         case TS_DATA_XFER:
4801         case TS_WREQ_ORDREL:
4802                 /* valid states */
4803                 if (tep->te_conp != NULL)
4804                         break;
4805 
4806                 if (tep->te_oconp == NULL)
4807                         break;
4808 
4809                 /*
4810                  * For a socket the T_CONN_CON is sent early thus
4811                  * the peer might not yet have accepted the connection.
4812                  * If we are closing queue the packet with the T_CONN_IND.
4813                  * Otherwise defer processing the packet until the peer
4814                  * accepts the connection.
4815                  * Note that the queue is noenabled when we go into this
4816                  * state.
4817                  */
4818                 if (!closing) {
4819                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4820                             SL_TRACE|SL_ERROR,
4821                             "tl_ordlrel: ocon"));
4822                         TL_PUTBQ(tep, mp);
4823                         return;
4824                 }
4825                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4826                     "tl_ordlrel: closing socket ocon"));
4827                 prim->type = T_ORDREL_IND;
4828                 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4829                 return;
4830 
4831         default:
4832                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4833                     SL_TRACE|SL_ERROR,
4834                     "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4835                     tep->te_state));
4836                 if (!closing) {
4837                         tl_merror(wq, mp, EPROTO);
4838                 } else {
4839                         freemsg(mp);
4840                 }
4841                 return;
4842         }
4843         tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4844 
4845         /*
4846          * get connected endpoint
4847          */
4848         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4849                 /* Peer closed */
4850                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4851                     "tl_ordrel: peer gone"));
4852                 freemsg(mp);
4853                 return;
4854         }
4855 
4856         peer_rq = peer_tep->te_rq;
4857 
4858         /*
4859          * Put it back if flow controlled except when we are closing.
4860          * Note: Messages already on queue when we are closing is bounded
4861          * so we can ignore flow control.
4862          */
4863         if (! canputnext(peer_rq) && !closing) {
4864                 TL_PUTBQ(tep, mp);
4865                 return;
4866         }
4867 
4868         /*
4869          * validate state on peer
4870          */
4871         switch (peer_tep->te_state) {
4872         case TS_DATA_XFER:
4873         case TS_WIND_ORDREL:
4874                 /* valid states */
4875                 break;
4876         default:
4877                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4878                     "tl_ordrel:rx side:invalid state"));
4879                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4880                 return;
4881         }
4882         peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4883 
4884         /*
4885          * reuse message block
4886          */
4887         prim->type = T_ORDREL_IND;
4888         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4889             "tl_ordrel: send ordrel_ind"));
4890 
4891         /*
4892          * send data to connected peer
4893          */
4894         putnext(peer_rq, mp);
4895 }
4896 
4897 
4898 /*
4899  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4900  */
4901 static void
4902 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4903 {
4904         size_t                  err_sz;
4905         tl_endpt_t              *tep;
4906         struct T_unitdata_req   *udreq;
4907         mblk_t                  *err_mp;
4908         t_scalar_t              alen;
4909         t_scalar_t              olen;
4910         struct T_uderror_ind    *uderr;
4911         uchar_t                 *addr_startp;
4912 
4913         err_sz = sizeof (struct T_uderror_ind);
4914         tep = (tl_endpt_t *)wq->q_ptr;
4915         udreq = (struct T_unitdata_req *)mp->b_rptr;
4916         alen = udreq->DEST_length;
4917         olen = udreq->OPT_length;
4918 
4919         if (alen > 0)
4920                 err_sz = T_ALIGN(err_sz + alen);
4921         if (olen > 0)
4922                 err_sz += olen;
4923 
4924         err_mp = allocb(err_sz, BPRI_MED);
4925         if (! err_mp) {
4926                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4927                     "tl_uderr:allocb failure"));
4928                 /*
4929                  * Note: no rollback of state needed as it does
4930                  * not change in connectionless transport
4931                  */
4932                 tl_memrecover(wq, mp, err_sz);
4933                 return;
4934         }
4935 
4936         DB_TYPE(err_mp) = M_PROTO;
4937         err_mp->b_wptr = err_mp->b_rptr + err_sz;
4938         uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4939         uderr->PRIM_type = T_UDERROR_IND;
4940         uderr->ERROR_type = err;
4941         uderr->DEST_length = alen;
4942         uderr->OPT_length = olen;
4943         if (alen <= 0) {
4944                 uderr->DEST_offset = 0;
4945         } else {
4946                 uderr->DEST_offset =
4947                     (t_scalar_t)sizeof (struct T_uderror_ind);
4948                 addr_startp  = mp->b_rptr + udreq->DEST_offset;
4949                 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4950                     (size_t)alen);
4951         }
4952         if (olen <= 0) {
4953                 uderr->OPT_offset = 0;
4954         } else {
4955                 uderr->OPT_offset =
4956                     (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4957                     uderr->DEST_length);
4958                 addr_startp  = mp->b_rptr + udreq->OPT_offset;
4959                 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4960                     (size_t)olen);
4961         }
4962         freemsg(mp);
4963 
4964         /*
4965          * send indication message
4966          */
4967         tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4968 
4969         qreply(wq, err_mp);
4970 }
4971 
4972 static void
4973 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4974 {
4975         queue_t *wq = tep->te_wq;
4976 
4977         if (!tep->te_closing && (wq->q_first != NULL)) {
4978                 TL_PUTQ(tep, mp);
4979         } else if (tep->te_rq != NULL)
4980                 tl_unitdata(mp, tep);
4981         else
4982                 freemsg(mp);
4983 
4984         tl_serializer_exit(tep);
4985         tl_refrele(tep);
4986 }
4987 
4988 /*
4989  * Handle T_unitdata_req.
4990  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4991  * If this is a socket pass through options unmodified.
4992  */
4993 static void
4994 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4995 {
4996         queue_t                 *wq = tep->te_wq;
4997         soux_addr_t             ux_addr;
4998         tl_addr_t               destaddr;
4999         uchar_t                 *addr_startp;
5000         tl_endpt_t              *peer_tep;
5001         struct T_unitdata_ind   *udind;
5002         struct T_unitdata_req   *udreq;
5003         ssize_t                 msz, ui_sz, reuse_mb_sz;
5004         t_scalar_t              alen, aoff, olen, ooff;
5005         t_scalar_t              oldolen = 0;
5006         cred_t                  *cr = NULL;
5007         pid_t                   cpid;
5008 
5009         udreq = (struct T_unitdata_req *)mp->b_rptr;
5010         msz = MBLKL(mp);
5011 
5012         /*
5013          * validate the state
5014          */
5015         if (tep->te_state != TS_IDLE) {
5016                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5017                     SL_TRACE|SL_ERROR,
5018                     "tl_wput:T_CONN_REQ:out of state"));
5019                 tl_merror(wq, mp, EPROTO);
5020                 return;
5021         }
5022         /*
5023          * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5024          * (state does not change on this event)
5025          */
5026 
5027         /*
5028          * validate the message
5029          * Note: dereference fields in struct inside message only
5030          * after validating the message length.
5031          */
5032         if (msz < sizeof (struct T_unitdata_req)) {
5033                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5034                     "tl_unitdata:invalid message length"));
5035                 tl_merror(wq, mp, EINVAL);
5036                 return;
5037         }
5038         alen = udreq->DEST_length;
5039         aoff = udreq->DEST_offset;
5040         oldolen = olen = udreq->OPT_length;
5041         ooff = udreq->OPT_offset;
5042         if (olen == 0)
5043                 ooff = 0;
5044 
5045         if (IS_SOCKET(tep)) {
5046                 if ((alen != TL_SOUX_ADDRLEN) ||
5047                     (aoff < 0) ||
5048                     (aoff + alen > msz) ||
5049                     (olen < 0) || (ooff < 0) ||
5050                     ((olen > 0) && ((ooff + olen) > msz))) {
5051                         (void) (STRLOG(TL_ID, tep->te_minor,
5052                             1, SL_TRACE|SL_ERROR,
5053                             "tl_unitdata_req: invalid socket addr "
5054                             "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5055                             (int)msz, alen, aoff, olen, ooff));
5056                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5057                         return;
5058                 }
5059                 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5060 
5061                 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5062                     (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5063                         (void) (STRLOG(TL_ID, tep->te_minor,
5064                             1, SL_TRACE|SL_ERROR,
5065                             "tl_conn_req: invalid socket magic"));
5066                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5067                         return;
5068                 }
5069         } else {
5070                 if ((alen < 0) ||
5071                     (aoff < 0) ||
5072                     ((alen > 0) && ((aoff + alen) > msz)) ||
5073                     ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5074                     ((aoff + alen) < 0) ||
5075                     ((olen > 0) && ((ooff + olen) > msz)) ||
5076                     (olen < 0) ||
5077                     (ooff < 0) ||
5078                     ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5079                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
5080                                     SL_TRACE|SL_ERROR,
5081                                     "tl_unitdata:invalid unit data message"));
5082                         tl_merror(wq, mp, EINVAL);
5083                         return;
5084                 }
5085         }
5086 
5087         /* Options not supported unless it's a socket */
5088         if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5089                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5090                     "tl_unitdata:option use(unsupported) or zero len addr"));
5091                 tl_uderr(wq, mp, EPROTO);
5092                 return;
5093         }
5094 #ifdef DEBUG
5095         /*
5096          * Mild form of ASSERT()ion to detect broken TPI apps.
5097          * if (! assertion)
5098          *      log warning;
5099          */
5100         if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5101                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5102                     "tl_unitdata:addr overlaps TPI message"));
5103         }
5104 #endif
5105         /*
5106          * get destination endpoint
5107          */
5108         destaddr.ta_alen = alen;
5109         destaddr.ta_abuf = mp->b_rptr + aoff;
5110         destaddr.ta_zoneid = tep->te_zoneid;
5111 
5112         /*
5113          * Check whether the destination is the same that was used previously
5114          * and the destination endpoint is in the right state. If something is
5115          * wrong, find destination again and cache it.
5116          */
5117         peer_tep = tep->te_lastep;
5118 
5119         if ((peer_tep == NULL) || peer_tep->te_closing ||
5120             (peer_tep->te_state != TS_IDLE) ||
5121             !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5122                 /*
5123                  * Not the same as cached destination , need to find the right
5124                  * destination.
5125                  */
5126                 peer_tep = (IS_SOCKET(tep) ?
5127                     tl_sock_find_peer(tep, &ux_addr) :
5128                     tl_find_peer(tep, &destaddr));
5129 
5130                 if (peer_tep == NULL) {
5131                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5132                             SL_TRACE|SL_ERROR,
5133                             "tl_unitdata:no one at destination address"));
5134                         tl_uderr(wq, mp, ECONNRESET);
5135                         return;
5136                 }
5137 
5138                 /*
5139                  * Cache the new peer.
5140                  */
5141                 if (tep->te_lastep != NULL)
5142                         tl_refrele(tep->te_lastep);
5143 
5144                 tep->te_lastep = peer_tep;
5145         }
5146 
5147         if (peer_tep->te_state != TS_IDLE) {
5148                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5149                     "tl_unitdata:provider in invalid state"));
5150                 tl_uderr(wq, mp, EPROTO);
5151                 return;
5152         }
5153 
5154         ASSERT(peer_tep->te_rq != NULL);
5155 
5156         /*
5157          * Put it back if flow controlled except when we are closing.
5158          * Note: Messages already on queue when we are closing is bounded
5159          * so we can ignore flow control.
5160          */
5161         if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5162                 /* record what we are flow controlled on */
5163                 if (tep->te_flowq != NULL) {
5164                         list_remove(&tep->te_flowq->te_flowlist, tep);
5165                 }
5166                 list_insert_head(&peer_tep->te_flowlist, tep);
5167                 tep->te_flowq = peer_tep;
5168                 TL_PUTBQ(tep, mp);
5169                 return;
5170         }
5171         /*
5172          * prepare indication message
5173          */
5174 
5175         /*
5176          * calculate length of message
5177          */
5178         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5179                 cr = msg_getcred(mp, &cpid);
5180                 ASSERT(cr != NULL);
5181 
5182                 if (peer_tep->te_flag & TL_SETCRED) {
5183                         ASSERT(olen == 0);
5184                         olen = (t_scalar_t)sizeof (struct opthdr) +
5185                             OPTLEN(sizeof (tl_credopt_t));
5186                                                 /* 1 option only */
5187                 } else if (peer_tep->te_flag & TL_SETUCRED) {
5188                         ASSERT(olen == 0);
5189                         olen = (t_scalar_t)sizeof (struct opthdr) +
5190                             OPTLEN(ucredminsize(cr));
5191                                                 /* 1 option only */
5192                 } else {
5193                         /* Possibly more than one option */
5194                         olen += (t_scalar_t)sizeof (struct T_opthdr) +
5195                             OPTLEN(ucredminsize(cr));
5196                 }
5197         }
5198 
5199         ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5200         reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5201 
5202         /*
5203          * If the unitdata_ind fits and we are not adding options
5204          * reuse the udreq mblk.
5205          *
5206          * Otherwise, it is possible we need to append an option if one of the
5207          * te_flag bits is set. This requires extra space in the data block for
5208          * the additional option but the traditional technique used below to
5209          * allocate a new block and copy into it will not work when there is a
5210          * message block with a free pointer (since we don't know anything
5211          * about the layout of the data, pointers referencing or within the
5212          * data, etc.). To handle this possibility the upper layers may have
5213          * preallocated some space to use for appending an option. We check the
5214          * overall mblock size against the size we need ('reuse_mb_sz' with the
5215          * original address length [alen] to ensure we won't overrun the
5216          * current mblk data size) to see if there is free space and thus
5217          * avoid allocating a new message block.
5218          */
5219         if (msz >= ui_sz && alen >= tep->te_alen &&
5220             !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5221                 /*
5222                  * Reuse the original mblk. Leave options in place.
5223                  */
5224                 udind =  (struct T_unitdata_ind *)mp->b_rptr;
5225                 udind->PRIM_type = T_UNITDATA_IND;
5226                 udind->SRC_length = tep->te_alen;
5227                 addr_startp = mp->b_rptr + udind->SRC_offset;
5228                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5229 
5230         } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5231             mp->b_datap->db_frtnp != NULL) {
5232                 /*
5233                  * We have a message block with a free pointer, but extra space
5234                  * has been pre-allocated for us in case we need to append an
5235                  * option. Reuse the original mblk, leaving existing options in
5236                  * place.
5237                  */
5238                 udind =  (struct T_unitdata_ind *)mp->b_rptr;
5239                 udind->PRIM_type = T_UNITDATA_IND;
5240                 udind->SRC_length = tep->te_alen;
5241                 addr_startp = mp->b_rptr + udind->SRC_offset;
5242                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5243 
5244                 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5245                         ASSERT(cr != NULL);
5246                         /*
5247                          * We're appending one new option here after the
5248                          * original ones.
5249                          */
5250                         tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5251                             cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5252                 }
5253 
5254         } else if (mp->b_datap->db_frtnp != NULL) {
5255                 /*
5256                  * The next block creates a new mp and tries to copy the data
5257                  * block into it, but that cannot handle a message with a free
5258                  * pointer (for more details see the comment in kstrputmsg()
5259                  * where dupmsg() is called). Since we can never properly
5260                  * duplicate the mp while also extending the data, just error
5261                  * out now.
5262                  */
5263                 tl_uderr(wq, mp, EPROTO);
5264                 return;
5265         } else {
5266                 /* Allocate a new T_unitdata_ind message */
5267                 mblk_t *ui_mp;
5268 
5269                 ui_mp = allocb(ui_sz, BPRI_MED);
5270                 if (! ui_mp) {
5271                         (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5272                             "tl_unitdata:allocb failure:message queued"));
5273                         tl_memrecover(wq, mp, ui_sz);
5274                         return;
5275                 }
5276 
5277                 /*
5278                  * fill in T_UNITDATA_IND contents
5279                  */
5280                 DB_TYPE(ui_mp) = M_PROTO;
5281                 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5282                 udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5283                 udind->PRIM_type = T_UNITDATA_IND;
5284                 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5285                 udind->SRC_length = tep->te_alen;
5286                 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5287                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5288                 udind->OPT_offset =
5289                     (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5290                 udind->OPT_length = olen;
5291                 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5292 
5293                         if (oldolen != 0) {
5294                                 bcopy((void *)((uintptr_t)udreq + ooff),
5295                                     (void *)((uintptr_t)udind +
5296                                     udind->OPT_offset),
5297                                     oldolen);
5298                         }
5299                         ASSERT(cr != NULL);
5300 
5301                         tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5302                             oldolen, cr, cpid,
5303                             peer_tep->te_flag, peer_tep->te_credp);
5304                 } else {
5305                         bcopy((void *)((uintptr_t)udreq + ooff),
5306                             (void *)((uintptr_t)udind + udind->OPT_offset),
5307                             olen);
5308                 }
5309 
5310                 /*
5311                  * relink data blocks from mp to ui_mp
5312                  */
5313                 ui_mp->b_cont = mp->b_cont;
5314                 freeb(mp);
5315                 mp = ui_mp;
5316         }
5317         /*
5318          * send indication message
5319          */
5320         peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5321         putnext(peer_tep->te_rq, mp);
5322 }
5323 
5324 
5325 
5326 /*
5327  * Check if a given addr is in use.
5328  * Endpoint ptr returned or NULL if not found.
5329  * The name space is separate for each mode. This implies that
5330  * sockets get their own name space.
5331  */
5332 static tl_endpt_t *
5333 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5334 {
5335         tl_endpt_t *peer_tep = NULL;
5336         int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5337             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5338 
5339         ASSERT(! IS_SOCKET(tep));
5340 
5341         ASSERT(ap != NULL && ap->ta_alen > 0);
5342         ASSERT(ap->ta_zoneid == tep->te_zoneid);
5343         ASSERT(ap->ta_abuf != NULL);
5344         EQUIV(rc == 0, peer_tep != NULL);
5345         IMPLY(rc == 0,
5346             (tep->te_zoneid == peer_tep->te_zoneid) &&
5347             (tep->te_transport == peer_tep->te_transport));
5348 
5349         if ((rc == 0) && (peer_tep->te_closing)) {
5350                 tl_refrele(peer_tep);
5351                 peer_tep = NULL;
5352         }
5353 
5354         return (peer_tep);
5355 }
5356 
5357 /*
5358  * Find peer for a socket based on unix domain address.
5359  * For implicit addresses our peer can be found by minor number in ai hash. For
5360  * explicit binds we look vnode address at addr_hash.
5361  */
5362 static tl_endpt_t *
5363 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5364 {
5365         tl_endpt_t *peer_tep = NULL;
5366         mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5367             tep->te_aihash : tep->te_addrhash;
5368         int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5369             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5370 
5371         ASSERT(IS_SOCKET(tep));
5372         EQUIV(rc == 0, peer_tep != NULL);
5373         IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5374 
5375         if (peer_tep != NULL) {
5376                 /* Don't attempt to use closing peer. */
5377                 if (peer_tep->te_closing)
5378                         goto errout;
5379 
5380                 /*
5381                  * Cross-zone unix sockets are permitted, but for Trusted
5382                  * Extensions only, the "server" for these must be in the
5383                  * global zone.
5384                  */
5385                 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5386                     is_system_labeled() &&
5387                     (peer_tep->te_zoneid != GLOBAL_ZONEID))
5388                         goto errout;
5389         }
5390 
5391         return (peer_tep);
5392 
5393 errout:
5394         tl_refrele(peer_tep);
5395         return (NULL);
5396 }
5397 
5398 /*
5399  * Generate a free addr and return it in struct pointed by ap
5400  * but allocating space for address buffer.
5401  * The generated address will be at least 4 bytes long and, if req->ta_alen
5402  * exceeds 4 bytes, be req->ta_alen bytes long.
5403  *
5404  * If address is found it will be inserted in the hash.
5405  *
5406  * If req->ta_alen is larger than the default alen (4 bytes) the last
5407  * alen-4 bytes will always be the same as in req.
5408  *
5409  * Return 0 for failure.
5410  * Return non-zero for success.
5411  */
5412 static boolean_t
5413 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5414 {
5415         t_scalar_t      alen;
5416         uint32_t        loopcnt;        /* Limit loop to 2^32 */
5417 
5418         ASSERT(tep->te_hash_hndl != NULL);
5419         ASSERT(! IS_SOCKET(tep));
5420 
5421         if (tep->te_hash_hndl == NULL)
5422                 return (B_FALSE);
5423 
5424         /*
5425          * check if default addr is in use
5426          * if it is - bump it and try again
5427          */
5428         if (req == NULL) {
5429                 alen = sizeof (uint32_t);
5430         } else {
5431                 alen = max(req->ta_alen, sizeof (uint32_t));
5432                 ASSERT(tep->te_zoneid == req->ta_zoneid);
5433         }
5434 
5435         if (tep->te_alen < alen) {
5436                 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5437 
5438                 /*
5439                  * Not enough space in tep->ta_ap to hold the address,
5440                  * allocate a bigger space.
5441                  */
5442                 if (abuf == NULL)
5443                         return (B_FALSE);
5444 
5445                 if (tep->te_alen > 0)
5446                         kmem_free(tep->te_abuf, tep->te_alen);
5447 
5448                 tep->te_alen = alen;
5449                 tep->te_abuf = abuf;
5450         }
5451 
5452         /* Copy in the address in req */
5453         if (req != NULL) {
5454                 ASSERT(alen >= req->ta_alen);
5455                 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5456         }
5457 
5458         /*
5459          * First try minor number then try default addresses.
5460          */
5461         bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5462 
5463         for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5464                 if (mod_hash_insert_reserve(tep->te_addrhash,
5465                     (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5466                     tep->te_hash_hndl) == 0) {
5467                         /*
5468                          * found free address
5469                          */
5470                         tep->te_flag |= TL_ADDRHASHED;
5471                         tep->te_hash_hndl = NULL;
5472 
5473                         return (B_TRUE); /* successful return */
5474                 }
5475                 /*
5476                  * Use default address.
5477                  */
5478                 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5479                 atomic_inc_32(&tep->te_defaddr);
5480         }
5481 
5482         /*
5483          * Failed to find anything.
5484          */
5485         (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5486             "tl_get_any_addr:looped 2^32 times"));
5487         return (B_FALSE);
5488 }
5489 
5490 /*
5491  * reallocb + set r/w ptrs to reflect size.
5492  */
5493 static mblk_t *
5494 tl_resizemp(mblk_t *mp, ssize_t new_size)
5495 {
5496         if ((mp = reallocb(mp, new_size, 0)) == NULL)
5497                 return (NULL);
5498 
5499         mp->b_rptr = DB_BASE(mp);
5500         mp->b_wptr = mp->b_rptr + new_size;
5501         return (mp);
5502 }
5503 
5504 static void
5505 tl_cl_backenable(tl_endpt_t *tep)
5506 {
5507         list_t *l = &tep->te_flowlist;
5508         tl_endpt_t *elp;
5509 
5510         ASSERT(IS_CLTS(tep));
5511 
5512         for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5513                 ASSERT(tep->te_ser == elp->te_ser);
5514                 ASSERT(elp->te_flowq == tep);
5515                 if (! elp->te_closing)
5516                         TL_QENABLE(elp);
5517                 elp->te_flowq = NULL;
5518                 list_remove(l, elp);
5519         }
5520 }
5521 
5522 /*
5523  * Unconnect endpoints.
5524  */
5525 static void
5526 tl_co_unconnect(tl_endpt_t *tep)
5527 {
5528         tl_endpt_t      *peer_tep = tep->te_conp;
5529         tl_endpt_t      *srv_tep = tep->te_oconp;
5530         list_t          *l;
5531         tl_icon_t       *tip;
5532         tl_endpt_t      *cl_tep;
5533         mblk_t          *d_mp;
5534 
5535         ASSERT(IS_COTS(tep));
5536         /*
5537          * If our peer is closing, don't use it.
5538          */
5539         if ((peer_tep != NULL) && peer_tep->te_closing) {
5540                 TL_UNCONNECT(tep->te_conp);
5541                 peer_tep = NULL;
5542         }
5543         if ((srv_tep != NULL) && srv_tep->te_closing) {
5544                 TL_UNCONNECT(tep->te_oconp);
5545                 srv_tep = NULL;
5546         }
5547 
5548         if (tep->te_nicon > 0) {
5549                 l = &tep->te_iconp;
5550                 /*
5551                  * If incoming requests pending, change state
5552                  * of clients on disconnect ind event and send
5553                  * discon_ind pdu to modules above them
5554                  * for server: all clients get disconnect
5555                  */
5556 
5557                 while (tep->te_nicon > 0) {
5558                         tip    = list_head(l);
5559                         cl_tep = tip->ti_tep;
5560 
5561                         if (cl_tep == NULL) {
5562                                 tl_freetip(tep, tip);
5563                                 continue;
5564                         }
5565 
5566                         if (cl_tep->te_oconp != NULL) {
5567                                 ASSERT(cl_tep != cl_tep->te_oconp);
5568                                 TL_UNCONNECT(cl_tep->te_oconp);
5569                         }
5570 
5571                         if (cl_tep->te_closing) {
5572                                 tl_freetip(tep, tip);
5573                                 continue;
5574                         }
5575 
5576                         enableok(cl_tep->te_wq);
5577                         TL_QENABLE(cl_tep);
5578                         d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5579                         if (d_mp != NULL) {
5580                                 cl_tep->te_state = TS_IDLE;
5581                                 putnext(cl_tep->te_rq, d_mp);
5582                         } else {
5583                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5584                                     SL_TRACE|SL_ERROR,
5585                                     "tl_co_unconnect:icmng: "
5586                                     "allocb failure"));
5587                         }
5588                         tl_freetip(tep, tip);
5589                 }
5590         } else if (srv_tep != NULL) {
5591                 /*
5592                  * If outgoing request pending, change state
5593                  * of server on discon ind event
5594                  */
5595 
5596                 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5597                     IS_COTSORD(srv_tep) &&
5598                     !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5599                         /*
5600                          * Queue ordrel_ind for server to be picked up
5601                          * when the connection is accepted.
5602                          */
5603                         d_mp = tl_ordrel_ind_alloc();
5604                 } else {
5605                         /*
5606                          * send discon_ind to server
5607                          */
5608                         d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5609                 }
5610                 if (d_mp == NULL) {
5611                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5612                             SL_TRACE|SL_ERROR,
5613                             "tl_co_unconnect:outgoing:allocb failure"));
5614                         TL_UNCONNECT(tep->te_oconp);
5615                         goto discon_peer;
5616                 }
5617 
5618                 /*
5619                  * If this is a socket the T_DISCON_IND is queued with
5620                  * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5621                  * from the list of pending connections.
5622                  * Note that when te_oconp is set the peer better have
5623                  * a t_connind_t for the client.
5624                  */
5625                 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5626                         /*
5627                          * Queue the disconnection message.
5628                          */
5629                         tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5630                 } else {
5631                         tip = tl_icon_find(srv_tep, tep->te_seqno);
5632                         if (tip == NULL) {
5633                                 freemsg(d_mp);
5634                         } else {
5635                                 ASSERT(tep == tip->ti_tep);
5636                                 ASSERT(tep->te_ser == srv_tep->te_ser);
5637                                 /*
5638                                  * Delete tip from the server list.
5639                                  */
5640                                 if (srv_tep->te_nicon == 1) {
5641                                         srv_tep->te_state =
5642                                             NEXTSTATE(TE_DISCON_IND2,
5643                                             srv_tep->te_state);
5644                                 } else {
5645                                         srv_tep->te_state =
5646                                             NEXTSTATE(TE_DISCON_IND3,
5647                                             srv_tep->te_state);
5648                                 }
5649                                 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5650                                     T_DISCON_IND);
5651                                 putnext(srv_tep->te_rq, d_mp);
5652                                 tl_freetip(srv_tep, tip);
5653                         }
5654                         TL_UNCONNECT(tep->te_oconp);
5655                         srv_tep = NULL;
5656                 }
5657         } else if (peer_tep != NULL) {
5658                 /*
5659                  * unconnect existing connection
5660                  * If connected, change state of peer on
5661                  * discon ind event and send discon ind pdu
5662                  * to module above it
5663                  */
5664 
5665                 ASSERT(tep->te_ser == peer_tep->te_ser);
5666                 if (IS_COTSORD(peer_tep) &&
5667                     (peer_tep->te_state == TS_WIND_ORDREL ||
5668                     peer_tep->te_state == TS_DATA_XFER)) {
5669                         /*
5670                          * send ordrel ind
5671                          */
5672                         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5673                         "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5674                             peer_tep->te_state,
5675                             NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5676                         d_mp = tl_ordrel_ind_alloc();
5677                         if (! d_mp) {
5678                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5679                                     SL_TRACE|SL_ERROR,
5680                                     "tl_co_unconnect:connected:"
5681                                     "allocb failure"));
5682                                 /*
5683                                  * Continue with cleaning up peer as
5684                                  * this side may go away with the close
5685                                  */
5686                                 TL_QENABLE(peer_tep);
5687                                 goto discon_peer;
5688                         }
5689                         peer_tep->te_state =
5690                             NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5691 
5692                         putnext(peer_tep->te_rq, d_mp);
5693                         /*
5694                          * Handle flow control case.  This will generate
5695                          * a t_discon_ind message with reason 0 if there
5696                          * is data queued on the write side.
5697                          */
5698                         TL_QENABLE(peer_tep);
5699                 } else if (IS_COTSORD(peer_tep) &&
5700                     peer_tep->te_state == TS_WREQ_ORDREL) {
5701                         /*
5702                          * Sent an ordrel_ind. We send a discon with
5703                          * with error 0 to inform that the peer is gone.
5704                          */
5705                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5706                             SL_TRACE|SL_ERROR,
5707                             "tl_co_unconnect: discon in state %d",
5708                             tep->te_state));
5709                         tl_discon_ind(peer_tep, 0);
5710                 } else {
5711                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5712                             SL_TRACE|SL_ERROR,
5713                             "tl_co_unconnect: state %d", tep->te_state));
5714                         tl_discon_ind(peer_tep, ECONNRESET);
5715                 }
5716 
5717 discon_peer:
5718                 /*
5719                  * Disconnect cross-pointers only for close
5720                  */
5721                 if (tep->te_closing) {
5722                         peer_tep = tep->te_conp;
5723                         TL_REMOVE_PEER(peer_tep->te_conp);
5724                         TL_REMOVE_PEER(tep->te_conp);
5725                 }
5726         }
5727 }
5728 
5729 /*
5730  * Note: The following routine does not recover from allocb()
5731  * failures
5732  * The reason should be from the <sys/errno.h> space.
5733  */
5734 static void
5735 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5736 {
5737         mblk_t *d_mp;
5738 
5739         if (tep->te_closing)
5740                 return;
5741 
5742         /*
5743          * flush the queues.
5744          */
5745         flushq(tep->te_rq, FLUSHDATA);
5746         (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5747 
5748         /*
5749          * send discon ind
5750          */
5751         d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5752         if (! d_mp) {
5753                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5754                     "tl_discon_ind:allocb failure"));
5755                 return;
5756         }
5757         tep->te_state = TS_IDLE;
5758         putnext(tep->te_rq, d_mp);
5759 }
5760 
5761 /*
5762  * Note: The following routine does not recover from allocb()
5763  * failures
5764  * The reason should be from the <sys/errno.h> space.
5765  */
5766 static mblk_t *
5767 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5768 {
5769         mblk_t *mp;
5770         struct T_discon_ind *tdi;
5771 
5772         if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5773                 DB_TYPE(mp) = M_PROTO;
5774                 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5775                 tdi = (struct T_discon_ind *)mp->b_rptr;
5776                 tdi->PRIM_type = T_DISCON_IND;
5777                 tdi->DISCON_reason = reason;
5778                 tdi->SEQ_number = seqnum;
5779         }
5780         return (mp);
5781 }
5782 
5783 
5784 /*
5785  * Note: The following routine does not recover from allocb()
5786  * failures
5787  */
5788 static mblk_t *
5789 tl_ordrel_ind_alloc(void)
5790 {
5791         mblk_t *mp;
5792         struct T_ordrel_ind *toi;
5793 
5794         if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5795                 DB_TYPE(mp) = M_PROTO;
5796                 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5797                 toi = (struct T_ordrel_ind *)mp->b_rptr;
5798                 toi->PRIM_type = T_ORDREL_IND;
5799         }
5800         return (mp);
5801 }
5802 
5803 
5804 /*
5805  * Lookup the seqno in the list of queued connections.
5806  */
5807 static tl_icon_t *
5808 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5809 {
5810         list_t *l = &tep->te_iconp;
5811         tl_icon_t *tip = list_head(l);
5812 
5813         ASSERT(seqno != 0);
5814 
5815         for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5816                 ;
5817 
5818         return (tip);
5819 }
5820 
5821 /*
5822  * Queue data for a given T_CONN_IND while verifying that redundant
5823  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5824  * Used when the originator of the connection closes.
5825  */
5826 static void
5827 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5828 {
5829         tl_icon_t               *tip;
5830         mblk_t                  **mpp, *mp;
5831         int                     prim, nprim;
5832 
5833         if (nmp->b_datap->db_type == M_PROTO)
5834                 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5835         else
5836                 nprim = -1;     /* M_DATA */
5837 
5838         tip = tl_icon_find(tep, seqno);
5839         if (tip == NULL) {
5840                 freemsg(nmp);
5841                 return;
5842         }
5843 
5844         ASSERT(tip->ti_seqno != 0);
5845         mpp = &tip->ti_mp;
5846         while (*mpp != NULL) {
5847                 mp = *mpp;
5848 
5849                 if (mp->b_datap->db_type == M_PROTO)
5850                         prim = ((union T_primitives *)mp->b_rptr)->type;
5851                 else
5852                         prim = -1;      /* M_DATA */
5853 
5854                 /*
5855                  * Allow nothing after a T_DISCON_IND
5856                  */
5857                 if (prim == T_DISCON_IND) {
5858                         freemsg(nmp);
5859                         return;
5860                 }
5861                 /*
5862                  * Only allow a T_DISCON_IND after an T_ORDREL_IND
5863                  */
5864                 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5865                         freemsg(nmp);
5866                         return;
5867                 }
5868                 mpp = &(mp->b_next);
5869         }
5870         *mpp = nmp;
5871 }
5872 
5873 /*
5874  * Verify if a certain TPI primitive exists on the connind queue.
5875  * Use prim -1 for M_DATA.
5876  * Return non-zero if found.
5877  */
5878 static boolean_t
5879 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5880 {
5881         tl_icon_t *tip = tl_icon_find(tep, seqno);
5882         boolean_t found = B_FALSE;
5883 
5884         if (tip != NULL) {
5885                 mblk_t *mp;
5886                 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5887                         found = (DB_TYPE(mp) == M_PROTO &&
5888                             ((union T_primitives *)mp->b_rptr)->type == prim);
5889                 }
5890         }
5891         return (found);
5892 }
5893 
5894 /*
5895  * Send the b_next mblk chain that has accumulated before the connection
5896  * was accepted. Perform the necessary state transitions.
5897  */
5898 static void
5899 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5900 {
5901         mblk_t                  *mp;
5902         union T_primitives      *primp;
5903 
5904         if (tep->te_closing) {
5905                 tl_icon_freemsgs(mpp);
5906                 return;
5907         }
5908 
5909         ASSERT(tep->te_state == TS_DATA_XFER);
5910         ASSERT(tep->te_rq->q_first == NULL);
5911 
5912         while ((mp = *mpp) != NULL) {
5913                 *mpp = mp->b_next;
5914                 mp->b_next = NULL;
5915 
5916                 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5917                 switch (DB_TYPE(mp)) {
5918                 default:
5919                         freemsg(mp);
5920                         break;
5921                 case M_DATA:
5922                         putnext(tep->te_rq, mp);
5923                         break;
5924                 case M_PROTO:
5925                         primp = (union T_primitives *)mp->b_rptr;
5926                         switch (primp->type) {
5927                         case T_UNITDATA_IND:
5928                         case T_DATA_IND:
5929                         case T_OPTDATA_IND:
5930                         case T_EXDATA_IND:
5931                                 putnext(tep->te_rq, mp);
5932                                 break;
5933                         case T_ORDREL_IND:
5934                                 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5935                                     tep->te_state);
5936                                 putnext(tep->te_rq, mp);
5937                                 break;
5938                         case T_DISCON_IND:
5939                                 tep->te_state = TS_IDLE;
5940                                 putnext(tep->te_rq, mp);
5941                                 break;
5942                         default:
5943 #ifdef DEBUG
5944                                 cmn_err(CE_PANIC,
5945                                     "tl_icon_sendmsgs: unknown primitive");
5946 #endif /* DEBUG */
5947                                 freemsg(mp);
5948                                 break;
5949                         }
5950                         break;
5951                 }
5952         }
5953 }
5954 
5955 /*
5956  * Free the b_next mblk chain that has accumulated before the connection
5957  * was accepted.
5958  */
5959 static void
5960 tl_icon_freemsgs(mblk_t **mpp)
5961 {
5962         mblk_t *mp;
5963 
5964         while ((mp = *mpp) != NULL) {
5965                 *mpp = mp->b_next;
5966                 mp->b_next = NULL;
5967                 freemsg(mp);
5968         }
5969 }
5970 
5971 /*
5972  * Send M_ERROR
5973  * Note: assumes caller ensured enough space in mp or enough
5974  *      memory available. Does not attempt recovery from allocb()
5975  *      failures
5976  */
5977 
5978 static void
5979 tl_merror(queue_t *wq, mblk_t *mp, int error)
5980 {
5981         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5982 
5983         if (tep->te_closing) {
5984                 freemsg(mp);
5985                 return;
5986         }
5987 
5988         (void) (STRLOG(TL_ID, tep->te_minor, 1,
5989             SL_TRACE|SL_ERROR,
5990             "tl_merror: tep=%p, err=%d", (void *)tep, error));
5991 
5992         /*
5993          * flush all messages on queue. we are shutting
5994          * the stream down on fatal error
5995          */
5996         flushq(wq, FLUSHALL);
5997         if (IS_COTS(tep)) {
5998                 /* connection oriented - unconnect endpoints */
5999                 tl_co_unconnect(tep);
6000         }
6001         if (mp->b_cont) {
6002                 freemsg(mp->b_cont);
6003                 mp->b_cont = NULL;
6004         }
6005 
6006         if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6007                 freemsg(mp);
6008                 mp = allocb(1, BPRI_HI);
6009                 if (!mp) {
6010                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
6011                             SL_TRACE|SL_ERROR,
6012                             "tl_merror:M_PROTO: out of memory"));
6013                         return;
6014                 }
6015         }
6016         if (mp) {
6017                 DB_TYPE(mp) = M_ERROR;
6018                 mp->b_rptr = DB_BASE(mp);
6019                 *mp->b_rptr = (char)error;
6020                 mp->b_wptr = mp->b_rptr + sizeof (char);
6021                 qreply(wq, mp);
6022         } else {
6023                 (void) putnextctl1(tep->te_rq, M_ERROR, error);
6024         }
6025 }
6026 
6027 static void
6028 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
6029 {
6030         ASSERT(cr != NULL);
6031 
6032         if (flag & TL_SETCRED) {
6033                 struct opthdr *opt = (struct opthdr *)buf;
6034                 tl_credopt_t *tlcred;
6035 
6036                 opt->level = TL_PROT_LEVEL;
6037                 opt->name = TL_OPT_PEER_CRED;
6038                 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
6039 
6040                 tlcred = (tl_credopt_t *)(opt + 1);
6041                 tlcred->tc_uid = crgetuid(cr);
6042                 tlcred->tc_gid = crgetgid(cr);
6043                 tlcred->tc_ruid = crgetruid(cr);
6044                 tlcred->tc_rgid = crgetrgid(cr);
6045                 tlcred->tc_suid = crgetsuid(cr);
6046                 tlcred->tc_sgid = crgetsgid(cr);
6047                 tlcred->tc_ngroups = crgetngroups(cr);
6048         } else if (flag & TL_SETUCRED) {
6049                 struct opthdr *opt = (struct opthdr *)buf;
6050 
6051                 opt->level = TL_PROT_LEVEL;
6052                 opt->name = TL_OPT_PEER_UCRED;
6053                 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6054 
6055                 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6056         } else {
6057                 struct T_opthdr *topt = (struct T_opthdr *)buf;
6058                 ASSERT(flag & TL_SOCKUCRED);
6059 
6060                 topt->level = SOL_SOCKET;
6061                 topt->name = SCM_UCRED;
6062                 topt->len = ucredminsize(cr) + sizeof (*topt);
6063                 topt->status = 0;
6064                 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6065         }
6066 }
6067 
/*
 * Option default-value routine.  This driver has no protocol specific
 * defaults, so every argument is ignored and -1 is always returned.
 */
/* ARGSUSED */
static int
tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
{
        /* no default value processed in protocol specific code currently */
        return (-1);
}
6075 
6076 /* ARGSUSED */
6077 static int
6078 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6079 {
6080         int len;
6081         tl_endpt_t *tep;
6082         int *valp;
6083 
6084         tep = (tl_endpt_t *)wq->q_ptr;
6085 
6086         len = 0;
6087 
6088         /*
6089          * Assumes: option level and name sanity check done elsewhere
6090          */
6091 
6092         switch (level) {
6093         case SOL_SOCKET:
6094                 if (! IS_SOCKET(tep))
6095                         break;
6096                 switch (name) {
6097                 case SO_RECVUCRED:
6098                         len = sizeof (int);
6099                         valp = (int *)ptr;
6100                         *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6101                         break;
6102                 default:
6103                         break;
6104                 }
6105                 break;
6106         case TL_PROT_LEVEL:
6107                 switch (name) {
6108                 case TL_OPT_PEER_CRED:
6109                 case TL_OPT_PEER_UCRED:
6110                         /*
6111                          * option not supposed to retrieved directly
6112                          * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6113                          * when some internal flags set by other options
6114                          * Direct retrieval always designed to fail(ignored)
6115                          * for this option.
6116                          */
6117                         break;
6118                 }
6119         }
6120         return (len);
6121 }
6122 
6123 /* ARGSUSED */
6124 static int
6125 tl_set_opt(
6126         queue_t         *wq,
6127         uint_t          mgmt_flags,
6128         int             level,
6129         int             name,
6130         uint_t          inlen,
6131         uchar_t         *invalp,
6132         uint_t          *outlenp,
6133         uchar_t         *outvalp,
6134         void            *thisdg_attrs,
6135         cred_t          *cr)
6136 {
6137         int error;
6138         tl_endpt_t *tep;
6139 
6140         tep = (tl_endpt_t *)wq->q_ptr;
6141 
6142         error = 0;              /* NOERROR */
6143 
6144         /*
6145          * Assumes: option level and name sanity checks done elsewhere
6146          */
6147 
6148         switch (level) {
6149         case SOL_SOCKET:
6150                 if (! IS_SOCKET(tep)) {
6151                         error = EINVAL;
6152                         break;
6153                 }
6154                 /*
6155                  * TBD: fill in other AF_UNIX socket options and then stop
6156                  * returning error.
6157                  */
6158                 switch (name) {
6159                 case SO_RECVUCRED:
6160                         /*
6161                          * We only support this for datagram sockets;
6162                          * getpeerucred handles the connection oriented
6163                          * transports.
6164                          */
6165                         if (! IS_CLTS(tep)) {
6166                                 error = EINVAL;
6167                                 break;
6168                         }
6169                         if (*(int *)invalp == 0)
6170                                 tep->te_flag &= ~TL_SOCKUCRED;
6171                         else
6172                                 tep->te_flag |= TL_SOCKUCRED;
6173                         break;
6174                 default:
6175                         error = EINVAL;
6176                         break;
6177                 }
6178                 break;
6179         case TL_PROT_LEVEL:
6180                 switch (name) {
6181                 case TL_OPT_PEER_CRED:
6182                 case TL_OPT_PEER_UCRED:
6183                         /*
6184                          * option not supposed to be set directly
6185                          * Its value in initialized for each endpoint at
6186                          * driver open time.
6187                          * Direct setting always designed to fail for this
6188                          * option.
6189                          */
6190                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
6191                             SL_TRACE|SL_ERROR,
6192                             "tl_set_opt: option is not supported"));
6193                         error = EPROTO;
6194                         break;
6195                 }
6196         }
6197         return (error);
6198 }
6199 
6200 
6201 static void
6202 tl_timer(void *arg)
6203 {
6204         queue_t *wq = arg;
6205         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6206 
6207         ASSERT(tep);
6208 
6209         tep->te_timoutid = 0;
6210 
6211         enableok(wq);
6212         /*
6213          * Note: can call wsrv directly here and save context switch
6214          * Consider change when qtimeout (not timeout) is active
6215          */
6216         qenable(wq);
6217 }
6218 
6219 static void
6220 tl_buffer(void *arg)
6221 {
6222         queue_t *wq = arg;
6223         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6224 
6225         ASSERT(tep);
6226 
6227         tep->te_bufcid = 0;
6228         tep->te_nowsrv = B_FALSE;
6229 
6230         enableok(wq);
6231         /*
6232          *  Note: can call wsrv directly here and save context switch
6233          * Consider change when qbufcall (not bufcall) is active
6234          */
6235         qenable(wq);
6236 }
6237 
6238 static void
6239 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6240 {
6241         tl_endpt_t *tep;
6242 
6243         tep = (tl_endpt_t *)wq->q_ptr;
6244 
6245         if (tep->te_closing) {
6246                 freemsg(mp);
6247                 return;
6248         }
6249         noenable(wq);
6250 
6251         (void) insq(wq, wq->q_first, mp);
6252 
6253         if (tep->te_bufcid || tep->te_timoutid) {
6254                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6255                     "tl_memrecover:recover %p pending", (void *)wq));
6256                 return;
6257         }
6258 
6259         if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6260                 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6261                     drv_usectohz(TL_BUFWAIT));
6262         }
6263 }
6264 
6265 static void
6266 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6267 {
6268         ASSERT(tip->ti_seqno != 0);
6269 
6270         if (tip->ti_mp != NULL) {
6271                 tl_icon_freemsgs(&tip->ti_mp);
6272                 tip->ti_mp = NULL;
6273         }
6274         if (tip->ti_tep != NULL) {
6275                 tl_refrele(tip->ti_tep);
6276                 tip->ti_tep = NULL;
6277         }
6278         list_remove(&tep->te_iconp, tip);
6279         kmem_free(tip, sizeof (tl_icon_t));
6280         tep->te_nicon--;
6281 }
6282 
6283 /*
6284  * Remove address from address hash.
6285  */
6286 static void
6287 tl_addr_unbind(tl_endpt_t *tep)
6288 {
6289         tl_endpt_t *elp;
6290 
6291         if (tep->te_flag & TL_ADDRHASHED) {
6292                 if (IS_SOCKET(tep)) {
6293                         (void) mod_hash_remove(tep->te_addrhash,
6294                             (mod_hash_key_t)tep->te_vp,
6295                             (mod_hash_val_t *)&elp);
6296                         tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6297                         tep->te_magic = SOU_MAGIC_IMPLICIT;
6298                 } else {
6299                         (void) mod_hash_remove(tep->te_addrhash,
6300                             (mod_hash_key_t)&tep->te_ap,
6301                             (mod_hash_val_t *)&elp);
6302                         (void) kmem_free(tep->te_abuf, tep->te_alen);
6303                         tep->te_alen = -1;
6304                         tep->te_abuf = NULL;
6305                 }
6306                 tep->te_flag &= ~TL_ADDRHASHED;
6307         }
6308 }