1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  27  * Copyright (c) 2012 by Delphix. All rights reserved.
  28  * Copyright 2015 Joyent, Inc.
  29  */
  30 
  31 /*
  32  * Multithreaded STREAMS Local Transport Provider.
  33  *
  34  * OVERVIEW
  35  * ========
  36  *
  37  * This driver provides TLI as well as socket semantics.  It provides
  38  * connectionless, connection oriented, and connection oriented with orderly
  39  * release transports for TLI and sockets. Each transport type has separate name
  40  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
  41  * this removes any name space conflicts when binding to socket style transport
  42  * addresses.
  43  *
  44  * NOTE: There is one exception: Socket ticots and ticotsord transports share
  45  * the same namespace. In fact, sockets always use ticotsord type transport.
  46  *
  47  * The driver mode is specified during open() by the minor number used for
  48  * open.
  49  *
  50  *  The sockets in addition have the following semantic differences:
  51  *  No support for passing up credentials (TL_SET[U]CRED).
  52  *
  53  *      Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
  54  *      from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
  55  *      T_OPTDATA_IND.
  56  *
  57  *      The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
  58  *      a T_CONN_RES is received from the acceptor. This means that a socket
  59  *      connect will complete before the peer has called accept.
  60  *
  61  *
  62  * MULTITHREADING
  63  * ==============
  64  *
  65  * The driver does not use STREAMS protection mechanisms. Instead it uses a
  66  * generic "serializer" abstraction. Most of the operations are executed behind
  67  * the serializer and are, essentially single-threaded. All functions executed
  68  * behind the same serializer are strictly serialized. So if one thread calls
  69  * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
  70  * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
  71  * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
  72  * or bar(mp2, arg2); foo(mp1, arg1); but foo() and bar() will never run at the
  73  * same time.
  74  *
  75  * Connectionless transports use a single serializer per transport type (one for
  76  * TLI and one for sockets). Connection-oriented transports use finer-grained
  77  * serializers.
  78  *
  79  * All COTS-type endpoints start their life with private serializers. During
  80  * connection request processing the endpoint serializer is switched to the
  81  * listener's serializer and the rest of T_CONN_REQ processing is done on the
  82  * listener serializer. During T_CONN_RES processing the eager serializer is
  83  * switched from listener to acceptor serializer and after that point all
  84  * processing for eager and acceptor happens on this serializer. To avoid races
  85  * with endpoint closes while its serializer may be changing closes are blocked
  86  * while serializers are manipulated.
  87  *
  88  * References accounting
  89  * ---------------------
  90  *
  91  * Endpoints are reference counted and freed when the last reference is
  92  * dropped. Functions within the serializer may access an endpoint state even
  93  * after an endpoint closed. The te_closing being set on the endpoint indicates
  94  * that the endpoint entered its close routine.
  95  *
  96  * One reference is held for each opened endpoint instance. The reference
  97  * counter is incremented when the endpoint is linked to another endpoint and
  98  * decremented when the link disappears. It is also incremented when the
  99  * endpoint is found by the hash table lookup. This increment is atomic with the
 100  * lookup itself and happens while the hash table read lock is held.
 101  *
 102  * Close synchronization
 103  * ---------------------
 104  *
 105  * During close the endpoint is marked as closing using te_closing flag. It is
 106  * usually enough to check for te_closing flag since all other state changes
 107  * happen after this flag is set and the close entered serializer. Immediately
 108  * after setting te_closing flag tl_close() enters serializer and waits until
 109  * the callback finishes. This allows all functions called within serializer to
 110  * simply check te_closing without any locks.
 111  *
 112  * Serializer management.
 113  * ---------------------
 114  *
 115  * For COTS transports serializers are created when the endpoint is constructed
 116  * and destroyed when the endpoint is destructed. CLTS transports use global
 117  * serializers - one for sockets and one for TLI.
 118  *
 119  * COTS serializers have separate reference counts to deal with several
 120  * endpoints sharing the same serializer. There is a subtle problem related to
 121  * the serializer destruction. The serializer should never be destroyed by any
 122  * function executed inside serializer. This means that close has to wait till
 123  * all serializer activity for this endpoint is finished before it can drop the
 124  * last reference on the endpoint (which may as well free the serializer).  This
 125  * is only relevant for COTS transports which manage serializers
 126  * dynamically. For CLTS transports close may complete without waiting for all
 127  * serializer activity to finish since serializer is only destroyed at driver
 128  * detach time.
 129  *
 130  * COTS endpoints keep track of the number of outstanding requests on the
 131  * serializer for the endpoint. The code handling accept() avoids changing
 132  * client serializer if it has any pending messages on the serializer and
 133  * instead moves acceptor to listener's serializer.
 134  *
 135  *
 136  * Use of hash tables
 137  * ------------------
 138  *
 139  * The driver uses modhash hash table implementation. Each transport uses two
 140  * hash tables - one for finding endpoints by acceptor ID and another one for
 141  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
 142  * pair of hash tables since sockets only use TICOTSORD.
 143  *
 144  * All hash tables lookups increment a reference count for returned endpoints,
 145  * so we may safely check the endpoint state even when the endpoint is removed
 146  * from the hash by another thread immediately after it is found.
 147  *
 148  *
 149  * CLOSE processing
 150  * ================
 151  *
 152  * The driver enters serializer twice on close(). The close sequence is the
 153  * following:
 154  *
 155  * 1) Wait until closing is safe (te_closewait becomes zero)
 156  *      This step is needed to prevent close during serializer switches. In most
 157  *      cases (close happening after connection establishment) te_closewait is
 158  *      zero.
 159  * 2) Set te_closing.
 160  * 3) Call tl_close_ser() within serializer and wait for it to complete.
 161  *
 162  *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 163  *      It also needs to clear write-side q_next pointers - this should be done
 164  *      before qprocsoff().
 165  *
 166  *    This synchronous serializer entry during close is needed to ensure that
 167  *    the queue is valid everywhere inside the serializer.
 168  *
 169  *    Note that in many cases close will execute tl_close_ser() synchronously,
 170  *    so it will not wait at all.
 171  *
 172  * 4) Calls qprocsoff().
 173  * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
 174  *      complete (for COTS transports). For CLTS transport there is no wait.
 175  *
 176  *      tl_close_finish_ser() finishes the close process and wakes up waiting
 177  *      close if there is any.
 178  *
 179  *    Note that in most cases close will enter tl_close_finish_ser()
 180  *    synchronously and will not wait at all.
 181  *
 182  *
 183  * Flow Control
 184  * ============
 185  *
 186  * The driver implements both read and write side service routines. No one calls
 187  * putq() on the read queue. The read side service routine tl_rsrv() is called
 188  * when the read side stream is back-enabled. It enters serializer synchronously
 189  * (waits till serializer processing is complete). Within serializer it
 190  * back-enables all endpoints blocked by the queue for connection-less
 191  * transports and enables write side service processing for the peer for
 192  * connection-oriented transports.
 193  *
 194  * Read and write side service routines use special mblk_sized space in the
 195  * endpoint structure to enter perimeter.
 196  *
 197  * Write-side flow control
 198  * -----------------------
 199  *
 200  * Write side flow control is a bit tricky. The driver needs to deal with two
 201  * message queues - the explicit STREAMS message queue maintained by
 202  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
 203  * queues should be synchronized to preserve message ordering and should
 204  * maintain a single order determined by the order in which messages enter
 205  * tl_wput(). In order to maintain the ordering between these two queues the
 206  * STREAMS queue is only manipulated within the serializer, so the ordering is
 207  * provided by the serializer.
 208  *
 209  * Functions called from the tl_wsrv() sometimes may call putbq(). To
 210  * immediately stop any further processing of the STREAMS message queues the
 211  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
 212  * side service processing stops when the flag is set.
 213  *
 214  * The tl_wsrv() function enters serializer synchronously and waits for it to
 215  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
 216  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
 217  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
 218  * always bounded by the amount of messages on the STREAMS queue at the time
 219  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
 220  * queue from another serialized entry which can't happen in parallel. This
 221  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
 222  * of it draining forever while writer places new messages on the STREAMS
 223  * queue).
 224  *
 225  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
 226  *
 227  *
 228  * Unix Domain Sockets
 229  * ===================
 230  *
 231  * The driver knows the structure of Unix Domain sockets addresses and treats
 232  * them differently from generic TLI addresses. For sockets implicit binds are
 233  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
 234  * instead of using address length of zero. Explicit binds specify
 235  * SOU_MAGIC_EXPLICIT as magic.
 236  *
 237  * For implicit binds we always use minor number as soua_vp part of the address
 238  * and avoid any hash table lookups. This saves two hash tables lookups per
 239  * anonymous bind.
 240  *
 241  * For explicit address we hash the vnode pointer instead of hashing the
 242  * full-scale address+zone+length. Hashing by pointer is more efficient than
 243  * hashing by the full address.
 244  *
 245  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 246  * tep structure, so it should be never freed.
 247  *
 248  * Also for sockets the driver always uses minor number as acceptor id.
 249  *
 250  * TPI VIOLATIONS
 251  * --------------
 252  *
 253  * This driver violates TPI in several respects for Unix Domain Sockets:
 254  *
 255  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
 256  *      is requested and the endpoint is already in use. There is no point in
 257  *      generating an unused address since this address will be rejected by
 258  *      sockfs anyway. For implicit binds it always generates a new address
 259  *      (sets soua_vp to its minor number).
 260  *
 261  * 2) It always uses minor number as acceptor ID and never uses queue
 262  *      pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
 263  *      message and they do not use the queue pointer.
 264  *
 265  * 3) For Listener sockets the usual sequence is to issue bind() with zero
 266  *      backlog followed by listen(). The listen() should be issued with
 267  *      non-zero backlog, so sotpi_listen() issues unbind request followed by
 268  *      bind request to the same address but with a non-zero qlen value. Both
 269  *      tl_bind() and tl_unbind() require write lock on the hash table to
 270  *      insert/remove the address. The driver does not remove the address from
 271  *      the hash for endpoints that are bound to the explicit address and have
 272  *      backlog of zero. During T_BIND_REQ processing if the address requested
 273  *      is equal to the address the endpoint already has it updates the backlog
 274  *      without reinserting the address in the hash table. This optimization
 275  *      avoids two hash table updates for each listener created. It also
 276  *      avoids the problem of a "stolen" address when another listener may use
 277  *      the same address between the unbind and bind and suddenly listen() fails
 278  *      because address is in use even though the bind() succeeded.
 279  *
 280  *
 281  * CONNECTIONLESS TRANSPORTS
 282  * =========================
 283  *
 284  * Connectionless transports all share the same serializer (one for TLI and one
 285  * for Sockets). Functions executing behind serializer can check or modify state
 286  * of any endpoint.
 287  *
 288  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
 289  * te_lastep field. The next time X talks to some address A it checks whether A
 290  * is the same as Y's address and if it is there is no need to lookup Y. If the
 291  * address is different or the state of Y is not appropriate (e.g. closed or not
 292  * idle) X does a lookup using tl_find_peer() and caches the new address.
 293  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
 294  * on the endpoint found.
 295  *
 296  * During close of endpoint Y it doesn't try to remove itself from other
 297  * endpoints caches. They will detect that Y is gone and will search the peer
 298  * endpoint again.
 299  *
 300  * Flow Control Handling.
 301  * ----------------------
 302  *
 303  * Each connectionless endpoint keeps a list of endpoints which are
 304  * flow-controlled by its queue. It also keeps a pointer to the queue which
 305  * flow-controls itself.  Whenever flow control releases for endpoint X it
 306  * enables all queues from the list. During close it also back-enables everyone
 307  * in the list. If X is flow-controlled when it is closing it removes itself
 308  * from the peer's list.
 309  *
 310  * DATA STRUCTURES
 311  * ===============
 312  *
 313  * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 314  * endpoint state. For connection-oriented transports it keeps a list
 315  * of pending connections (tl_icon_t). For connectionless transports it keeps a
 316  * list of endpoints flow controlled by this one.
 317  *
 318  * Each transport type is represented by a per-transport data structure
 319  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
 320  * endpoint address hash tables for each transport. It also contains pointer to
 321  * transport serializer for connectionless transports.
 322  *
 323  * Each endpoint keeps a link to its transport structure, so the code can find
 324  * all per-transport information quickly.
 325  */
 326 
 327 #include        <sys/types.h>
 328 #include        <sys/inttypes.h>
 329 #include        <sys/stream.h>
 330 #include        <sys/stropts.h>
 331 #define _SUN_TPI_VERSION 2
 332 #include        <sys/tihdr.h>
 333 #include        <sys/strlog.h>
 334 #include        <sys/debug.h>
 335 #include        <sys/cred.h>
 336 #include        <sys/errno.h>
 337 #include        <sys/kmem.h>
 338 #include        <sys/id_space.h>
 339 #include        <sys/modhash.h>
 340 #include        <sys/mkdev.h>
 341 #include        <sys/tl.h>
 342 #include        <sys/stat.h>
 343 #include        <sys/conf.h>
 344 #include        <sys/modctl.h>
 345 #include        <sys/strsun.h>
 346 #include        <sys/socket.h>
 347 #include        <sys/socketvar.h>
 348 #include        <sys/sysmacros.h>
 349 #include        <sys/xti_xtiopt.h>
 350 #include        <sys/ddi.h>
 351 #include        <sys/sunddi.h>
 352 #include        <sys/zone.h>
 353 #include        <inet/common.h>   /* typedef int (*pfi_t)() for inet/optcom.h */
 354 #include        <inet/optcom.h>
 355 #include        <sys/strsubr.h>
 356 #include        <sys/ucred.h>
 357 #include        <sys/suntpi.h>
 358 #include        <sys/list.h>
 359 #include        <sys/serializer.h>
 360 
 361 /*
 362  * TBD List
 363  * 14. Eliminate state changes through table
 364  * 16. AF_UNIX socket options
 365  * 17. connect() for ticlts
 366  * 18. support for "netstat" to show AF_UNIX plus TLI local
 367  *      transport connections
 368  * 21. sanity check to flushing on sending M_ERROR
 369  */
 370 
 371 /*
 372  * CONSTANT DECLARATIONS
 373  * --------------------
 374  */
 375 
 376 /*
 377  * Local declarations
      *
      * NEXTSTATE(EV, ST) yields the ti_statetbl entry for event EV and state ST
      * (the table is indexed as [event][state]).
 378  */
 379 #define NEXTSTATE(EV, ST)       ti_statetbl[EV][ST]
 380 
 381 #define BADSEQNUM       (-1)    /* initial seq number used by T_DISCON_IND */
 382 #define TL_BUFWAIT      (10000) /* usecs to wait for allocb buffer timeout */
 383 #define TL_TIDUSZ (64*1024)     /* tidu size when "strmsgz" is unlimited (0) */
 384 /*
 385  * Hash tables size.
      * NOTE(review): presumably the size used when the per-transport acceptor ID
      * and address hash tables are created - confirm at the creation site.
 386  */
 387 #define TL_HASH_SIZE 311
 388 
 389 /*
 390  * Definitions for module_info
      * These constants are the initializers of the tl_minfo structure.
 391  */
 392 #define         TL_ID           (104)           /* module ID number */
 393 #define         TL_NAME         "tl"            /* module name */
 394 #define         TL_MINPSZ       (0)             /* min packet size */
 395 #define         TL_MAXPSZ       INFPSZ          /* max packet size ZZZ */
 396 #define         TL_HIWAT        (16*1024)       /* hi water mark */
 397 #define         TL_LOWAT        (256)           /* lo water mark */
 398 /*
 399  * Definition of minor numbers/modes for new transport provider modes.
 400  * We view the socket use as a separate mode to get a separate name space.
      *
      * The mode values double as bits: TL_TICOTSORD (1) and TL_TICLTS (2) are
      * tested individually by IS_COTSORD()/IS_CLTS(), and TL_SOCKET (4) is ORed
      * in to form the socket variants. Each value also matches the position of
      * the corresponding entry in tl_transports[].
 401  */
 402 #define         TL_TICOTS       0       /* connection oriented transport */
 403 #define         TL_TICOTSORD    1       /* COTS w/ orderly release */
 404 #define         TL_TICLTS       2       /* connectionless transport */
 405 #define         TL_UNUSED       3       /* no transport uses this value */
 406 #define         TL_SOCKET       4       /* Socket */
 407 #define         TL_SOCK_COTS    (TL_SOCKET|TL_TICOTS)
 408 #define         TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD)
 409 #define         TL_SOCK_CLTS    (TL_SOCKET|TL_TICLTS)
 410 
 411 #define         TL_MINOR_MASK   0x7     /* covers all mode bits above */
     /* NOTE(review): presumably the first minor handed out to opens - confirm */
 412 #define         TL_MINOR_START  (TL_TICLTS + 1)
 413 
 414 /*
 415  * LOCAL MACROS
      *
      * T_ALIGN(p) rounds p up, via P2ROUNDUP(), to a multiple of
      * sizeof (t_scalar_t).
 416  */
 417 #define T_ALIGN(p)      P2ROUNDUP((p), sizeof (t_scalar_t))
 418 
 419 /*
 420  * EXTERNAL VARIABLE DECLARATIONS
 421  * -----------------------------
 422  */
 423 /*
 424  * state table defined in the OS space.c
      * Dimensioned [TE_NOEVENTS][TS_NOSTATES], i.e. indexed by event and then by
      * state; NEXTSTATE() is the accessor used by this file.
 425  */
 426 extern  char    ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
 427 
 428 /*
 429  * STREAMS DRIVER ENTRY POINTS PROTOTYPES
      *
      * tl_open, tl_close and tl_rsrv are installed in the read-side qinit
      * (tl_rinit); tl_wput and tl_wsrv in the write-side qinit (tl_winit).
      * tl_attach, tl_detach and tl_info are handed to DDI_DEFINE_STREAM_OPS().
 430  */
 431 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
 432 static int tl_close(queue_t *, int, cred_t *);
 433 static void tl_wput(queue_t *, mblk_t *);
 434 static void tl_wsrv(queue_t *);
 435 static void tl_rsrv(queue_t *);
 436 
 437 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
 438 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
 439 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 440 
 441 
 442 /*
 443  * GLOBAL DATA STRUCTURES AND VARIABLES
 444  * -----------------------------------
 445  */
 446 
 447 /*
 448  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
 449  * For now, we only manage the SO_RECVUCRED option but we also have
 450  * harmless dummy options to make things work with some common code we access.
      *
      * The table holds two SOL_SOCKET entries: SO_TYPE (OA_R/OA_R, data size
      * sizeof (t_scalar_t)) and SO_RECVUCRED (OA_RW/OA_RW, data size
      * sizeof (int)).
      * NOTE(review): the initializers are positional; the meaning of each slot
      * comes from the opdes_t definition (presumably in <inet/optcom.h>) -
      * confirm the field order there before adding or editing an entry.
 451  */
 452 opdes_t tl_opt_arr[] = {
 453         /* The SO_TYPE is needed for the hack below */
 454         {
 455                 SO_TYPE,
 456                 SOL_SOCKET,
 457                 OA_R,
 458                 OA_R,
 459                 OP_NP,
 460                 0,
 461                 sizeof (t_scalar_t),
 462                 0
 463         },
 464         {
 465                 SO_RECVUCRED,
 466                 SOL_SOCKET,
 467                 OA_RW,
 468                 OA_RW,
 469                 OP_NP,
 470                 0,
 471                 sizeof (int),
 472                 0
 473         }
 474 };
 475 
 476 /*
 477  * Table of all supported levels
 478  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 479  * any supported options so we need this info separately.
 480  *
 481  * This is needed only for topmost tpi providers.
      *
      * TL_VALID_LEVELS_CNT is the number of entries in the table.
 482  */
 483 optlevel_t      tl_valid_levels_arr[] = {
 484         XTI_GENERIC,
 485         SOL_SOCKET,
 486         TL_PROT_LEVEL
 487 };
 488 
 489 #define TL_VALID_LEVELS_CNT     A_CNT(tl_valid_levels_arr)
 490 /*
 491  * Current upper bound on the amount of space needed to return all options:
 492  * one struct opthdr and 4 data bytes (the "<< 2") per tl_opt_arr entry, plus
 493  * 64 spare bytes and struct T_optmgmt_ack. Bigger options need manual sizing.
 494  */
 495 #define TL_MAX_OPT_BUF_LEN                                              \
 496                 ((A_CNT(tl_opt_arr) << 2) +                               \
 497                 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +          \
 498                 64 + sizeof (struct T_optmgmt_ack))
 499 
 500 #define TL_OPT_ARR_CNT  A_CNT(tl_opt_arr)       /* entries in tl_opt_arr */
 501 
 502 /*
 503  *      transport addr structure
      *
      * Describes one transport address: the zone it is scoped to plus a
      * length/pointer pair for the address bytes. struct tl_endpt embeds one of
      * these (te_ap) for the address bound to the endpoint.
 504  */
 505 typedef struct tl_addr {
 506         zoneid_t        ta_zoneid;              /* Zone scope of address */
 507         t_scalar_t      ta_alen;                /* length of abuf */
 508         void            *ta_abuf;               /* the addr itself */
 509 } tl_addr_t;
 510 
 511 /*
 512  * Refcounted version of serializer.
      *
      * Pairs a serializer_t with a reference count so that several endpoints
      * can share one serializer. Endpoints point at one of these through
      * te_ser, and te_serializer expands to te_ser->ts_serializer.
 513  */
 514 typedef struct tl_serializer {
 515         uint_t          ts_refcnt;      /* references to this serializer */
 516         serializer_t    *ts_serializer; /* the underlying serializer */
 517 } tl_serializer_t;
 518 
 519 /*
 520  * Each transport type has a separate state.
 521  * Per-transport state.
      *
      * One instance exists per entry of tl_transports[]; each endpoint links to
      * its instance through te_transport.
 522  */
 523 typedef struct tl_transport_state {
 524         char            *tr_name;       /* transport name, e.g. "ticots" */
 525         minor_t         tr_minor;       /* transport mode (TL_TICOTS, ...) */
 526         uint32_t        tr_defaddr;     /* starts out as TL_DFADDR */
 527         mod_hash_t      *tr_ai_hash;    /* endpoints by acceptor ID */
 528         mod_hash_t      *tr_addr_hash;  /* endpoints by address */
 529         tl_serializer_t *tr_serializer; /* for connectionless transports */
 530 } tl_transport_state_t;
 531 
 532 #define TL_DFADDR 0x1000        /* initial tr_defaddr of every transport */
 533 
     /*
      * Per-transport state table. Entries are listed in mode order, so the
      * position of each entry equals its tr_minor value (TL_TICOTS == 0 through
      * TL_SOCK_CLTS == 6); "undefined" fills the TL_UNUSED slot. The hash table
      * and serializer pointers all start out NULL.
      */
 534 static tl_transport_state_t tl_transports[] = {
 535         { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
 536         { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
 537         { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
 538         { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
 539         { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
 540         { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
 541         { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
 542 };
 543 
 544 #define TL_MAXTRANSPORT A_CNT(tl_transports)    /* number of table entries */
 545 
 546 struct tl_endpt;        /* forward declaration; defined further below */
 547 typedef struct tl_endpt tl_endpt_t;
 548 
     /*
      * Function type taking a message block and an endpoint.
      * NOTE(review): presumably the signature of the callbacks this driver runs
      * inside the serializer - confirm against the serializer entry helpers.
      */
 549 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
 550 
 551 /*
 552  * Data structure used to represent pending connects.
 553  * Records enough information so that the connecting peer can close
 554  * before the connection gets accepted.
      *
      * Connection-oriented endpoints keep these on their te_iconp list, linked
      * through ti_node.
 555  */
 556 typedef struct tl_icon {
 557         list_node_t     ti_node;        /* linkage on the te_iconp list */
 558         struct tl_endpt *ti_tep;        /* NULL if peer has already closed */
 559         mblk_t          *ti_mp;         /* b_next list of data + ordrel_ind */
 560         t_scalar_t      ti_seqno;       /* Sequence number */
 561 } tl_icon_t;
 562 
 563 typedef struct so_ux_addr soux_addr_t;  /* Unix domain socket address */
 564 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t)
 565 
 566 /*
 567  * Maximum number of unaccepted connection indications allowed per listener.
      * tl_maxqlen is a non-static, writable variable initialized to TL_MAXQLEN.
      * NOTE(review): presumably left global so the limit can be tuned - confirm.
 568  */
 569 #define TL_MAXQLEN      4096
 570 int tl_maxqlen = TL_MAXQLEN;
 571 
 572 /*
 573  *      transport endpoint structure
      *
      * One tl_endpt holds all the state of one endpoint. Endpoints are reference
      * counted (te_refcnt). The union holds the state that differs between
      * connection-oriented and connectionless transports, so only the member
      * matching the endpoint type (see IS_COTS()/IS_CLTS()) is meaningful. The
      * te_* #defines interleaved with the fields are shorthand for members of
      * embedded or pointed-to structures.
 574  */
 575 struct tl_endpt {
 576         queue_t         *te_rq;         /* stream read queue */
 577         queue_t         *te_wq;         /* stream write queue */
 578         uint32_t        te_refcnt;      /* endpoint reference count */
 579         int32_t         te_state;       /* TPI state of endpoint */
 580         minor_t         te_minor;       /* minor number */
 581 #define te_seqno        te_minor        /* te_seqno aliases te_minor */
 582         uint_t          te_flag;        /* flag field */
 583         boolean_t       te_nowsrv;      /* B_TRUE after TL_PUTBQ() */
 584         tl_serializer_t *te_ser;        /* Serializer to use */
 585 #define te_serializer   te_ser->ts_serializer
 586 
 587         soux_addr_t     te_uxaddr;      /* Socket address */
 588 #define te_magic        te_uxaddr.soua_magic
 589 #define te_vp           te_uxaddr.soua_vp
 590         tl_addr_t       te_ap;          /* addr bound to this endpt */
 591 #define te_zoneid te_ap.ta_zoneid
 592 #define te_alen te_ap.ta_alen
 593 #define te_abuf te_ap.ta_abuf
 594 
 595         tl_transport_state_t *te_transport;     /* per-transport state */
 596 #define te_addrhash     te_transport->tr_addr_hash
 597 #define te_aihash       te_transport->tr_ai_hash
 598 #define te_defaddr      te_transport->tr_defaddr
 599         cred_t          *te_credp;      /* endpoint user credentials */
 600         mod_hash_hndl_t te_hash_hndl;   /* Handle for address hash */
 601 
 602         /*
 603          * State specific for connection-oriented and connectionless transports.
 604          */
 605         union {
 606                 /* Connection-oriented state. */
 607                 struct {
 608                         t_uscalar_t _te_nicon;  /* count of conn requests */
 609                         t_uscalar_t _te_qlen;   /* max conn requests */
 610                         tl_endpt_t  *_te_oconp; /* conn request pending */
 611                         tl_endpt_t  *_te_conp;  /* connected endpt */
                             /* NOTE(review): padding only outside ILP32; */
                             /* purpose not visible here - confirm. */
 612 #ifndef _ILP32
 613                         void        *_te_pad;
 614 #endif
 615                         list_t  _te_iconp;      /* list of conn ind. pending */
 616                 } _te_cots_state;
 617                 /* Connection-less state. */
 618                 struct {
 619                         tl_endpt_t *_te_lastep; /* last dest. endpoint */
 620                         tl_endpt_t *_te_flowq;  /* flow controlled on whom */
 621                         list_node_t _te_flows;  /* lists of connections */
 622                         list_t  _te_flowlist;   /* Who flowcontrols on me */
 623                 } _te_clts_state;
 624         } _te_transport_state;
 625 #define te_nicon        _te_transport_state._te_cots_state._te_nicon
 626 #define te_qlen         _te_transport_state._te_cots_state._te_qlen
 627 #define te_oconp        _te_transport_state._te_cots_state._te_oconp
 628 #define te_conp         _te_transport_state._te_cots_state._te_conp
 629 #define te_iconp        _te_transport_state._te_cots_state._te_iconp
 630 #define te_lastep       _te_transport_state._te_clts_state._te_lastep
 631 #define te_flowq        _te_transport_state._te_clts_state._te_flowq
 632 #define te_flowlist     _te_transport_state._te_clts_state._te_flowlist
 633 #define te_flows        _te_transport_state._te_clts_state._te_flows
 634 
 635         bufcall_id_t    te_bufcid;      /* outstanding bufcall id */
 636         timeout_id_t    te_timoutid;    /* outstanding timeout id */
 637         pid_t           te_cpid;        /* cached pid of endpoint */
 638         t_uscalar_t     te_acceptor_id; /* acceptor id for T_CONN_RES */
 639         /*
 640          * Pieces of the endpoint state needed for closing.
 641          */
 642         kmutex_t        te_closelock;
 643         kcondvar_t      te_closecv;
 644         uint8_t         te_closing;     /* The endpoint started closing */
 645         uint8_t         te_closewait;   /* Wait in close until zero */
 646         mblk_t          te_closemp;     /* for entering serializer on close */
 647         mblk_t          te_rsrvmp;      /* for entering serializer on rsrv */
 648         mblk_t          te_wsrvmp;      /* for entering serializer on wsrv */
             /* NOTE(review): lock/cv presumably guard the *_active flags below */
 649         kmutex_t        te_srv_lock;
 650         kcondvar_t      te_srv_cv;
 651         uint8_t         te_rsrv_active; /* Running in tl_rsrv() */
 652         uint8_t         te_wsrv_active; /* Running in tl_wsrv() */
 653         /*
 654          * Pieces of the endpoint state needed for serializer transitions.
 655          */
 656         kmutex_t        te_ser_lock;    /* Protects the count below */
 657         uint_t          te_ser_count;   /* Number of messages on serializer */
 658 };
 659 
 660 /*
 661  * Flag values for te_flag. The lower 4 bits specify the transport used (the
      * TL_TICOTS/TL_TICOTSORD/TL_TICLTS/TL_SOCKET mode values tested by the
      * IS_*() macros), so the flags defined here start at 0x10.
 662  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
 663  * they allow to identify the endpoint more easily.
 664  */
 665 #define TL_LISTENER     0x00010 /* the listener endpoint */
 666 #define TL_ACCEPTOR     0x00020 /* the accepting endpoint */
 667 #define TL_EAGER        0x00040 /* connecting endpoint */
 668 #define TL_ACCEPTED     0x00080 /* accepted connection */
 669 #define TL_SETCRED      0x00100 /* flag to indicate sending of credentials */
 670 #define TL_SETUCRED     0x00200 /* flag to indicate sending of ucred */
 671 #define TL_SOCKUCRED    0x00400 /* flag to indicate sending of SCM_UCRED */
 672 #define TL_ADDRHASHED   0x01000 /* Endpoint address is stored in te_addrhash */
 673 #define TL_CLOSE_SER    0x10000 /* Endpoint close has entered the serializer */
 674 /*
 675  * Boolean checks for the endpoint type.
      *
      * Each macro takes an endpoint pointer and tests one transport-mode bit in
      * its te_flag: TL_TICLTS set means connectionless (IS_CLTS), clear means
      * connection-oriented (IS_COTS); TL_TICOTSORD marks orderly release and
      * TL_SOCKET marks a socket endpoint.
 676  */
 677 #define         IS_CLTS(x)      (((x)->te_flag & TL_TICLTS) != 0)
 678 #define         IS_COTS(x)      (((x)->te_flag & TL_TICLTS) == 0)
 679 #define         IS_COTSORD(x)   (((x)->te_flag & TL_TICOTSORD) != 0)
 680 #define         IS_SOCKET(x)    (((x)->te_flag & TL_SOCKET) != 0)
 681 
 682 /*
 683  * Certain operations are always used together. These macros reduce the chance
 684  * of missing a part of a combination.
      *
      * TL_UNCONNECT(x)   drops a reference with tl_refrele(x) and sets x to NULL;
      *                   x must therefore be an lvalue.
      * TL_REMOVE_PEER(x) does TL_UNCONNECT(x) only when x is not NULL.
      * TL_PUTBQ(x, mp)   asserts that TL_CLOSE_SER is not set, sets te_nowsrv and
      *                   puts mp back on the write queue with putbq().
      * TL_QENABLE(x)     clears te_nowsrv and qenable()s the write queue.
      * TL_PUTQ(x, mp)    clears te_nowsrv and putq()s mp on the write queue.
      *
      * All of these expand to a brace-enclosed block rather than a
      * do { } while (0) statement and evaluate x more than once, so avoid
      * arguments with side effects and take care when using them as the
      * unbraced body of an if that has an else.
 685  */
 686 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
 687 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
 688 
 689 #define TL_PUTBQ(x, mp) {               \
 690         ASSERT(!((x)->te_flag & TL_CLOSE_SER));  \
 691         (x)->te_nowsrv = B_TRUE;     \
 692         (void) putbq((x)->te_wq, mp);        \
 693 }
 694 
 695 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
 696 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
 697 
 698 /*
 699  * STREAMS driver glue data structures.
 700  */
/*
 * Module information referenced by both the read-side (tl_rinit) and the
 * write-side (tl_winit) queue initialization structures.
 */
static  struct  module_info     tl_minfo = {
        TL_ID,                  /* mi_idnum */
        TL_NAME,                /* mi_idname */
        TL_MINPSZ,              /* mi_minpsz */
        TL_MAXPSZ,              /* mi_maxpsz */
        TL_HIWAT,               /* mi_hiwat */
        TL_LOWAT                /* mi_lowat */
};
 709 
/*
 * Read-side queue initialization: no put procedure, tl_rsrv() as the service
 * procedure, and the open/close entry points of the driver.
 */
static  struct  qinit   tl_rinit = {
        NULL,                   /* qi_putp */
        (int (*)())tl_rsrv,     /* qi_srvp */
        tl_open,                /* qi_qopen */
        tl_close,               /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &tl_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 719 
/*
 * Write-side queue initialization: tl_wput() as the put procedure and
 * tl_wsrv() as the service procedure. Open and close are only set on the
 * read side (tl_rinit).
 */
static  struct  qinit   tl_winit = {
        (int (*)())tl_wput,     /* qi_putp */
        (int (*)())tl_wsrv,     /* qi_srvp */
        NULL,                   /* qi_qopen */
        NULL,                   /* qi_qclose */
        NULL,                   /* qi_qadmin */
        &tl_minfo,          /* qi_minfo */
        NULL                    /* qi_mstat */
};
 729 
/*
 * Stream table tying the read and write queue initialization structures
 * together. The multiplexor entries are not used by this driver.
 */
static  struct streamtab        tlinfo = {
        &tl_rinit,          /* st_rdinit */
        &tl_winit,          /* st_wrinit */
        NULL,                   /* st_muxrinit */
        NULL                    /* st_muxwrinit */
};
 736 
/*
 * Device operations for the driver (tl_devops, referenced from modldrv):
 * tl_attach()/tl_detach()/tl_info() are the attach, detach and getinfo entry
 * points, the driver is flagged D_MP, tlinfo is its stream table, and quiesce
 * is not supported.
 */
DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
 739 
/*
 * Driver linkage structure: describes tl as a pseudo driver and points at its
 * device operations. It is referenced from modlinkage.
 */
static struct modldrv modldrv = {
        &mod_driverops,             /* Type of module -- pseudo driver here */
        "TPI Local Transport (tl)",
        &tl_devops,         /* driver ops */
};
 745 
 746 /*
 747  * Module linkage information for the kernel.
 748  */
/*
 * Passed to mod_install(), mod_remove() and mod_info() by _init(), _fini()
 * and _info(). The linkage list holds modldrv only and is NULL terminated.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        &modldrv,
        NULL
};
 754 
 755 /*
 756  * Templates for response to info request
 757  * Check sanity of unlimited connect data etc.
 758  */
 759 
/*
 * PROVIDER_flag values used in the T_info_ack templates below. Both the
 * connectionless and the connection oriented template advertise the same
 * flags.
 */
#define         TL_CLTS_PROVIDER_FLAG   (XPG4_1|SENDZERO)
#define         TL_COTS_PROVIDER_FLAG   (XPG4_1|SENDZERO)
 762 
/*
 * T_INFO_ACK template for connection oriented endpoints. All size limits are
 * reported as T_INFINITE; the TIDU size is left 0 here and is meant to be
 * filled in at run time, and CURRENT_state is initialized to -1.
 */
static struct T_info_ack tl_cots_info_ack =
        {
                T_INFO_ACK,     /* PRIM_type -always T_INFO_ACK */
                T_INFINITE,     /* TSDU size */
                T_INFINITE,     /* ETSDU size */
                T_INFINITE,     /* CDATA_size */
                T_INFINITE,     /* DDATA_size */
                T_INFINITE,     /* ADDR_size  */
                T_INFINITE,     /* OPT_size */
                0,              /* TIDU_size - fill at run time */
                T_COTS,         /* SERV_type */
                -1,             /* CURRENT_state */
                TL_COTS_PROVIDER_FLAG   /* PROVIDER_flag */
        };
 777 
/*
 * T_INFO_ACK template for connectionless endpoints. Expedited data and
 * connect/disconnect data are marked as not supported (-2), address and
 * option sizes are -1; the TSDU and TIDU sizes are left 0 here and are meant
 * to be filled in at run time, and CURRENT_state is initialized to -1.
 */
static struct T_info_ack tl_clts_info_ack =
        {
                T_INFO_ACK,     /* PRIM_type - always T_INFO_ACK */
                0,              /* TSDU_size - fill at run time */
                -2,             /* ETSDU_size -2 => not supported */
                -2,             /* CDATA_size -2 => not supported */
                -2,             /* DDATA_size  -2 => not supported */
                -1,             /* ADDR_size -1 => infinite */
                -1,             /* OPT_size */
                0,              /* TIDU_size - fill at run time */
                T_CLTS,         /* SERV_type */
                -1,             /* CURRENT_state */
                TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
        };
 792 
/*
 * Private copy of the devinfo pointer. It is saved by tl_attach() and
 * returned by tl_info() for DDI_INFO_DEVT2DEVINFO.
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache: kmem cache of tl_endpt_t structures, created in
 * tl_attach() and destroyed in tl_detach().
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space. Every endpoint takes one ID from it in tl_open() and
 * gives it back in tl_free().
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size. Set in tl_attach() to strmsgsz, or to TL_TIDUSZ
 * when strmsgsz is 0.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
static int tl_client_closing_when_accepting;

/*
 * NOTE(review): not referenced in this part of the file; judging by the name
 * it presumably tunes serializer behavior - confirm against its users.
 */
static int tl_serializer_noswitch;
 825 
/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 * Forward declarations of file-local (static) routines, so that they can be
 * used ahead of their definitions further down in the file.
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
        t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t  *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);
 904 
/*
 * Initialize option database object for TL. The object is not static, so it
 * is visible outside of this file.
 */

optdb_obj_t tl_opt_obj = {
        tl_default_opt,         /* TL default value function pointer */
        tl_get_opt,             /* TL get function pointer */
        tl_set_opt,             /* TL set function pointer */
        TL_OPT_ARR_CNT,         /* TL option database count of entries */
        tl_opt_arr,             /* TL option database */
        TL_VALID_LEVELS_CNT,    /* TL valid level count of entries */
        tl_valid_levels_arr     /* TL valid level array */
};
 918 
 919 /*
 920  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
 921  * ---------------------------------------
 922  */
 923 
 924 /*
 925  * Loadable module routines
 926  */
 927 int
 928 _init(void)
 929 {
 930         return (mod_install(&modlinkage));
 931 }
 932 
 933 int
 934 _fini(void)
 935 {
 936         return (mod_remove(&modlinkage));
 937 }
 938 
 939 int
 940 _info(struct modinfo *modinfop)
 941 {
 942         return (mod_info(&modlinkage, modinfop));
 943 }
 944 
 945 /*
 946  * Driver Entry Points and Other routines
 947  */
 948 static int
 949 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
 950 {
 951         int i;
 952         char name[32];
 953 
 954         /*
 955          * Resume from a checkpoint state.
 956          */
 957         if (cmd == DDI_RESUME)
 958                 return (DDI_SUCCESS);
 959 
 960         if (cmd != DDI_ATTACH)
 961                 return (DDI_FAILURE);
 962 
 963         /*
 964          * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
 965          * streams message sizes can be unlimited. We use a defined constant
 966          * instead.
 967          */
 968         tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
 969 
 970         /*
 971          * Create subdevices for each transport.
 972          */
 973         for (i = 0; i < TL_UNUSED; i++) {
 974                 if (ddi_create_minor_node(devi,
 975                     tl_transports[i].tr_name,
 976                     S_IFCHR, tl_transports[i].tr_minor,
 977                     DDI_PSEUDO, NULL) == DDI_FAILURE) {
 978                         ddi_remove_minor_node(devi, NULL);
 979                         return (DDI_FAILURE);
 980                 }
 981         }
 982 
 983         tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
 984             0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
 985 
 986         if (tl_cache == NULL) {
 987                 ddi_remove_minor_node(devi, NULL);
 988                 return (DDI_FAILURE);
 989         }
 990 
 991         tl_minors = id_space_create("tl_minor_space",
 992             TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
 993 
 994         /*
 995          * Create ID space for minor numbers
 996          */
 997         for (i = 0; i < TL_MAXTRANSPORT; i++) {
 998                 tl_transport_state_t *t = &tl_transports[i];
 999 
1000                 if (i == TL_UNUSED)
1001                         continue;
1002 
1003                 /* Socket COTSORD shares namespace with COTS */
1004                 if (i == TL_SOCK_COTSORD) {
1005                         t->tr_ai_hash =
1006                             tl_transports[TL_SOCK_COTS].tr_ai_hash;
1007                         ASSERT(t->tr_ai_hash != NULL);
1008                         t->tr_addr_hash =
1009                             tl_transports[TL_SOCK_COTS].tr_addr_hash;
1010                         ASSERT(t->tr_addr_hash != NULL);
1011                         continue;
1012                 }
1013 
1014                 /*
1015                  * Create hash tables.
1016                  */
1017                 (void) snprintf(name, sizeof (name), "%s_ai_hash",
1018                     t->tr_name);
1019 #ifdef _ILP32
1020                 if (i & TL_SOCKET)
1021                         t->tr_ai_hash =
1022                             mod_hash_create_idhash(name, tl_hash_size - 1,
1023                             mod_hash_null_valdtor);
1024                 else
1025                         t->tr_ai_hash =
1026                             mod_hash_create_ptrhash(name, tl_hash_size,
1027                             mod_hash_null_valdtor, sizeof (queue_t));
1028 #else
1029                 t->tr_ai_hash =
1030                     mod_hash_create_idhash(name, tl_hash_size - 1,
1031                     mod_hash_null_valdtor);
1032 #endif /* _ILP32 */
1033 
1034                 if (i & TL_SOCKET) {
1035                         (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1036                             t->tr_name);
1037                         t->tr_addr_hash = mod_hash_create_ptrhash(name,
1038                             tl_hash_size, mod_hash_null_valdtor,
1039                             sizeof (uintptr_t));
1040                 } else {
1041                         (void) snprintf(name, sizeof (name), "%s_addr_hash",
1042                             t->tr_name);
1043                         t->tr_addr_hash = mod_hash_create_extended(name,
1044                             tl_hash_size, mod_hash_null_keydtor,
1045                             mod_hash_null_valdtor,
1046                             tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1047                 }
1048 
1049                 /* Create serializer for connectionless transports. */
1050                 if (i & TL_TICLTS)
1051                         t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1052         }
1053 
1054         tl_dip = devi;
1055 
1056         return (DDI_SUCCESS);
1057 }
1058 
1059 static int
1060 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1061 {
1062         int i;
1063 
1064         if (cmd == DDI_SUSPEND)
1065                 return (DDI_SUCCESS);
1066 
1067         if (cmd != DDI_DETACH)
1068                 return (DDI_FAILURE);
1069 
1070         /*
1071          * Destroy arenas and hash tables.
1072          */
1073         for (i = 0; i < TL_MAXTRANSPORT; i++) {
1074                 tl_transport_state_t *t = &tl_transports[i];
1075 
1076                 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1077                         continue;
1078 
1079                 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1080                 if (t->tr_serializer != NULL) {
1081                         tl_serializer_refrele(t->tr_serializer);
1082                         t->tr_serializer = NULL;
1083                 }
1084 
1085 #ifdef _ILP32
1086                 if (i & TL_SOCKET)
1087                         mod_hash_destroy_idhash(t->tr_ai_hash);
1088                 else
1089                         mod_hash_destroy_ptrhash(t->tr_ai_hash);
1090 #else
1091                 mod_hash_destroy_idhash(t->tr_ai_hash);
1092 #endif /* _ILP32 */
1093                 t->tr_ai_hash = NULL;
1094                 if (i & TL_SOCKET)
1095                         mod_hash_destroy_ptrhash(t->tr_addr_hash);
1096                 else
1097                         mod_hash_destroy_hash(t->tr_addr_hash);
1098                 t->tr_addr_hash = NULL;
1099         }
1100 
1101         kmem_cache_destroy(tl_cache);
1102         tl_cache = NULL;
1103         id_space_destroy(tl_minors);
1104         tl_minors = NULL;
1105         ddi_remove_minor_node(devi, NULL);
1106         return (DDI_SUCCESS);
1107 }
1108 
1109 /* ARGSUSED */
1110 static int
1111 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1112 {
1113 
1114         int retcode = DDI_FAILURE;
1115 
1116         switch (infocmd) {
1117 
1118         case DDI_INFO_DEVT2DEVINFO:
1119                 if (tl_dip != NULL) {
1120                         *result = (void *)tl_dip;
1121                         retcode = DDI_SUCCESS;
1122                 }
1123                 break;
1124 
1125         case DDI_INFO_DEVT2INSTANCE:
1126                 *result = (void *)0;
1127                 retcode = DDI_SUCCESS;
1128                 break;
1129 
1130         default:
1131                 break;
1132         }
1133         return (retcode);
1134 }
1135 
1136 /*
1137  * Endpoint reference management.
1138  */
/*
 * Take one more reference on the endpoint by atomically incrementing
 * te_refcnt. Every hold is expected to be paired with a tl_refrele().
 */
static void
tl_refhold(tl_endpt_t *tep)
{
        atomic_inc_32(&tep->te_refcnt);
}
1144 
1145 static void
1146 tl_refrele(tl_endpt_t *tep)
1147 {
1148         ASSERT(tep->te_refcnt != 0);
1149 
1150         if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
1151                 tl_free(tep);
1152 }
1153 
1154 /*ARGSUSED*/
1155 static int
1156 tl_constructor(void *buf, void *cdrarg, int kmflags)
1157 {
1158         tl_endpt_t *tep = buf;
1159 
1160         bzero(tep, sizeof (tl_endpt_t));
1161         mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1162         cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1163         mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1164         cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1165         mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1166 
1167         return (0);
1168 }
1169 
1170 /*ARGSUSED*/
1171 static void
1172 tl_destructor(void *buf, void *cdrarg)
1173 {
1174         tl_endpt_t *tep = buf;
1175 
1176         mutex_destroy(&tep->te_closelock);
1177         cv_destroy(&tep->te_closecv);
1178         mutex_destroy(&tep->te_srv_lock);
1179         cv_destroy(&tep->te_srv_cv);
1180         mutex_destroy(&tep->te_ser_lock);
1181 }
1182 
1183 static void
1184 tl_free(tl_endpt_t *tep)
1185 {
1186         ASSERT(tep->te_refcnt == 0);
1187         ASSERT(tep->te_transport != NULL);
1188         ASSERT(tep->te_rq == NULL);
1189         ASSERT(tep->te_wq == NULL);
1190         ASSERT(tep->te_ser != NULL);
1191         ASSERT(tep->te_ser_count == 0);
1192         ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1193 
1194         if (IS_SOCKET(tep)) {
1195                 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1196                 ASSERT(tep->te_abuf == &tep->te_uxaddr);
1197                 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1198                 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1199         } else if (tep->te_abuf != NULL) {
1200                 kmem_free(tep->te_abuf, tep->te_alen);
1201                 tep->te_alen = -1; /* uninitialized */
1202                 tep->te_abuf = NULL;
1203         } else {
1204                 ASSERT(tep->te_alen == -1);
1205         }
1206 
1207         id_free(tl_minors, tep->te_minor);
1208         ASSERT(tep->te_credp == NULL);
1209 
1210         if (tep->te_hash_hndl != NULL)
1211                 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1212 
1213         if (IS_COTS(tep)) {
1214                 TL_REMOVE_PEER(tep->te_conp);
1215                 TL_REMOVE_PEER(tep->te_oconp);
1216                 tl_serializer_refrele(tep->te_ser);
1217                 tep->te_ser = NULL;
1218                 ASSERT(tep->te_nicon == 0);
1219                 ASSERT(list_head(&tep->te_iconp) == NULL);
1220         } else {
1221                 ASSERT(tep->te_lastep == NULL);
1222                 ASSERT(list_head(&tep->te_flowlist) == NULL);
1223                 ASSERT(tep->te_flowq == NULL);
1224         }
1225 
1226         ASSERT(tep->te_bufcid == 0);
1227         ASSERT(tep->te_timoutid == 0);
1228         bzero(&tep->te_ap, sizeof (tep->te_ap));
1229         tep->te_acceptor_id = 0;
1230 
1231         ASSERT(tep->te_closewait == 0);
1232         ASSERT(!tep->te_rsrv_active);
1233         ASSERT(!tep->te_wsrv_active);
1234         tep->te_closing = 0;
1235         tep->te_nowsrv = B_FALSE;
1236         tep->te_flag = 0;
1237 
1238         kmem_cache_free(tl_cache, tep);
1239 }
1240 
1241 /*
1242  * Allocate/free reference-counted wrappers for serializers.
1243  */
1244 static tl_serializer_t *
1245 tl_serializer_alloc(int flags)
1246 {
1247         tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1248         serializer_t *ser;
1249 
1250         if (s == NULL)
1251                 return (NULL);
1252 
1253         ser = serializer_create(flags);
1254 
1255         if (ser == NULL) {
1256                 kmem_free(s, sizeof (tl_serializer_t));
1257                 return (NULL);
1258         }
1259 
1260         s->ts_refcnt = 1;
1261         s->ts_serializer = ser;
1262         return (s);
1263 }
1264 
/*
 * Take one more reference on the serializer wrapper by atomically
 * incrementing ts_refcnt. Paired with tl_serializer_refrele().
 */
static void
tl_serializer_refhold(tl_serializer_t *s)
{
        atomic_inc_32(&s->ts_refcnt);
}
1270 
1271 static void
1272 tl_serializer_refrele(tl_serializer_t *s)
1273 {
1274         if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
1275                 serializer_destroy(s->ts_serializer);
1276                 kmem_free(s, sizeof (tl_serializer_t));
1277         }
1278 }
1279 
1280 /*
1281  * Post a request on the endpoint serializer. For COTS transports keep track of
1282  * the number of pending requests.
1283  */
1284 static void
1285 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1286 {
1287         if (IS_COTS(tep)) {
1288                 mutex_enter(&tep->te_ser_lock);
1289                 tep->te_ser_count++;
1290                 mutex_exit(&tep->te_ser_lock);
1291         }
1292         serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1293 }
1294 
1295 /*
1296  * Complete processing the request on the serializer. Decrement the counter for
1297  * pending requests for COTS transports.
1298  */
1299 static void
1300 tl_serializer_exit(tl_endpt_t *tep)
1301 {
1302         if (IS_COTS(tep)) {
1303                 mutex_enter(&tep->te_ser_lock);
1304                 ASSERT(tep->te_ser_count != 0);
1305                 tep->te_ser_count--;
1306                 mutex_exit(&tep->te_ser_lock);
1307         }
1308 }
1309 
1310 /*
1311  * Hash management functions.
1312  */
1313 
1314 /*
1315  * Return TRUE if two addresses are equal, false otherwise.
1316  */
1317 static boolean_t
1318 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1319 {
1320         return ((ap1->ta_alen > 0) &&
1321             (ap1->ta_alen == ap2->ta_alen) &&
1322             (ap1->ta_zoneid == ap2->ta_zoneid) &&
1323             (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1324 }
1325 
1326 /*
1327  * This function is called whenever an endpoint is found in the hash table.
1328  */
1329 /* ARGSUSED0 */
1330 static void
1331 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1332 {
1333         tl_refhold((tl_endpt_t *)val);
1334 }
1335 
1336 /*
1337  * Address hash function.
1338  */
1339 /* ARGSUSED */
1340 static uint_t
1341 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1342 {
1343         tl_addr_t *ap = (tl_addr_t *)key;
1344         size_t  len = ap->ta_alen;
1345         uchar_t *p = ap->ta_abuf;
1346         uint_t i, g;
1347 
1348         ASSERT((len > 0) && (p != NULL));
1349 
1350         for (i = ap->ta_zoneid; len -- != 0; p++) {
1351                 i = (i << 4) + (*p);
1352                 if ((g = (i & 0xf0000000U)) != 0) {
1353                         i ^= (g >> 24);
1354                         i ^= g;
1355                 }
1356         }
1357         return (i);
1358 }
1359 
1360 /*
1361  * This function is used by hash lookups. It compares two generic addresses.
1362  */
1363 static int
1364 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1365 {
1366 #ifdef  DEBUG
1367         tl_addr_t *ap1 = (tl_addr_t *)key1;
1368         tl_addr_t *ap2 = (tl_addr_t *)key2;
1369 
1370         ASSERT(key1 != NULL);
1371         ASSERT(key2 != NULL);
1372 
1373         ASSERT(ap1->ta_abuf != NULL);
1374         ASSERT(ap2->ta_abuf != NULL);
1375         ASSERT(ap1->ta_alen > 0);
1376         ASSERT(ap2->ta_alen > 0);
1377 #endif
1378 
1379         return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1380 }
1381 
1382 /*
1383  * Prevent endpoint from closing if possible.
1384  * Return B_TRUE on success, B_FALSE on failure.
1385  */
1386 static boolean_t
1387 tl_noclose(tl_endpt_t *tep)
1388 {
1389         boolean_t rc = B_FALSE;
1390 
1391         mutex_enter(&tep->te_closelock);
1392         if (! tep->te_closing) {
1393                 ASSERT(tep->te_closewait == 0);
1394                 tep->te_closewait++;
1395                 rc = B_TRUE;
1396         }
1397         mutex_exit(&tep->te_closelock);
1398         return (rc);
1399 }
1400 
/*
 * Allow endpoint to close if needed.
 *
 * Counterpart of a successful tl_noclose(): drops te_closewait and signals
 * te_closecv, on which tl_close() waits while te_closewait is non-zero.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
        ASSERT(tep->te_closewait > 0);
        mutex_enter(&tep->te_closelock);
        /* At most one holder is expected at any time. */
        ASSERT(tep->te_closewait == 1);
        tep->te_closewait--;
        cv_signal(&tep->te_closecv);
        mutex_exit(&tep->te_closelock);
}
1414 
/*
 * STREAMS open entry point.
 *
 * The minor number of *devp selects the transport; SO_SOCKSTR in oflag turns
 * the endpoint into a socket endpoint. A new endpoint is taken from tl_cache,
 * initialized, given a unique minor number from tl_minors (which is written
 * back to *devp) and entered into the acceptor ID hash of its transport.
 *
 * Returns ENXIO for CLONEOPEN/MODOPEN opens and for minor numbers that do not
 * name a transport, and 0 otherwise. Opening a queue that already has an
 * endpoint attached succeeds without doing anything.
 */
/* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t  *credp)
{
        tl_endpt_t *tep;
        minor_t     minor = getminor(*devp);

        /*
         * Driver is called directly. Both CLONEOPEN and MODOPEN
         * are illegal
         */
        if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
                return (ENXIO);

        /* The queue already has an endpoint attached. */
        if (rq->q_ptr != NULL)
                return (0);

        /* Minor number should specify the mode used for the driver. */
        if ((minor >= TL_UNUSED))
                return (ENXIO);

        if (oflag & SO_SOCKSTR) {
                minor |= TL_SOCKET;
        }

        /* The new endpoint starts out with the reference owned by the open. */
        tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
        tep->te_refcnt = 1;
        tep->te_cpid = curproc->p_pid;
        rq->q_ptr = WR(rq)->q_ptr = tep;
        tep->te_state = TS_UNBND;
        tep->te_credp = credp;
        crhold(credp);
        tep->te_zoneid = getzoneid();

        /* 'minor' (including TL_SOCKET) indexes the transport table. */
        tep->te_flag = minor & TL_MINOR_MASK;
        tep->te_transport = &tl_transports[minor];

        /* Allocate a unique minor number for this instance. */
        tep->te_minor = (minor_t)id_alloc(tl_minors);

        /* Reserve hash handle for bind(). */
        (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

        /* Transport-specific initialization */
        if (IS_COTS(tep)) {
                /* Use private serializer */
                tep->te_ser = tl_serializer_alloc(KM_SLEEP);

                /* Create list for pending connections */
                list_create(&tep->te_iconp, sizeof (tl_icon_t),
                    offsetof(tl_icon_t, ti_node));
                tep->te_qlen = 0;
                tep->te_nicon = 0;
                tep->te_oconp = NULL;
                tep->te_conp = NULL;
        } else {
                /* Use shared serializer */
                tep->te_ser = tep->te_transport->tr_serializer;
                bzero(&tep->te_flows, sizeof (list_node_t));
                /* Create list for flow control */
                list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
                    offsetof(tl_endpt_t, te_flows));
                tep->te_flowq = NULL;
                tep->te_lastep = NULL;

        }

        /* Initialize endpoint address */
        if (IS_SOCKET(tep)) {
                /*
                 * Socket-specific address handling: the address lives inside
                 * the endpoint and is derived from the new minor number.
                 */
                tep->te_alen = TL_SOUX_ADDRLEN;
                tep->te_abuf = &tep->te_uxaddr;
                tep->te_vp = (void *)(uintptr_t)tep->te_minor;
                tep->te_magic = SOU_MAGIC_IMPLICIT;
        } else {
                /* No address yet; -1 marks the length as uninitialized. */
                tep->te_alen = -1;
                tep->te_abuf = NULL;
        }

        /* clone the driver */
        *devp = makedevice(getmajor(*devp), tep->te_minor);

        tep->te_rq = rq;
        tep->te_wq = WR(rq);

        /*
         * Pick the acceptor ID: the minor number, except for non-socket
         * endpoints of _ILP32 kernels, which use the read queue pointer.
         */
#ifdef  _ILP32
        if (IS_SOCKET(tep))
                tep->te_acceptor_id = tep->te_minor;
        else
                tep->te_acceptor_id = (t_uscalar_t)rq;
#else
        tep->te_acceptor_id = tep->te_minor;
#endif  /* _ILP32 */


        qprocson(rq);

        /*
         * Insert acceptor ID in the hash. The AI hash always sleeps on
         * insertion so insertion can't fail.
         */
        (void) mod_hash_insert(tep->te_transport->tr_ai_hash,
            (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
            (mod_hash_val_t)tep);

        return (0);
}
1525 
1526 /* ARGSUSED1 */
1527 static int
1528 tl_close(queue_t *rq, int flag, cred_t *credp)
1529 {
1530         tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1531         tl_endpt_t *elp = NULL;
1532         queue_t *wq = tep->te_wq;
1533         int rc;
1534 
1535         ASSERT(wq == WR(rq));
1536 
1537         /*
1538          * Remove the endpoint from acceptor hash.
1539          */
1540         rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1541             (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1542             (mod_hash_val_t *)&elp);
1543         ASSERT(rc == 0 && tep == elp);
1544         if ((rc != 0) || (tep != elp)) {
1545                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1546                     SL_TRACE|SL_ERROR,
1547                     "tl_close:inconsistency in AI hash"));
1548         }
1549 
1550         /*
1551          * Wait till close is safe, then mark endpoint as closing.
1552          */
1553         mutex_enter(&tep->te_closelock);
1554         while (tep->te_closewait)
1555                 cv_wait(&tep->te_closecv, &tep->te_closelock);
1556         tep->te_closing = B_TRUE;
1557         /*
1558          * Will wait for the serializer part of the close to finish, so set
1559          * te_closewait now.
1560          */
1561         tep->te_closewait = 1;
1562         tep->te_nowsrv = B_FALSE;
1563         mutex_exit(&tep->te_closelock);
1564 
1565         /*
1566          * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1567          * It is safe because close will wait for tl_close_ser to finish.
1568          */
1569         tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1570 
1571         /*
1572          * Wait for the first phase of close to complete before qprocsoff().
1573          */
1574         mutex_enter(&tep->te_closelock);
1575         while (tep->te_closewait)
1576                 cv_wait(&tep->te_closecv, &tep->te_closelock);
1577         mutex_exit(&tep->te_closelock);
1578 
1579         qprocsoff(rq);
1580 
1581         if (tep->te_bufcid) {
1582                 qunbufcall(rq, tep->te_bufcid);
1583                 tep->te_bufcid = 0;
1584         }
1585         if (tep->te_timoutid) {
1586                 (void) quntimeout(rq, tep->te_timoutid);
1587                 tep->te_timoutid = 0;
1588         }
1589 
1590         /*
1591          * Finish close behind serializer.
1592          *
1593          * For a CLTS endpoint increase a refcount and continue close processing
1594          * with serializer protection. This processing may happen asynchronously
1595          * with the completion of tl_close().
1596          *
         * For a COTS endpoint wait before destroying tep since the serializer
1598          * may go away together with tep and we need to destroy serializer
1599          * outside of serializer context.
1600          */
1601         ASSERT(tep->te_closewait == 0);
1602         if (IS_COTS(tep))
1603                 tep->te_closewait = 1;
1604         else
1605                 tl_refhold(tep);
1606 
1607         tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1608 
1609         /*
1610          * For connection-oriented transports wait for all serializer activity
1611          * to settle down.
1612          */
1613         if (IS_COTS(tep)) {
1614                 mutex_enter(&tep->te_closelock);
1615                 while (tep->te_closewait)
1616                         cv_wait(&tep->te_closecv, &tep->te_closelock);
1617                 mutex_exit(&tep->te_closelock);
1618         }
1619 
1620         crfree(tep->te_credp);
1621         tep->te_credp = NULL;
1622         tep->te_wq = NULL;
1623         tl_refrele(tep);
1624         /*
1625          * tep is likely to be destroyed now, so can't reference it any more.
1626          */
1627 
1628         rq->q_ptr = wq->q_ptr = NULL;
1629         return (0);
1630 }
1631 
/*
 * First phase of close processing done behind the serializer.
 *
 * Marks the endpoint with TL_CLOSE_SER, drains the write queue for the
 * transports that require it, unbinds the endpoint address, clears the
 * q_next links for non-socket COTS endpoints, clears te_rq and then wakes up
 * tl_close() through tl_closeok().
 *
 * mp is unused; tep is the endpoint being closed.
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
        /* tl_close() sets both of these before entering the serializer. */
        ASSERT(tep->te_closing);
        ASSERT(tep->te_closewait == 1);
        /* This phase must not have been run for the endpoint already. */
        ASSERT(!(tep->te_flag & TL_CLOSE_SER));

        tep->te_flag |= TL_CLOSE_SER;

        /*
         * Drain out all messages on queue except for TL_TICOTS where the
         * abortive release semantics permit discarding of data on close
         *
         * Passing NULL as the serializer mblk tells tl_wsrv_ser() that it is
         * called from close, so it neither signals te_srv_cv nor exits the
         * serializer on our behalf.
         */
        if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
                tl_wsrv_ser(NULL, tep);
        }

        /* Remove address from hash table. */
        tl_addr_unbind(tep);
        /*
         * qprocsoff() gets confused when q->q_next is not NULL on the write
         * queue of the driver, so clear these before qprocsoff() is called.
         * Also clear q_next for the peer since this queue is going away.
         */
        if (IS_COTS(tep) && !IS_SOCKET(tep)) {
                tl_endpt_t *peer_tep = tep->te_conp;

                tep->te_wq->q_next = NULL;
                /* A closing peer is left alone; only a live peer is unlinked. */
                if ((peer_tep != NULL) && !peer_tep->te_closing)
                        peer_tep->te_wq->q_next = NULL;
        }

        /*
         * Senders test the peer's te_rq for NULL before delivering data (see
         * the fast path in tl_wput_data_ser()), so clearing it stops further
         * delivery to this endpoint's read queue.
         */
        tep->te_rq = NULL;

        /* wake up tl_close() */
        tl_closeok(tep);
        tl_serializer_exit(tep);
}
1677 
1678 /*
1679  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1680  * the reference for CLTS.
1681  *
1682  * Called from serializer. Should drop reference count for CLTS only.
1683  */
1684 /* ARGSUSED0 */
1685 static void
1686 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1687 {
1688         ASSERT(tep->te_closing);
1689         IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1690         IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1691 
1692         tep->te_state = -1;  /* Uninitialized */
1693         if (IS_COTS(tep)) {
1694                 tl_co_unconnect(tep);
1695         } else {
1696                 /* Connectionless specific cleanup */
1697                 TL_REMOVE_PEER(tep->te_lastep);
1698                 /*
1699                  * Backenable anybody that is flow controlled waiting for
1700                  * this endpoint.
1701                  */
1702                 tl_cl_backenable(tep);
1703                 if (tep->te_flowq != NULL) {
1704                         list_remove(&(tep->te_flowq->te_flowlist), tep);
1705                         tep->te_flowq = NULL;
1706                 }
1707         }
1708 
1709         tl_serializer_exit(tep);
1710         if (IS_COTS(tep))
1711                 tl_closeok(tep);
1712         else
1713                 tl_refrele(tep);
1714 }
1715 
1716 /*
1717  * STREAMS write-side put procedure.
1718  * Enter serializer for most of the processing.
1719  *
1720  * The T_CONN_REQ is processed outside of serializer.
1721  */
1722 static void
1723 tl_wput(queue_t *wq, mblk_t *mp)
1724 {
1725         tl_endpt_t              *tep = (tl_endpt_t *)wq->q_ptr;
1726         ssize_t                 msz = MBLKL(mp);
1727         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
1728         tlproc_t                *tl_proc = NULL;
1729 
1730         switch (DB_TYPE(mp)) {
1731         case M_DATA:
1732                 /* Only valid for connection-oriented transports */
1733                 if (IS_CLTS(tep)) {
1734                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1735                             SL_TRACE|SL_ERROR,
1736                             "tl_wput:M_DATA invalid for ticlts driver"));
1737                         tl_merror(wq, mp, EPROTO);
1738                         return;
1739                 }
1740                 tl_proc = tl_wput_data_ser;
1741                 break;
1742 
1743         case M_IOCTL:
1744                 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1745                 case TL_IOC_CREDOPT:
1746                         /* FALLTHROUGH */
1747                 case TL_IOC_UCREDOPT:
1748                         /*
1749                          * Serialize endpoint state change.
1750                          */
1751                         tl_proc = tl_do_ioctl_ser;
1752                         break;
1753 
1754                 default:
1755                         miocnak(wq, mp, 0, EINVAL);
1756                         return;
1757                 }
1758                 break;
1759 
1760         case M_FLUSH:
1761                 /*
1762                  * do canonical M_FLUSH processing
1763                  */
1764                 if (*mp->b_rptr & FLUSHW) {
1765                         flushq(wq, FLUSHALL);
1766                         *mp->b_rptr &= ~FLUSHW;
1767                 }
1768                 if (*mp->b_rptr & FLUSHR) {
1769                         flushq(RD(wq), FLUSHALL);
1770                         qreply(wq, mp);
1771                 } else {
1772                         freemsg(mp);
1773                 }
1774                 return;
1775 
1776         case M_PROTO:
1777                 if (msz < sizeof (prim->type)) {
1778                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1779                             SL_TRACE|SL_ERROR,
1780                             "tl_wput:M_PROTO data too short"));
1781                         tl_merror(wq, mp, EPROTO);
1782                         return;
1783                 }
1784                 switch (prim->type) {
1785                 case T_OPTMGMT_REQ:
1786                 case T_SVR4_OPTMGMT_REQ:
1787                         /*
1788                          * Process TPI option management requests immediately
1789                          * in put procedure regardless of in-order processing
1790                          * of already queued messages.
1791                          * (Note: This driver supports AF_UNIX socket
1792                          * implementation.  Unless we implement this processing,
1793                          * setsockopt() on socket endpoint will block on flow
1794                          * controlled endpoints which it should not. That is
1795                          * required for successful execution of VSU socket tests
1796                          * and is consistent with BSD socket behavior).
1797                          */
1798                         tl_optmgmt(wq, mp);
1799                         return;
1800                 case O_T_BIND_REQ:
1801                 case T_BIND_REQ:
1802                         tl_proc = tl_bind_ser;
1803                         break;
1804                 case T_CONN_REQ:
1805                         if (IS_CLTS(tep)) {
1806                                 tl_merror(wq, mp, EPROTO);
1807                                 return;
1808                         }
1809                         tl_conn_req(wq, mp);
1810                         return;
1811                 case T_DATA_REQ:
1812                 case T_OPTDATA_REQ:
1813                 case T_EXDATA_REQ:
1814                 case T_ORDREL_REQ:
1815                         tl_proc = tl_putq_ser;
1816                         break;
1817                 case T_UNITDATA_REQ:
1818                         if (IS_COTS(tep) ||
1819                             (msz < sizeof (struct T_unitdata_req))) {
1820                                 tl_merror(wq, mp, EPROTO);
1821                                 return;
1822                         }
1823                         if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1824                                 tl_proc = tl_unitdata_ser;
1825                         } else {
1826                                 tl_proc = tl_putq_ser;
1827                         }
1828                         break;
1829                 default:
1830                         /*
1831                          * process in service procedure if message already
1832                          * queued (maintain in-order processing)
1833                          */
1834                         if (wq->q_first != NULL) {
1835                                 tl_proc = tl_putq_ser;
1836                         } else {
1837                                 tl_proc = tl_wput_ser;
1838                         }
1839                         break;
1840                 }
1841                 break;
1842 
1843         case M_PCPROTO:
1844                 /*
1845                  * Check that the message has enough data to figure out TPI
1846                  * primitive.
1847                  */
1848                 if (msz < sizeof (prim->type)) {
1849                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1850                             SL_TRACE|SL_ERROR,
1851                             "tl_wput:M_PCROTO data too short"));
1852                         tl_merror(wq, mp, EPROTO);
1853                         return;
1854                 }
1855                 switch (prim->type) {
1856                 case T_CAPABILITY_REQ:
1857                         tl_capability_req(mp, tep);
1858                         return;
1859                 case T_INFO_REQ:
1860                         tl_proc = tl_info_req_ser;
1861                         break;
1862                 case T_ADDR_REQ:
1863                         tl_proc = tl_addr_req_ser;
1864                         break;
1865 
1866                 default:
1867                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1868                             SL_TRACE|SL_ERROR,
1869                             "tl_wput:unknown TPI msg primitive"));
1870                         tl_merror(wq, mp, EPROTO);
1871                         return;
1872                 }
1873                 break;
1874         default:
1875                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1876                     "tl_wput:default:unexpected Streams message"));
1877                 freemsg(mp);
1878                 return;
1879         }
1880 
1881         /*
1882          * Continue processing via serializer.
1883          */
1884         ASSERT(tl_proc != NULL);
1885         tl_refhold(tep);
1886         tl_serializer_enter(tep, tl_proc, mp);
1887 }
1888 
1889 /*
1890  * Place message on the queue while preserving order.
1891  */
1892 static void
1893 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1894 {
1895         if (tep->te_closing) {
1896                 tl_wput_ser(mp, tep);
1897         } else {
1898                 TL_PUTQ(tep, mp);
1899                 tl_serializer_exit(tep);
1900                 tl_refrele(tep);
1901         }
1902 
1903 }
1904 
1905 static void
1906 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1907 {
1908         ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1909 
1910         switch (DB_TYPE(mp)) {
1911         case M_DATA:
1912                 tl_data(mp, tep);
1913                 break;
1914         case M_PROTO:
1915                 tl_do_proto(mp, tep);
1916                 break;
1917         default:
1918                 freemsg(mp);
1919                 break;
1920         }
1921 }
1922 
/*
 * Write side put procedure called from serializer.
 *
 * Processes a single M_DATA or M_PROTO message through tl_wput_common_ser(),
 * then leaves the serializer and drops the endpoint reference that was taken
 * before the serializer was entered (tl_wput() does a tl_refhold() for every
 * message it hands to the serializer).
 */
static void
tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
{
        tl_wput_common_ser(mp, tep);
        tl_serializer_exit(tep);
        tl_refrele(tep);
}
1933 
/*
 * M_DATA processing. Called from serializer.
 *
 * Three outcomes are possible for the message:
 *  - fast path: it is passed straight to the peer's read side with putnext();
 *  - it is placed on our own write queue with TL_PUTQ();
 *  - it is freed (endpoint is closing and is not in a data transfer state).
 *
 * On return the serializer has been exited and the reference held for this
 * call has been dropped.
 */
static void
tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
{
        tl_endpt_t      *peer_tep = tep->te_conp;
        queue_t         *peer_rq;

        ASSERT(DB_TYPE(mp) == M_DATA);
        ASSERT(IS_COTS(tep));

        /* A connected peer must share the serializer with this endpoint. */
        IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);

        /*
         * fastpath for data. Ignore flow control if tep is closing.
         *
         * The fast path requires: a peer that is not closing, both ends in
         * TS_DATA_XFER or TS_WREQ_ORDREL, nothing already queued on our write
         * queue (so ordering is kept), and a peer read queue that is still
         * present. The order of the tests matters: peer_rq is assigned inside
         * the condition and canputnext() is only called once everything
         * before it holds.
         */
        if ((peer_tep != NULL) &&
            !peer_tep->te_closing &&
            ((tep->te_state == TS_DATA_XFER) ||
            (tep->te_state == TS_WREQ_ORDREL)) &&
            (tep->te_wq != NULL) &&
            (tep->te_wq->q_first == NULL) &&
            ((peer_tep->te_state == TS_DATA_XFER) ||
            (peer_tep->te_state == TS_WREQ_ORDREL))  &&
            ((peer_rq = peer_tep->te_rq) != NULL) &&
            (canputnext(peer_rq) || tep->te_closing)) {
                putnext(peer_rq, mp);
        } else if (tep->te_closing) {
                /*
                 * It is possible that by the time we got here tep started to
                 * close. If the write queue is not empty, and the state is
                 * TS_DATA_XFER the data should be delivered in order, so we
                 * call putq() instead of freeing the data.
                 */
                if ((tep->te_wq != NULL) &&
                    ((tep->te_state == TS_DATA_XFER) ||
                    (tep->te_state == TS_WREQ_ORDREL))) {
                        TL_PUTQ(tep, mp);
                } else {
                        freemsg(mp);
                }
        } else {
                /* Not closing and fast path not possible: queue the data. */
                TL_PUTQ(tep, mp);
        }

        tl_serializer_exit(tep);
        tl_refrele(tep);
}
1983 
/*
 * Write side service routine.
 *
 * All actual processing happens within serializer which is entered
 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
 * messages that need processing may have arrived, so tl_wsrv repeats until
 * queue is empty or te_nowsrv is set.
 *
 * The handshake with tl_wsrv_ser() uses te_wsrv_active, protected by
 * te_srv_lock: it is set here before the serializer is entered and cleared by
 * tl_wsrv_ser(), which signals te_srv_cv when it is done.
 */
static void
tl_wsrv(queue_t *wq)
{
        tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;

        while ((wq->q_first != NULL) && !tep->te_nowsrv) {
                /* Announce that a serializer job is outstanding. */
                mutex_enter(&tep->te_srv_lock);
                ASSERT(tep->te_wsrv_active == B_FALSE);
                tep->te_wsrv_active = B_TRUE;
                mutex_exit(&tep->te_srv_lock);

                tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);

                /*
                 * Wait for serializer job to complete.
                 */
                mutex_enter(&tep->te_srv_lock);
                while (tep->te_wsrv_active) {
                        cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
                }
                /*
                 * NOTE(review): te_srv_cv is also waited on by tl_rsrv();
                 * presumably this signal passes the wakeup on in case the
                 * single cv_signal() from the serializer side woke us instead
                 * of the other waiter - confirm.
                 */
                cv_signal(&tep->te_srv_cv);
                mutex_exit(&tep->te_srv_lock);
        }
}
2016 
/*
 * Serialized write side processing of the STREAMS queue.
 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
 * is NULL.
 *
 * Takes messages off the write queue one at a time and processes each with
 * tl_wput_common_ser() until the queue is empty or te_nowsrv gets set.
 */
static void
tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
{
        mblk_t *mp;
        queue_t *wq = tep->te_wq;

        ASSERT(wq != NULL);
        /* te_nowsrv is tested first so that no message is dequeued once set. */
        while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
                tl_wput_common_ser(mp, tep);
        }

        /*
         * Wakeup service routine unless called from close.
         * If ser_mp is specified, the caller is tl_wsrv().
         * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
         * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
         * be no matching tl_serializer_exit() in this case.
         * Also, there is no need to wakeup anyone since tl_close_ser() is not
         * waiting on te_srv_cv.
         */
        if (ser_mp != NULL) {
                /*
                 * We are called from tl_wsrv.
                 */
                mutex_enter(&tep->te_srv_lock);
                ASSERT(tep->te_wsrv_active);
                tep->te_wsrv_active = B_FALSE;
                cv_signal(&tep->te_srv_cv);
                mutex_exit(&tep->te_srv_lock);
                tl_serializer_exit(tep);
        }
}
2054 
/*
 * Called when the stream is backenabled. Enter serializer and qenable everyone
 * flow controlled by tep.
 *
 * NOTE: The service routine should enter serializer synchronously. Otherwise it
 * is possible that two instances of tl_rsrv will be running reusing the same
 * rsrv mblk.
 *
 * The work itself is done by tl_rsrv_ser(); this routine only starts it and
 * blocks until tl_rsrv_ser() clears te_rsrv_active.
 */
static void
tl_rsrv(queue_t *rq)
{
        tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;

        /* Nothing is expected to be queued on the read side of this driver. */
        ASSERT(rq->q_first == NULL);
        ASSERT(tep->te_rsrv_active == 0);

        /*
         * NOTE(review): te_rsrv_active is set here without holding
         * te_srv_lock, whereas tl_wsrv() sets te_wsrv_active under that lock
         * - confirm this is intentional.
         */
        tep->te_rsrv_active = B_TRUE;
        tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
        /*
         * Wait for serializer job to complete.
         */
        mutex_enter(&tep->te_srv_lock);
        while (tep->te_rsrv_active) {
                cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
        }
        /*
         * NOTE(review): te_srv_cv is also waited on by tl_wsrv(); presumably
         * this signal passes the wakeup on to that waiter - confirm.
         */
        cv_signal(&tep->te_srv_cv);
        mutex_exit(&tep->te_srv_lock);
}
2083 
2084 /* ARGSUSED */
2085 static void
2086 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2087 {
2088         tl_endpt_t *peer_tep;
2089 
2090         if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2091                 tl_cl_backenable(tep);
2092         } else if (
2093             IS_COTS(tep) &&
2094             ((peer_tep = tep->te_conp) != NULL) &&
2095             !peer_tep->te_closing &&
2096             ((tep->te_state == TS_DATA_XFER) ||
2097             (tep->te_state == TS_WIND_ORDREL)||
2098             (tep->te_state == TS_WREQ_ORDREL))) {
2099                 TL_QENABLE(peer_tep);
2100         }
2101 
2102         /*
2103          * Wakeup read side service routine.
2104          */
2105         mutex_enter(&tep->te_srv_lock);
2106         ASSERT(tep->te_rsrv_active);
2107         tep->te_rsrv_active = B_FALSE;
2108         cv_signal(&tep->te_srv_cv);
2109         mutex_exit(&tep->te_srv_lock);
2110         tl_serializer_exit(tep);
2111 }
2112 
2113 /*
2114  * process M_PROTO messages. Always called from serializer.
2115  */
2116 static void
2117 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2118 {
2119         ssize_t                 msz = MBLKL(mp);
2120         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
2121 
2122         /* Message size was validated by tl_wput(). */
2123         ASSERT(msz >= sizeof (prim->type));
2124 
2125         switch (prim->type) {
2126         case T_UNBIND_REQ:
2127                 tl_unbind(mp, tep);
2128                 break;
2129 
2130         case T_ADDR_REQ:
2131                 tl_addr_req(mp, tep);
2132                 break;
2133 
2134         case O_T_CONN_RES:
2135         case T_CONN_RES:
2136                 if (IS_CLTS(tep)) {
2137                         tl_merror(tep->te_wq, mp, EPROTO);
2138                         break;
2139                 }
2140                 tl_conn_res(mp, tep);
2141                 break;
2142 
2143         case T_DISCON_REQ:
2144                 if (IS_CLTS(tep)) {
2145                         tl_merror(tep->te_wq, mp, EPROTO);
2146                         break;
2147                 }
2148                 tl_discon_req(mp, tep);
2149                 break;
2150 
2151         case T_DATA_REQ:
2152                 if (IS_CLTS(tep)) {
2153                         tl_merror(tep->te_wq, mp, EPROTO);
2154                         break;
2155                 }
2156                 tl_data(mp, tep);
2157                 break;
2158 
2159         case T_OPTDATA_REQ:
2160                 if (IS_CLTS(tep)) {
2161                         tl_merror(tep->te_wq, mp, EPROTO);
2162                         break;
2163                 }
2164                 tl_data(mp, tep);
2165                 break;
2166 
2167         case T_EXDATA_REQ:
2168                 if (IS_CLTS(tep)) {
2169                         tl_merror(tep->te_wq, mp, EPROTO);
2170                         break;
2171                 }
2172                 tl_exdata(mp, tep);
2173                 break;
2174 
2175         case T_ORDREL_REQ:
2176                 if (! IS_COTSORD(tep)) {
2177                         tl_merror(tep->te_wq, mp, EPROTO);
2178                         break;
2179                 }
2180                 tl_ordrel(mp, tep);
2181                 break;
2182 
2183         case T_UNITDATA_REQ:
2184                 if (IS_COTS(tep)) {
2185                         tl_merror(tep->te_wq, mp, EPROTO);
2186                         break;
2187                 }
2188                 tl_unitdata(mp, tep);
2189                 break;
2190 
2191         default:
2192                 tl_merror(tep->te_wq, mp, EPROTO);
2193                 break;
2194         }
2195 }
2196 
2197 /*
2198  * Process ioctl from serializer.
2199  * This is a wrapper around tl_do_ioctl().
2200  */
2201 static void
2202 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2203 {
2204         if (! tep->te_closing)
2205                 tl_do_ioctl(mp, tep);
2206         else
2207                 freemsg(mp);
2208 
2209         tl_serializer_exit(tep);
2210         tl_refrele(tep);
2211 }
2212 
2213 static void
2214 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2215 {
2216         struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2217         int cmd = iocbp->ioc_cmd;
2218         queue_t *wq = tep->te_wq;
2219         int error;
2220         int thisopt, otheropt;
2221 
2222         ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2223 
2224         switch (cmd) {
2225         case TL_IOC_CREDOPT:
2226                 if (cmd == TL_IOC_CREDOPT) {
2227                         thisopt = TL_SETCRED;
2228                         otheropt = TL_SETUCRED;
2229                 } else {
2230                         /* FALLTHROUGH */
2231         case TL_IOC_UCREDOPT:
2232                         thisopt = TL_SETUCRED;
2233                         otheropt = TL_SETCRED;
2234                 }
2235                 /*
2236                  * The credentials passing does not apply to sockets.
2237                  * Only one of the cred options can be set at a given time.
2238                  */
2239                 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2240                         miocnak(wq, mp, 0, EINVAL);
2241                         return;
2242                 }
2243 
2244                 /*
2245                  * Turn on generation of credential options for
2246                  * T_conn_req, T_conn_con, T_unidata_ind.
2247                  */
2248                 error = miocpullup(mp, sizeof (uint32_t));
2249                 if (error != 0) {
2250                         miocnak(wq, mp, 0, error);
2251                         return;
2252                 }
2253                 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2254                         miocnak(wq, mp, 0, EINVAL);
2255                         return;
2256                 }
2257 
2258                 if (*(uint32_t *)mp->b_cont->b_rptr)
2259                         tep->te_flag |= thisopt;
2260                 else
2261                         tep->te_flag &= ~thisopt;
2262 
2263                 miocack(wq, mp, 0, 0);
2264                 break;
2265 
2266         default:
2267                 /* Should not be here */
2268                 miocnak(wq, mp, 0, EINVAL);
2269                 break;
2270         }
2271 }
2272 
2273 
2274 /*
2275  * send T_ERROR_ACK
2276  * Note: assumes enough memory or caller passed big enough mp
2277  *      - no recovery from allocb failures
2278  */
2279 
2280 static void
2281 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2282     t_scalar_t unix_err, t_scalar_t type)
2283 {
2284         struct T_error_ack *err_ack;
2285         mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2286             M_PCPROTO, T_ERROR_ACK);
2287 
2288         if (ackmp == NULL) {
2289                 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2290                     "tl_error_ack:out of mblk memory"));
2291                 tl_merror(wq, NULL, ENOSR);
2292                 return;
2293         }
2294         err_ack = (struct T_error_ack *)ackmp->b_rptr;
2295         err_ack->ERROR_prim = type;
2296         err_ack->TLI_error = tli_err;
2297         err_ack->UNIX_error = unix_err;
2298 
2299         /*
2300          * send error ack message
2301          */
2302         qreply(wq, ackmp);
2303 }
2304 
2305 
2306 
2307 /*
2308  * send T_OK_ACK
2309  * Note: assumes enough memory or caller passed big enough mp
2310  *      - no recovery from allocb failures
2311  */
2312 static void
2313 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2314 {
2315         struct T_ok_ack *ok_ack;
2316         mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2317             M_PCPROTO, T_OK_ACK);
2318 
2319         if (ackmp == NULL) {
2320                 tl_merror(wq, NULL, ENOMEM);
2321                 return;
2322         }
2323 
2324         ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2325         ok_ack->CORRECT_prim = type;
2326 
2327         (void) qreply(wq, ackmp);
2328 }
2329 
2330 /*
2331  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2332  * This is a wrapper around tl_bind().
2333  */
2334 static void
2335 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2336 {
2337         if (! tep->te_closing)
2338                 tl_bind(mp, tep);
2339         else
2340                 freemsg(mp);
2341 
2342         tl_serializer_exit(tep);
2343         tl_refrele(tep);
2344 }
2345 
2346 /*
2347  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
 * Assumes that the endpoint is in the unbound state.
2349  */
2350 static void
2351 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2352 {
2353         queue_t                 *wq = tep->te_wq;
2354         struct T_bind_ack       *b_ack;
2355         struct T_bind_req       *bind = (struct T_bind_req *)mp->b_rptr;
2356         mblk_t                  *ackmp, *bamp;
2357         soux_addr_t             ux_addr;
2358         t_uscalar_t             qlen = 0;
2359         t_scalar_t              alen, aoff;
2360         tl_addr_t               addr_req;
2361         void                    *addr_startp;
2362         ssize_t                 msz = MBLKL(mp), basize;
2363         t_scalar_t              tli_err = 0, unix_err = 0;
2364         t_scalar_t              save_prim_type = bind->PRIM_type;
2365         t_scalar_t              save_state = tep->te_state;
2366 
2367         if (tep->te_state != TS_UNBND) {
2368                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2369                     SL_TRACE|SL_ERROR,
2370                     "tl_wput:bind_request:out of state, state=%d",
2371                     tep->te_state));
2372                 tli_err = TOUTSTATE;
2373                 goto error;
2374         }
2375 
2376         if (msz < sizeof (struct T_bind_req)) {
2377                 tli_err = TSYSERR; unix_err = EINVAL;
2378                 goto error;
2379         }
2380 
2381         tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2382 
2383         ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2384             (bind->PRIM_type == T_BIND_REQ));
2385 
2386         alen = bind->ADDR_length;
2387         aoff = bind->ADDR_offset;
2388 
2389         /* negotiate max conn req pending */
2390         if (IS_COTS(tep)) {
2391                 qlen = bind->CONIND_number;
2392                 if (qlen > tl_maxqlen)
2393                         qlen = tl_maxqlen;
2394         }
2395 
2396         /*
2397          * Reserve hash handle. It can only be NULL if the endpoint is unbound
2398          * and bound again.
2399          */
2400         if ((tep->te_hash_hndl == NULL) &&
2401             ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2402             mod_hash_reserve_nosleep(tep->te_addrhash,
2403             &tep->te_hash_hndl) != 0) {
2404                 tli_err = TSYSERR; unix_err = ENOSR;
2405                 goto error;
2406         }
2407 
2408         /*
2409          * Verify address correctness.
2410          */
2411         if (IS_SOCKET(tep)) {
2412                 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2413 
2414                 if ((alen != TL_SOUX_ADDRLEN) ||
2415                     (aoff < 0) ||
2416                     (aoff + alen > msz)) {
2417                         (void) (STRLOG(TL_ID, tep->te_minor,
2418                             1, SL_TRACE|SL_ERROR,
2419                             "tl_bind: invalid socket addr"));
2420                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2421                         tli_err = TSYSERR; unix_err = EINVAL;
2422                         goto error;
2423                 }
2424                 /* Copy address from message to local buffer. */
2425                 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2426                 /*
2427                  * Check that we got correct address from sockets
2428                  */
2429                 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2430                     (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2431                         (void) (STRLOG(TL_ID, tep->te_minor,
2432                             1, SL_TRACE|SL_ERROR,
2433                             "tl_bind: invalid socket magic"));
2434                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2435                         tli_err = TSYSERR; unix_err = EINVAL;
2436                         goto error;
2437                 }
2438                 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2439                     (ux_addr.soua_vp != NULL)) {
2440                         (void) (STRLOG(TL_ID, tep->te_minor,
2441                             1, SL_TRACE|SL_ERROR,
2442                             "tl_bind: implicit addr non-empty"));
2443                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2444                         tli_err = TSYSERR; unix_err = EINVAL;
2445                         goto error;
2446                 }
2447                 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2448                     (ux_addr.soua_vp == NULL)) {
2449                         (void) (STRLOG(TL_ID, tep->te_minor,
2450                             1, SL_TRACE|SL_ERROR,
2451                             "tl_bind: explicit addr empty"));
2452                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2453                         tli_err = TSYSERR; unix_err = EINVAL;
2454                         goto error;
2455                 }
2456         } else {
2457                 if ((alen > 0) && ((aoff < 0) ||
2458                     ((ssize_t)(aoff + alen) > msz) ||
2459                     ((aoff + alen) < 0))) {
2460                         (void) (STRLOG(TL_ID, tep->te_minor,
2461                             1, SL_TRACE|SL_ERROR,
2462                             "tl_bind: invalid message"));
2463                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2464                         tli_err = TSYSERR; unix_err = EINVAL;
2465                         goto error;
2466                 }
2467                 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2468                         (void) (STRLOG(TL_ID, tep->te_minor,
2469                             1, SL_TRACE|SL_ERROR,
2470                             "tl_bind: bad addr in  message"));
2471                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2472                         tli_err = TBADADDR;
2473                         goto error;
2474                 }
2475 #ifdef DEBUG
2476                 /*
2477                  * Mild form of ASSERT()ion to detect broken TPI apps.
2478                  * if (! assertion)
2479                  *      log warning;
2480                  */
2481                 if (! ((alen == 0 && aoff == 0) ||
2482                         (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2483                         (void) (STRLOG(TL_ID, tep->te_minor,
2484                                     3, SL_TRACE|SL_ERROR,
2485                                     "tl_bind: addr overlaps TPI message"));
2486                 }
2487 #endif
2488         }
2489 
2490         /*
2491          * Bind the address provided or allocate one if requested.
2492          * Allow rebinds with a new qlen value.
2493          */
2494         if (IS_SOCKET(tep)) {
2495                 /*
2496                  * For anonymous requests the te_ap is already set up properly
2497                  * so use minor number as an address.
2498                  * For explicit requests need to check whether the address is
2499                  * already in use.
2500                  */
2501                 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2502                         int rc;
2503 
2504                         if (tep->te_flag & TL_ADDRHASHED) {
2505                                 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2506                                 if (tep->te_vp == ux_addr.soua_vp)
2507                                         goto skip_addr_bind;
2508                                 else /* Rebind to a new address. */
2509                                         tl_addr_unbind(tep);
2510                         }
2511                         /*
2512                          * Insert address in the hash if it is not already
2513                          * there.  Since we use preallocated handle, the insert
2514                          * can fail only if the key is already present.
2515                          */
2516                         rc = mod_hash_insert_reserve(tep->te_addrhash,
2517                             (mod_hash_key_t)ux_addr.soua_vp,
2518                             (mod_hash_val_t)tep, tep->te_hash_hndl);
2519 
2520                         if (rc != 0) {
2521                                 ASSERT(rc == MH_ERR_DUPLICATE);
2522                                 /*
2523                                  * Violate O_T_BIND_REQ semantics and fail with
2524                                  * TADDRBUSY - sockets will not use any address
2525                                  * other than supplied one for explicit binds.
2526                                  */
2527                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2528                                     SL_TRACE|SL_ERROR,
2529                                     "tl_bind:requested addr %p is busy",
2530                                     ux_addr.soua_vp));
2531                                 tli_err = TADDRBUSY; unix_err = 0;
2532                                 goto error;
2533                         }
2534                         tep->te_uxaddr = ux_addr;
2535                         tep->te_flag |= TL_ADDRHASHED;
2536                         tep->te_hash_hndl = NULL;
2537                 }
2538         } else if (alen == 0) {
2539                 /*
2540                  * assign any free address
2541                  */
2542                 if (! tl_get_any_addr(tep, NULL)) {
2543                         (void) (STRLOG(TL_ID, tep->te_minor,
2544                             1, SL_TRACE|SL_ERROR,
2545                             "tl_bind:failed to get buffer for any "
2546                             "address"));
2547                         tli_err = TSYSERR; unix_err = ENOSR;
2548                         goto error;
2549                 }
2550         } else {
2551                 addr_req.ta_alen = alen;
2552                 addr_req.ta_abuf = (mp->b_rptr + aoff);
2553                 addr_req.ta_zoneid = tep->te_zoneid;
2554 
2555                 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2556                 if (tep->te_abuf == NULL) {
2557                         tli_err = TSYSERR; unix_err = ENOSR;
2558                         goto error;
2559                 }
2560                 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2561                 tep->te_alen = alen;
2562 
2563                 if (mod_hash_insert_reserve(tep->te_addrhash,
2564                     (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2565                     tep->te_hash_hndl) != 0) {
2566                         if (save_prim_type == T_BIND_REQ) {
2567                                 /*
2568                                  * The bind semantics for this primitive
2569                                  * require a failure if the exact address
2570                                  * requested is busy
2571                                  */
2572                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2573                                     SL_TRACE|SL_ERROR,
2574                                     "tl_bind:requested addr is busy"));
2575                                 tli_err = TADDRBUSY; unix_err = 0;
2576                                 goto error;
2577                         }
2578 
2579                         /*
2580                          * O_T_BIND_REQ semantics say if address if requested
2581                          * address is busy, bind to any available free address
2582                          */
2583                         if (! tl_get_any_addr(tep, &addr_req)) {
2584                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2585                                     SL_TRACE|SL_ERROR,
2586                                     "tl_bind:unable to get any addr buf"));
2587                                 tli_err = TSYSERR; unix_err = ENOMEM;
2588                                 goto error;
2589                         }
2590                 } else {
2591                         tep->te_flag |= TL_ADDRHASHED;
2592                         tep->te_hash_hndl = NULL;
2593                 }
2594         }
2595 
2596         ASSERT(tep->te_alen >= 0);
2597 
2598 skip_addr_bind:
2599         /*
2600          * prepare T_BIND_ACK TPI message
2601          */
2602         basize = sizeof (struct T_bind_ack) + tep->te_alen;
2603         bamp = reallocb(mp, basize, 0);
2604         if (bamp == NULL) {
2605                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2606                     "tl_wput:tl_bind: allocb failed"));
2607                 /*
2608                  * roll back state changes
2609                  */
2610                 tl_addr_unbind(tep);
2611                 tep->te_state = TS_UNBND;
2612                 tl_memrecover(wq, mp, basize);
2613                 return;
2614         }
2615 
2616         DB_TYPE(bamp) = M_PCPROTO;
2617         bamp->b_wptr = bamp->b_rptr + basize;
2618         b_ack = (struct T_bind_ack *)bamp->b_rptr;
2619         b_ack->PRIM_type = T_BIND_ACK;
2620         b_ack->CONIND_number = qlen;
2621         b_ack->ADDR_length = tep->te_alen;
2622         b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2623         addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2624         bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2625 
2626         if (IS_COTS(tep)) {
2627                 tep->te_qlen = qlen;
2628                 if (qlen > 0)
2629                         tep->te_flag |= TL_LISTENER;
2630         }
2631 
2632         tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2633         /*
2634          * send T_BIND_ACK message
2635          */
2636         (void) qreply(wq, bamp);
2637         return;
2638 
2639 error:
2640         ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2641         if (ackmp == NULL) {
2642                 /*
2643                  * roll back state changes
2644                  */
2645                 tep->te_state = save_state;
2646                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2647                 return;
2648         }
2649         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2650         tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2651 }
2652 
2653 /*
2654  * Process T_UNBIND_REQ.
2655  * Called from serializer.
2656  */
2657 static void
2658 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2659 {
2660         queue_t *wq;
2661         mblk_t *ackmp;
2662 
2663         if (tep->te_closing) {
2664                 freemsg(mp);
2665                 return;
2666         }
2667 
2668         wq = tep->te_wq;
2669 
2670         /*
2671          * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2672          * ==> allocate for T_ERROR_ACK (known max)
2673          */
2674         if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2675                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2676                 return;
2677         }
2678         /*
2679          * memory resources committed
2680          * Note: no message validation. T_UNBIND_REQ message is
2681          * same size as PRIM_type field so already verified earlier.
2682          */
2683 
2684         /*
2685          * validate state
2686          */
2687         if (tep->te_state != TS_IDLE) {
2688                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2689                     SL_TRACE|SL_ERROR,
2690                     "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2691                     tep->te_state));
2692                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2693                 return;
2694         }
2695         tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2696 
2697         /*
2698          * TPI says on T_UNBIND_REQ:
2699          *    send up a M_FLUSH to flush both
2700          *    read and write queues
2701          */
2702         (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2703 
2704         if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2705             tep->te_magic != SOU_MAGIC_EXPLICIT) {
2706 
2707                 /*
2708                  * Sockets use bind with qlen==0 followed by bind() to
2709                  * the same address with qlen > 0 for listeners.
2710                  * We allow rebind with a new qlen value.
2711                  */
2712                 tl_addr_unbind(tep);
2713         }
2714 
2715         tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2716         /*
2717          * send  T_OK_ACK
2718          */
2719         tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2720 }
2721 
2722 
2723 /*
2724  * Option management code from drv/ip is used here
2725  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2726  *      database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2727  *      However, that is what we want as that option is 'unorthodox'
2728  *      and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2729  *      and not in T_SVR4_OPTMGMT_REQ/ACK
2730  * Note2: use of optcom_req means this routine is an exception to
2731  *       recovery from allocb() failures.
2732  */
2733 
2734 static void
2735 tl_optmgmt(queue_t *wq, mblk_t *mp)
2736 {
2737         tl_endpt_t *tep;
2738         mblk_t *ackmp;
2739         union T_primitives *prim;
2740         cred_t *cr;
2741 
2742         tep = (tl_endpt_t *)wq->q_ptr;
2743         prim = (union T_primitives *)mp->b_rptr;
2744 
2745         /*
2746          * All Solaris components should pass a db_credp
2747          * for this TPI message, hence we ASSERT.
2748          * But in case there is some other M_PROTO that looks
2749          * like a TPI message sent by some other kernel
2750          * component, we check and return an error.
2751          */
2752         cr = msg_getcred(mp, NULL);
2753         ASSERT(cr != NULL);
2754         if (cr == NULL) {
2755                 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2756                 return;
2757         }
2758 
2759         /*  all states OK for AF_UNIX options ? */
2760         if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2761             prim->type == T_SVR4_OPTMGMT_REQ) {
2762                 /*
2763                  * Broken TLI semantics that options can only be managed
2764                  * in TS_IDLE state. Needed for Sparc ABI test suite that
2765                  * tests this TLI (mis)feature using this device driver.
2766                  */
2767                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2768                     SL_TRACE|SL_ERROR,
2769                     "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2770                     tep->te_state));
2771                 /*
2772                  * preallocate memory for T_ERROR_ACK
2773                  */
2774                 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2775                 if (! ackmp) {
2776                         tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2777                         return;
2778                 }
2779 
2780                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2781                 freemsg(mp);
2782                 return;
2783         }
2784 
2785         /*
2786          * call common option management routine from drv/ip
2787          */
2788         if (prim->type == T_SVR4_OPTMGMT_REQ) {
2789                 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2790         } else {
2791                 ASSERT(prim->type == T_OPTMGMT_REQ);
2792                 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2793         }
2794 }
2795 
2796 /*
2797  * Handle T_conn_req - the driver part of accept().
2798  * If TL_SET[U]CRED generate the credentials options.
2799  * If this is a socket pass through options unmodified.
2800  * For sockets generate the T_CONN_CON here instead of
2801  * waiting for the T_CONN_RES.
2802  */
2803 static void
2804 tl_conn_req(queue_t *wq, mblk_t *mp)
2805 {
2806         tl_endpt_t              *tep = (tl_endpt_t *)wq->q_ptr;
2807         struct T_conn_req       *creq = (struct T_conn_req *)mp->b_rptr;
2808         ssize_t                 msz = MBLKL(mp);
2809         t_scalar_t              alen, aoff, olen, ooff, err = 0;
2810         tl_endpt_t              *peer_tep = NULL;
2811         mblk_t                  *ackmp;
2812         mblk_t                  *dimp;
2813         struct T_discon_ind     *di;
2814         soux_addr_t             ux_addr;
2815         tl_addr_t               dst;
2816 
2817         ASSERT(IS_COTS(tep));
2818 
2819         if (tep->te_closing) {
2820                 freemsg(mp);
2821                 return;
2822         }
2823 
2824         /*
2825          * preallocate memory for:
2826          * 1. max of T_ERROR_ACK and T_OK_ACK
2827          *      ==> known max T_ERROR_ACK
2828          * 2. max of T_DISCON_IND and T_CONN_IND
2829          */
2830         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2831         if (! ackmp) {
2832                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2833                 return;
2834         }
2835         /*
2836          * memory committed for T_OK_ACK/T_ERROR_ACK now
2837          * will be committed for T_DISCON_IND/T_CONN_IND later
2838          */
2839 
2840         if (tep->te_state != TS_IDLE) {
2841                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2842                     SL_TRACE|SL_ERROR,
2843                     "tl_wput:T_CONN_REQ:out of state, state=%d",
2844                     tep->te_state));
2845                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2846                 freemsg(mp);
2847                 return;
2848         }
2849 
2850         /*
2851          * validate the message
2852          * Note: dereference fields in struct inside message only
2853          * after validating the message length.
2854          */
2855         if (msz < sizeof (struct T_conn_req)) {
2856                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2857                     "tl_conn_req:invalid message length"));
2858                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2859                 freemsg(mp);
2860                 return;
2861         }
2862         alen = creq->DEST_length;
2863         aoff = creq->DEST_offset;
2864         olen = creq->OPT_length;
2865         ooff = creq->OPT_offset;
2866         if (olen == 0)
2867                 ooff = 0;
2868 
2869         if (IS_SOCKET(tep)) {
2870                 if ((alen != TL_SOUX_ADDRLEN) ||
2871                     (aoff < 0) ||
2872                     (aoff + alen > msz) ||
2873                     (alen > msz - sizeof (struct T_conn_req))) {
2874                         (void) (STRLOG(TL_ID, tep->te_minor,
2875                                     1, SL_TRACE|SL_ERROR,
2876                                     "tl_conn_req: invalid socket addr"));
2877                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2878                         freemsg(mp);
2879                         return;
2880                 }
2881                 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2882                 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2883                     (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2884                         (void) (STRLOG(TL_ID, tep->te_minor,
2885                             1, SL_TRACE|SL_ERROR,
2886                             "tl_conn_req: invalid socket magic"));
2887                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2888                         freemsg(mp);
2889                         return;
2890                 }
2891         } else {
2892                 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2893                     (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2894                     ooff + olen < 0)) ||
2895                     olen < 0 || ooff < 0) {
2896                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2897                             SL_TRACE|SL_ERROR,
2898                             "tl_conn_req:invalid message"));
2899                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2900                         freemsg(mp);
2901                         return;
2902                 }
2903 
2904                 if (alen <= 0 || aoff < 0 ||
2905                     (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2906                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2907                                     SL_TRACE|SL_ERROR,
2908                                     "tl_conn_req:bad addr in message, "
2909                                     "alen=%d, msz=%ld",
2910                                     alen, msz));
2911                         tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2912                         freemsg(mp);
2913                         return;
2914                 }
2915 #ifdef DEBUG
2916                 /*
2917                  * Mild form of ASSERT()ion to detect broken TPI apps.
2918                  * if (! assertion)
2919                  *      log warning;
2920                  */
2921                 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2922                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
2923                             SL_TRACE|SL_ERROR,
2924                             "tl_conn_req: addr overlaps TPI message"));
2925                 }
2926 #endif
2927                 if (olen) {
2928                         /*
2929                          * no opts in connect req
2930                          * supported in this provider except for sockets.
2931                          */
2932                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2933                             SL_TRACE|SL_ERROR,
2934                             "tl_conn_req:options not supported "
2935                             "in message"));
2936                         tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2937                         freemsg(mp);
2938                         return;
2939                 }
2940         }
2941 
2942         /*
2943          * Prevent tep from closing on us.
2944          */
2945         if (! tl_noclose(tep)) {
2946                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2947                     "tl_conn_req:endpoint is closing"));
2948                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2949                 freemsg(mp);
2950                 return;
2951         }
2952 
2953         tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2954         /*
2955          * get endpoint to connect to
2956          * check that peer with DEST addr is bound to addr
2957          * and has CONIND_number > 0
2958          */
2959         dst.ta_alen = alen;
2960         dst.ta_abuf = mp->b_rptr + aoff;
2961         dst.ta_zoneid = tep->te_zoneid;
2962 
2963         /*
2964          * Verify if remote addr is in use
2965          */
2966         peer_tep = (IS_SOCKET(tep) ?
2967             tl_sock_find_peer(tep, &ux_addr) :
2968             tl_find_peer(tep, &dst));
2969 
2970         if (peer_tep == NULL) {
2971                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2972                     "tl_conn_req:no one at connect address"));
2973                 err = ECONNREFUSED;
2974         } else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2975                 /*
2976                  * validate that number of incoming connection is
2977                  * not to capacity on destination endpoint
2978                  */
2979                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2980                     "tl_conn_req: qlen overflow connection refused"));
2981                         err = ECONNREFUSED;
2982         }
2983 
2984         /*
2985          * Send T_DISCON_IND in case of error
2986          */
2987         if (err != 0) {
2988                 if (peer_tep != NULL)
2989                         tl_refrele(peer_tep);
2990                 /* We are still expected to send T_OK_ACK */
2991                 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2992                 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2993                 tl_closeok(tep);
2994                 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2995                     M_PROTO, T_DISCON_IND);
2996                 if (dimp == NULL) {
2997                         tl_merror(wq, NULL, ENOSR);
2998                         return;
2999                 }
3000                 di = (struct T_discon_ind *)dimp->b_rptr;
3001                 di->DISCON_reason = err;
3002                 di->SEQ_number = BADSEQNUM;
3003 
3004                 tep->te_state = TS_IDLE;
3005                 /*
3006                  * send T_DISCON_IND message
3007                  */
3008                 putnext(tep->te_rq, dimp);
3009                 return;
3010         }
3011 
3012         ASSERT(IS_COTS(peer_tep));
3013 
3014         /*
3015          * Found the listener. At this point processing will continue on
3016          * listener serializer. Close of the endpoint should be blocked while we
3017          * switch serializers.
3018          */
3019         tl_serializer_refhold(peer_tep->te_ser);
3020         tl_serializer_refrele(tep->te_ser);
3021         tep->te_ser = peer_tep->te_ser;
3022         ASSERT(tep->te_oconp == NULL);
3023         tep->te_oconp = peer_tep;
3024 
3025         /*
3026          * It is safe to close now. Close may continue on listener serializer.
3027          */
3028         tl_closeok(tep);
3029 
3030         /*
3031          * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3032          * data, so we link mp to ackmp.
3033          */
3034         ackmp->b_cont = mp;
3035         mp = ackmp;
3036 
3037         tl_refhold(tep);
3038         tl_serializer_enter(tep, tl_conn_req_ser, mp);
3039 }
3040 
3041 /*
3042  * Finish T_CONN_REQ processing on listener serializer.
3043  */
3044 static void
3045 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3046 {
3047         queue_t         *wq;
3048         tl_endpt_t      *peer_tep = tep->te_oconp;
3049         mblk_t          *confmp, *cimp, *indmp;
3050         void            *opts = NULL;
3051         mblk_t          *ackmp = mp;
3052         struct T_conn_req       *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3053         struct T_conn_ind       *ci;
3054         tl_icon_t       *tip;
3055         void            *addr_startp;
3056         t_scalar_t      olen = creq->OPT_length;
3057         t_scalar_t      ooff = creq->OPT_offset;
3058         size_t          ci_msz;
3059         size_t          size;
3060         cred_t          *cr = NULL;
3061         pid_t           cpid;
3062 
3063         if (tep->te_closing) {
3064                 TL_UNCONNECT(tep->te_oconp);
3065                 tl_serializer_exit(tep);
3066                 tl_refrele(tep);
3067                 freemsg(mp);
3068                 return;
3069         }
3070 
3071         wq = tep->te_wq;
3072         tep->te_flag |= TL_EAGER;
3073 
3074         /*
3075          * Extract preallocated ackmp from mp.
3076          */
3077         mp = mp->b_cont;
3078         ackmp->b_cont = NULL;
3079 
3080         if (olen == 0)
3081                 ooff = 0;
3082 
3083         if (peer_tep->te_closing ||
3084             !((peer_tep->te_state == TS_IDLE) ||
3085             (peer_tep->te_state == TS_WRES_CIND))) {
3086                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3087                     "tl_conn_req:peer in bad state (%d)",
3088                     peer_tep->te_state));
3089                 TL_UNCONNECT(tep->te_oconp);
3090                 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3091                 freemsg(ackmp);
3092                 tl_serializer_exit(tep);
3093                 tl_refrele(tep);
3094                 return;
3095         }
3096 
3097         /*
3098          * preallocate now for T_DISCON_IND or T_CONN_IND
3099          */
3100         /*
3101          * calculate length of T_CONN_IND message
3102          */
3103         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3104                 cr = msg_getcred(mp, &cpid);
3105                 ASSERT(cr != NULL);
3106                 if (peer_tep->te_flag & TL_SETCRED) {
3107                         ooff = 0;
3108                         olen = (t_scalar_t) sizeof (struct opthdr) +
3109                             OPTLEN(sizeof (tl_credopt_t));
3110                         /* 1 option only */
3111                 } else {
3112                         ooff = 0;
3113                         olen = (t_scalar_t)sizeof (struct opthdr) +
3114                             OPTLEN(ucredminsize(cr));
3115                         /* 1 option only */
3116                 }
3117         }
3118         ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3119         ci_msz = T_ALIGN(ci_msz) + olen;
3120         size = max(ci_msz, sizeof (struct T_discon_ind));
3121 
3122         /*
3123          * Save options from mp - we'll need them for T_CONN_IND.
3124          */
3125         if (ooff != 0) {
3126                 opts = kmem_alloc(olen, KM_NOSLEEP);
3127                 if (opts == NULL) {
3128                         /*
3129                          * roll back state changes
3130                          */
3131                         tep->te_state = TS_IDLE;
3132                         tl_memrecover(wq, mp, size);
3133                         freemsg(ackmp);
3134                         TL_UNCONNECT(tep->te_oconp);
3135                         tl_serializer_exit(tep);
3136                         tl_refrele(tep);
3137                         return;
3138                 }
3139                 /* Copy options to a temp buffer */
3140                 bcopy(mp->b_rptr + ooff, opts, olen);
3141         }
3142 
3143         if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3144                 /*
3145                  * Generate a T_CONN_CON that has the identical address
3146                  * (and options) as the T_CONN_REQ.
3147                  * NOTE: assumes that the T_conn_req and T_conn_con structures
3148                  * are isomorphic.
3149                  */
3150                 confmp = copyb(mp);
3151                 if (! confmp) {
3152                         /*
3153                          * roll back state changes
3154                          */
3155                         tep->te_state = TS_IDLE;
3156                         tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3157                         freemsg(ackmp);
3158                         if (opts != NULL)
3159                                 kmem_free(opts, olen);
3160                         TL_UNCONNECT(tep->te_oconp);
3161                         tl_serializer_exit(tep);
3162                         tl_refrele(tep);
3163                         return;
3164                 }
3165                 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3166                     T_CONN_CON;
3167         } else {
3168                 confmp = NULL;
3169         }
3170         if ((indmp = reallocb(mp, size, 0)) == NULL) {
3171                 /*
3172                  * roll back state changes
3173                  */
3174                 tep->te_state = TS_IDLE;
3175                 tl_memrecover(wq, mp, size);
3176                 freemsg(ackmp);
3177                 if (opts != NULL)
3178                         kmem_free(opts, olen);
3179                 freemsg(confmp);
3180                 TL_UNCONNECT(tep->te_oconp);
3181                 tl_serializer_exit(tep);
3182                 tl_refrele(tep);
3183                 return;
3184         }
3185 
3186         tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3187         if (tip == NULL) {
3188                 /*
3189                  * roll back state changes
3190                  */
3191                 tep->te_state = TS_IDLE;
3192                 tl_memrecover(wq, indmp, sizeof (*tip));
3193                 freemsg(ackmp);
3194                 if (opts != NULL)
3195                         kmem_free(opts, olen);
3196                 freemsg(confmp);
3197                 TL_UNCONNECT(tep->te_oconp);
3198                 tl_serializer_exit(tep);
3199                 tl_refrele(tep);
3200                 return;
3201         }
3202         tip->ti_mp = NULL;
3203 
3204         /*
3205          * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3206          * and tl_icon_t cell.
3207          */
3208 
3209         /*
3210          * ack validity of request and send the peer credential in the ACK.
3211          */
3212         tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3213 
3214         if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3215             confmp != NULL) {
3216                 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3217         }
3218 
3219         tl_ok_ack(wq, ackmp, T_CONN_REQ);
3220 
3221         /*
3222          * prepare message to send T_CONN_IND
3223          */
3224         /*
3225          * allocate the message - original data blocks retained
3226          * in the returned mblk
3227          */
3228         cimp = tl_resizemp(indmp, size);
3229         if (! cimp) {
3230                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3231                     "tl_conn_req:con_ind:allocb failure"));
3232                 tl_merror(wq, indmp, ENOMEM);
3233                 TL_UNCONNECT(tep->te_oconp);
3234                 tl_serializer_exit(tep);
3235                 tl_refrele(tep);
3236                 if (opts != NULL)
3237                         kmem_free(opts, olen);
3238                 freemsg(confmp);
3239                 ASSERT(tip->ti_mp == NULL);
3240                 kmem_free(tip, sizeof (*tip));
3241                 return;
3242         }
3243 
3244         DB_TYPE(cimp) = M_PROTO;
3245         ci = (struct T_conn_ind *)cimp->b_rptr;
3246         ci->PRIM_type  = T_CONN_IND;
3247         ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3248         ci->SRC_length = tep->te_alen;
3249         ci->SEQ_number = tep->te_seqno;
3250 
3251         addr_startp = cimp->b_rptr + ci->SRC_offset;
3252         bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3253         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3254 
3255                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3256                     ci->SRC_length);
3257                 ci->OPT_length = olen; /* because only 1 option */
3258                 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3259                     cr, cpid,
3260                     peer_tep->te_flag, peer_tep->te_credp);
3261         } else if (ooff != 0) {
3262                 /* Copy option from T_CONN_REQ */
3263                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3264                     ci->SRC_length);
3265                 ci->OPT_length = olen;
3266                 ASSERT(opts != NULL);
3267                 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3268         } else {
3269                 ci->OPT_offset = 0;
3270                 ci->OPT_length = 0;
3271         }
3272         if (opts != NULL)
3273                 kmem_free(opts, olen);
3274 
3275         /*
3276          * register connection request with server peer
3277          * append to list of incoming connections
3278          * increment references for both peer_tep and tep: peer_tep is placed on
3279          * te_oconp and tep is placed on listeners queue.
3280          */
3281         tip->ti_tep = tep;
3282         tip->ti_seqno = tep->te_seqno;
3283         list_insert_tail(&peer_tep->te_iconp, tip);
3284         peer_tep->te_nicon++;
3285 
3286         peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3287         /*
3288          * send the T_CONN_IND message
3289          */
3290         putnext(peer_tep->te_rq, cimp);
3291 
3292         /*
3293          * Send a T_CONN_CON message for sockets.
3294          * Disable the queues until we have reached the correct state!
3295          */
3296         if (confmp != NULL) {
3297                 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3298                 noenable(wq);
3299                 putnext(tep->te_rq, confmp);
3300         }
3301         /*
3302          * Now we need to increment tep reference because tep is referenced by
3303          * server list of pending connections. We also need to decrement
3304          * reference before exiting serializer. Two operations void each other
3305          * so we don't modify reference at all.
3306          */
3307         ASSERT(tep->te_refcnt >= 2);
3308         ASSERT(peer_tep->te_refcnt >= 2);
3309         tl_serializer_exit(tep);
3310 }
3311 
3312 
3313 
3314 /*
3315  * Handle T_CONN_RES / O_T_CONN_RES on the listener stream: accept the pending
3316  * connection identified by SEQ_number on the endpoint named by ACCEPTOR_id.
3317  * Called on listener serializer.
3318  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3319  * Normally the client (eager) is switched to the acceptor's serializer; when
3320  * that is not possible the acceptor is moved to the client's serializer.
3321  *
3322  * If TL_SET[U]CRED generate the credentials options.
      * For sockets tl_conn_req has already generated the T_CONN_CON.
      *
      * mp is the message holding the struct T_conn_res; tep is the listener.
      * mp is consumed on every path: it is freed, handed to tl_memrecover() or
      * tl_merror(), or reused through reallocb() as the T_DISCON_IND/T_CONN_CON
      * that is passed on with putnext().
      *
      * Failures detected while validating the request are reported through
      * tl_error_ack() (TOUTSTATE, TSYSERR/EINVAL, TBADOPT, TBADSEQ or TBADF).
3323  */
3324 static void
3325 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3326 {
3327         queue_t                 *wq;
3328         struct T_conn_res       *cres = (struct T_conn_res *)mp->b_rptr;
3329         ssize_t                 msz = MBLKL(mp);
3330         t_scalar_t              olen, ooff, err = 0;
3331         t_scalar_t              prim = cres->PRIM_type;
3332         uchar_t                 *addr_startp;
3333         tl_endpt_t              *acc_ep = NULL, *cl_ep = NULL;
3334         tl_icon_t               *tip;
3335         size_t                  size;
3336         mblk_t                  *ackmp, *respmp;
3337         mblk_t                  *dimp, *ccmp = NULL;
3338         struct T_discon_ind     *di;
3339         struct T_conn_con       *cc;
3340         boolean_t               client_noclose_set = B_FALSE;
3341         boolean_t               switch_client_serializer = B_TRUE;
             /*
              * client_noclose_set records that tl_noclose(cl_ep) succeeded in
              * this function and therefore has to be undone with
              * tl_closeok(cl_ep) before returning.
              * switch_client_serializer is cleared when the client has to stay
              * on its current serializer; the acceptor is then moved to the
              * client's serializer instead.
              */
3342
3343         ASSERT(IS_COTS(tep));
3344
             /* The listener is closing: drop the request without any ack. */
3345         if (tep->te_closing) {
3346                 freemsg(mp);
3347                 return;
3348         }
3349
3350         wq = tep->te_wq;
3351
3352         /*
3353          * preallocate memory for:
3354          * 1. max of T_ERROR_ACK and T_OK_ACK
3355          *      ==> known max T_ERROR_ACK
3356          * 2. max of T_DISCON_IND and T_CONN_CON
3357          */
3358         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3359         if (! ackmp) {
3360                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3361                 return;
3362         }
3363         /*
3364          * memory committed for T_OK_ACK/T_ERROR_ACK now
3365          * will be committed for T_DISCON_IND/T_CONN_CON later
3366          */
3367
3368
3369         ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3370
3371         /*
3372          * validate state: the listener must be in TS_WRES_CIND
3373          */
3374         if (tep->te_state != TS_WRES_CIND) {
3375                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3376                     SL_TRACE|SL_ERROR,
3377                     "tl_wput:T_CONN_RES:out of state, state=%d",
3378                     tep->te_state));
3379                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3380                 freemsg(mp);
3381                 return;
3382         }
3383
3384         /*
3385          * validate the message
3386          * Note: dereference fields in struct inside message only
3387          * after validating the message length.
3388          */
3389         if (msz < sizeof (struct T_conn_res)) {
3390                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3391                     "tl_conn_res:invalid message length"));
3392                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3393                 freemsg(mp);
3394                 return;
3395         }
3396         olen = cres->OPT_length;
3397         ooff = cres->OPT_offset;
3398         if (((olen > 0) && ((ooff + olen) > msz))) {
3399                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3400                     "tl_conn_res:invalid message"));
3401                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3402                 freemsg(mp);
3403                 return;
3404         }
3405         if (olen) {
3406                 /*
3407                  * no opts in connect res
3408                  * supported in this provider
3409                  */
3410                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3411                     "tl_conn_res:options not supported in message"));
3412                 tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3413                 freemsg(mp);
3414                 return;
3415         }
3416
             /*
              * The message is well formed: take the TE_CONN_RES transition.
              * Every validation failure from here on takes the TE_ERROR_ACK
              * transition before the error is acknowledged.
              */
3417         tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3418         ASSERT(tep->te_state == TS_WACK_CRES);
3419
             /*
              * Reject sequence numbers in the range
              * [BADSEQNUM, TL_MINOR_START).
              */
3420         if (cres->SEQ_number < TL_MINOR_START &&
3421             cres->SEQ_number >= BADSEQNUM) {
3422                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3423                     "tl_conn_res:remote endpoint sequence number bad"));
3424                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3425                 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3426                 freemsg(mp);
3427                 return;
3428         }
3429
3430         /*
3431          * find accepting endpoint. Will have extra reference if found.
              * The lookup key is ACCEPTOR_id in the transport's tr_ai_hash.
3432          */
3433         if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3434             (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3435             (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3436                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3437                     "tl_conn_res:bad accepting endpoint"));
3438                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3439                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3440                 freemsg(mp);
3441                 return;
3442         }
3443
3444         /*
3445          * Prevent acceptor from closing.
              * If tl_noclose() refuses, the acceptor is reported as a bad
              * endpoint (TBADF) and the reference from the lookup is dropped.
              * Every later return must pair this with tl_closeok(acc_ep).
3446          */
3447         if (! tl_noclose(acc_ep)) {
3448                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3449                     "tl_conn_res:bad accepting endpoint"));
3450                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3451                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3452                 tl_refrele(acc_ep);
3453                 freemsg(mp);
3454                 return;
3455         }
3456
             /*
              * NOTE(review): TL_ACCEPTOR is set before the remaining checks and
              * is not cleared by the error returns below -- confirm intended.
              */
3457         acc_ep->te_flag |= TL_ACCEPTOR;
3458
3459         /*
3460          * validate that accepting endpoint, if different from listening
3461          * has address bound => state is TS_IDLE
3462          * TROUBLE in XPG4 !!?
3463          */
3464         if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3465                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3466                     "tl_conn_res:accepting endpoint has no address bound,"
3467                     "state=%d", acc_ep->te_state));
3468                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3469                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3470                 freemsg(mp);
3471                 tl_closeok(acc_ep);
3472                 tl_refrele(acc_ep);
3473                 return;
3474         }
3475
3476         /*
3477          * validate if accepting endpt same as listening, then
3478          * no other incoming connection should be on the queue
3479          */
3480
3481         if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3482                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3483                     "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3484                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3485                 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3486                 freemsg(mp);
3487                 tl_closeok(acc_ep);
3488                 tl_refrele(acc_ep);
3489                 return;
3490         }
3491
3492         /*
3493          * Find the entry for this sequence number on the listener's list
3494          * of pending incoming connections. A missing entry means the
3495          * sequence number does not name a pending connection (TBADSEQ).
3496          * The entry is released with tl_freetip() further down, once the
3497          * connection has been set up.
3498          */
3498         tip = tl_icon_find(tep, cres->SEQ_number);
3499         if (tip == NULL) {
3500                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3501                     "tl_conn_res:no client in listener list"));
3502                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3503                 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3504                 freemsg(mp);
3505                 tl_closeok(acc_ep);
3506                 tl_refrele(acc_ep);
3507                 return;
3508         }
3509
3510         /*
3511          * If ti_tep is NULL the client has already closed. In this case
3512          * the code below will avoid any action on the client side
3513          * but complete the server and acceptor state transitions.
3514          */
3515         ASSERT(tip->ti_tep == NULL ||
3516             tip->ti_tep->te_seqno == cres->SEQ_number);
3517         cl_ep = tip->ti_tep;
3518
3519         /*
3520          * If the client is present it is switched from listener's to acceptor's
3521          * serializer. We should block client closes while serializers are
3522          * being switched.
3523          *
3524          * It is possible that the client is present but is currently being
3525          * closed. There are two possible cases:
3526          *
3527          * 1) The client has already entered tl_close_finish_ser() and sent
3528          *    T_ORDREL_IND. In this case we can just ignore the client (but we
3529          *    still need to send all messages from tip->ti_mp to the acceptor).
3530          *
3531          * 2) The client started the close but has not entered
3532          *    tl_close_finish_ser() yet. In this case, the client is already
3533          *    proceeding asynchronously on the listener's serializer, so we're
3534          *    forced to change the acceptor to use the listener's serializer to
3535          *    ensure that any operations on the acceptor are serialized with
3536          *    respect to the close that's in-progress.
3537          */
3538         if (cl_ep != NULL) {
3539                 if (tl_noclose(cl_ep)) {
3540                         client_noclose_set = B_TRUE;
3541                 } else {
3542                         /*
3543                          * Client is closing. If it has sent the
3544                          * T_ORDREL_IND, we can simply ignore it - otherwise,
3545                          * we have to let the client continue until it is
3546                          * sent.
3547                          *
3548                          * If we do continue using the client, acceptor will
3549                          * switch to client's serializer which is used by client
3550                          * for its close.
                              *
                              * The client is ignored (handled as if it had
                              * already closed) when it is not a socket, when
                              * early connect is disabled, or when its state is
                              * -1 (presumably the state left behind once the
                              * close has sent T_ORDREL_IND -- confirm).
3551                          */
3552                         tl_client_closing_when_accepting++;
3553                         switch_client_serializer = B_FALSE;
3554                         if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3555                             cl_ep->te_state == -1)
3556                                 cl_ep = NULL;
3557                 }
3558         }
3559
3560         if (cl_ep != NULL) {
3561                 /*
3562                  * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3563                  * (latter for sockets only)
                      *
                      * NOTE(review): as written the test can only fail for a
                      * socket client (it requires IS_SOCKET(cl_ep) to be true),
                      * so a non-socket client in a state other than
                      * TS_WCON_CREQ is not rejected here. That does not match
                      * the sentence above -- confirm which one is intended.
3564                  */
3565                 if (cl_ep->te_state != TS_WCON_CREQ &&
3566                     (cl_ep->te_state != TS_DATA_XFER &&
3567                     IS_SOCKET(cl_ep))) {
3568                         err = ECONNREFUSED;
3569                         /*
3570                          * T_DISCON_IND sent later after committing memory
3571                          * and acking validity of request
3572                          */
3573                         (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3574                             "tl_conn_res:peer in bad state"));
3575                 }
3576
3577                 /*
3578                  * preallocate now for T_DISCON_IND or T_CONN_CON
3579                  * ack validity of request (T_OK_ACK) after memory committed
3580                  */
3581
3582                 if (err)
3583                         size = sizeof (struct T_discon_ind);
3584                 else {
3585                         /*
3586                          * calculate length of T_CONN_CON message:
                              * the header, the acceptor's address and, when the
                              * client has TL_SETCRED or TL_SETUCRED set, one
                              * credentials option of length olen.
3587                          */
3588                         olen = 0;
3589                         if (cl_ep->te_flag & TL_SETCRED) {
3590                                 olen = (t_scalar_t)sizeof (struct opthdr) +
3591                                     OPTLEN(sizeof (tl_credopt_t));
3592                         } else if (cl_ep->te_flag & TL_SETUCRED) {
3593                                 olen = (t_scalar_t)sizeof (struct opthdr) +
3594                                     OPTLEN(ucredminsize(acc_ep->te_credp));
3595                         }
3596                         size = T_ALIGN(sizeof (struct T_conn_con) +
3597                             acc_ep->te_alen) + olen;
3598                 }
3599                 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3600                         /*
3601                          * roll back state changes
3602                          */
3603                         tep->te_state = TS_WRES_CIND;
3604                         tl_memrecover(wq, mp, size);
3605                         freemsg(ackmp);
3606                         if (client_noclose_set)
3607                                 tl_closeok(cl_ep);
3608                         tl_closeok(acc_ep);
3609                         tl_refrele(acc_ep);
3610                         return;
3611                 }
3612                 mp = NULL;      /* the request now lives on as respmp */
3613         }
3614
3615         /*
3616          * Now take the listener's state transition for the T_OK_ACK; the
              * ack itself is sent with tl_ok_ack() on each of the paths below.
              * The event used depends on whether this is the only pending
              * connection and on whether the listener is also the acceptor.
3617          */
3618         if (tep->te_nicon == 1) {
3619                 if (tep == acc_ep)
3620                         tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3621                 else
3622                         tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3623         } else
3624                 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3625
3626         /*
3627          * send T_DISCON_IND now if client state validation failed earlier
              * (err is only set inside the cl_ep != NULL block above, so respmp
              * and size have been set when this branch is taken).
              *
              * NOTE(review): this path returns without calling tl_freetip(), so
              * the tip stays on the listener's list while te_state is forced to
              * TS_IDLE -- confirm that this is intended.
3628          */
3629         if (err) {
3630                 tl_ok_ack(wq, ackmp, prim);
3631                 /*
3632                  * flush the queues - why always ?
3633                  */
3634                 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3635
3636                 dimp = tl_resizemp(respmp, size);
3637                 if (! dimp) {
3638                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
3639                             SL_TRACE|SL_ERROR,
3640                             "tl_conn_res:con_ind:allocb failure"));
3641                         tl_merror(wq, respmp, ENOMEM);
3642                         tl_closeok(acc_ep);
3643                         if (client_noclose_set)
3644                                 tl_closeok(cl_ep);
3645                         tl_refrele(acc_ep);
3646                         return;
3647                 }
3648                 if (dimp->b_cont) {
3649                         /* no user data in provider generated discon ind */
3650                         freemsg(dimp->b_cont);
3651                         dimp->b_cont = NULL;
3652                 }
3653
3654                 DB_TYPE(dimp) = M_PROTO;
3655                 di = (struct T_discon_ind *)dimp->b_rptr;
3656                 di->PRIM_type  = T_DISCON_IND;
3657                 di->DISCON_reason = err;
3658                 di->SEQ_number = BADSEQNUM;
3659
3660                 tep->te_state = TS_IDLE;
3661                 /*
3662                  * send T_DISCON_IND message (up the acceptor's read side)
3663                  */
3664                 putnext(acc_ep->te_rq, dimp);
3665                 if (client_noclose_set)
3666                         tl_closeok(cl_ep);
3667                 tl_closeok(acc_ep);
3668                 tl_refrele(acc_ep);
3669                 return;
3670         }
3671
3672         /*
3673          * now start connecting the accepting endpoint
3674          */
3675         if (tep != acc_ep)
3676                 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3677
3678         if (cl_ep == NULL) {
3679                 /*
3680                  * The client has already closed. Send up any queued messages
3681                  * and change the state accordingly.
                      * mp is still the original request on this path (the
                      * reallocb() above is only done when a client is present),
                      * so it is freed here.
3682                  */
3683                 tl_ok_ack(wq, ackmp, prim);
3684                 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3685
3686                 /*
3687                  * remove endpoint from incoming connection
3688                  * delete client from list of incoming connections
3689                  */
3690                 tl_freetip(tep, tip);
3691                 freemsg(mp);
3692                 tl_closeok(acc_ep);
3693                 tl_refrele(acc_ep);
3694                 return;
3695         } else if (tip->ti_mp != NULL) {
3696                 /*
3697                  * The client could have queued a T_DISCON_IND which needs
3698                  * to be sent up.
3699                  * Note that t_discon_req can not operate the same as
3700                  * t_data_req since it is not possible for it to putbq
3701                  * the message and return -1 due to the use of qwriter.
3702                  */
3703                 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3704         }
3705
3706         /*
3707          * prepare connect confirm T_CONN_CON message
3708          */
3709
3710         /*
3711          * allocate the message - original data blocks
3712          * retained in the returned mblk
3713          */
3714         if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3715                 ccmp = tl_resizemp(respmp, size);
3716                 if (ccmp == NULL) {
3717                         tl_ok_ack(wq, ackmp, prim);
3718                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
3719                             SL_TRACE|SL_ERROR,
3720                             "tl_conn_res:conn_con:allocb failure"));
3721                         tl_merror(wq, respmp, ENOMEM);
3722                         tl_closeok(acc_ep);
3723                         if (client_noclose_set)
3724                                 tl_closeok(cl_ep);
3725                         tl_refrele(acc_ep);
3726                         return;
3727                 }
3728
3729                 DB_TYPE(ccmp) = M_PROTO;
3730                 cc = (struct T_conn_con *)ccmp->b_rptr;
3731                 cc->PRIM_type  = T_CONN_CON;
3732                 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3733                 cc->RES_length = acc_ep->te_alen;
3734                 addr_startp = ccmp->b_rptr + cc->RES_offset;
3735                 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3736                 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3737                         cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3738                             cc->RES_length);
3739                         cc->OPT_length = olen;
3740                         tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3741                             acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3742                             cl_ep->te_credp);
3743                 } else {
3744                         cc->OPT_offset = 0;
3745                         cc->OPT_length = 0;
3746                 }
3747                 /*
3748                  * Forward the credential in the packet so it can be picked up
3749                  * at the higher layers for more complete credential processing
3750                  */
3751                 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3752         } else {
                     /*
                      * Socket client with early connect enabled: the
                      * T_CONN_CON was already generated by tl_conn_req, so the
                      * preallocated response message is not needed.
                      */
3753                 freemsg(respmp);
3754                 respmp = NULL;
3755         }
3756
3757         /*
3758          * make connection linking
3759          * accepting and client endpoints
3760          * No need to increment references:
3761          *      on client: it should already have one from tip->ti_tep linkage.
3762          *      on acceptor: it should already have one from the table lookup.
3763          *
3764          * At this point both client and acceptor can't close. Set client
3765          * serializer to acceptor's.
3766          */
3767         ASSERT(cl_ep->te_refcnt >= 2);
3768         ASSERT(acc_ep->te_refcnt >= 2);
3769         ASSERT(cl_ep->te_conp == NULL);
3770         ASSERT(acc_ep->te_conp == NULL);
3771         cl_ep->te_conp = acc_ep;
3772         acc_ep->te_conp = cl_ep;
3773         ASSERT(cl_ep->te_ser == tep->te_ser);
3774         if (switch_client_serializer) {
3775                 mutex_enter(&cl_ep->te_ser_lock);
3776                 if (cl_ep->te_ser_count > 0) {
                             /*
                              * A non-zero te_ser_count is treated as "the
                              * client cannot be switched now"; fall through to
                              * moving the acceptor instead and count the event.
                              */
3777                         switch_client_serializer = B_FALSE;
3778                         tl_serializer_noswitch++;
3779                 } else {
3780                         /*
3781                          * Move client to the acceptor's serializer.
3782                          */
3783                         tl_serializer_refhold(acc_ep->te_ser);
3784                         tl_serializer_refrele(cl_ep->te_ser);
3785                         cl_ep->te_ser = acc_ep->te_ser;
3786                 }
3787                 mutex_exit(&cl_ep->te_ser_lock);
3788         }
3789         if (!switch_client_serializer) {
3790                 /*
3791                  * It is not possible to switch client to use acceptor's.
3792                  * Move acceptor to client's serializer (which is the same as
3793                  * listener's).
3794                  */
3795                 tl_serializer_refhold(cl_ep->te_ser);
3796                 tl_serializer_refrele(acc_ep->te_ser);
3797                 acc_ep->te_ser = cl_ep->te_ser;
3798         }
3799
3800         TL_REMOVE_PEER(cl_ep->te_oconp);
3801         TL_REMOVE_PEER(acc_ep->te_oconp);
3802
3803         /*
3804          * remove endpoint from incoming connection
3805          * delete client from list of incoming connections
              * ti_tep is cleared first; the client reference that came with it
              * now backs the te_conp linkage made above (presumably
              * tl_freetip() would otherwise release it -- confirm).
3806          */
3807         tip->ti_tep = NULL;
3808         tl_freetip(tep, tip);
3809         tl_ok_ack(wq, ackmp, prim);
3810
3811         /*
3812          * data blocks already linked in reallocb()
3813          */
3814
3815         /*
3816          * link queues so that I_SENDFD will work
3817          */
3818         if (! IS_SOCKET(tep)) {
3819                 acc_ep->te_wq->q_next = cl_ep->te_rq;
3820                 cl_ep->te_wq->q_next = acc_ep->te_rq;
3821         }
3822
3823         /*
3824          * send T_CONN_CON up on client side unless it was already
3825          * done (for a socket). In cases any data or ordrel req has been
3826          * queued make sure that the service procedure runs.
              * The condition below is the negation of the one that guarded the
              * construction of ccmp, so ccmp is expected to be NULL in the first
              * branch (assuming tl_disable_early_connect did not change).
3827          */
3828         if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3829                 enableok(cl_ep->te_wq);
3830                 TL_QENABLE(cl_ep);
3831                 if (ccmp != NULL)
3832                         freemsg(ccmp);
3833         } else {
3834                 /*
3835                  * change client state on TE_CONN_CON event
3836                  */
3837                 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3838                 putnext(cl_ep->te_rq, ccmp);
3839         }
3840
3841         /* Mark both endpoints as accepted */
3842         cl_ep->te_flag |= TL_ACCEPTED;
3843         acc_ep->te_flag |= TL_ACCEPTED;
3844
3845         /*
3846          * Allow client and acceptor to close.
              * The acceptor's reference from the table lookup is deliberately
              * not released on this path: it is kept for the te_conp linkage.
3847          */
3848         tl_closeok(acc_ep);
3849         if (client_noclose_set)
3850                 tl_closeok(cl_ep);
3851 }
3852 
3853 
3854 
3855 
3856 static void
3857 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3858 {
3859         queue_t                 *wq;
3860         struct T_discon_req     *dr;
3861         ssize_t                 msz;
3862         tl_endpt_t              *peer_tep = tep->te_conp;
3863         tl_endpt_t              *srv_tep = tep->te_oconp;
3864         tl_icon_t               *tip;
3865         size_t                  size;
3866         mblk_t                  *ackmp, *dimp, *respmp;
3867         struct T_discon_ind     *di;
3868         t_scalar_t              save_state, new_state;
3869 
3870         if (tep->te_closing) {
3871                 freemsg(mp);
3872                 return;
3873         }
3874 
3875         if ((peer_tep != NULL) && peer_tep->te_closing) {
3876                 TL_UNCONNECT(tep->te_conp);
3877                 peer_tep = NULL;
3878         }
3879         if ((srv_tep != NULL) && srv_tep->te_closing) {
3880                 TL_UNCONNECT(tep->te_oconp);
3881                 srv_tep = NULL;
3882         }
3883 
3884         wq = tep->te_wq;
3885 
3886         /*
3887          * preallocate memory for:
3888          * 1. max of T_ERROR_ACK and T_OK_ACK
3889          *      ==> known max T_ERROR_ACK
3890          * 2. for  T_DISCON_IND
3891          */
3892         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3893         if (! ackmp) {
3894                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3895                 return;
3896         }
3897         /*
3898          * memory committed for T_OK_ACK/T_ERROR_ACK now
3899          * will be committed for T_DISCON_IND  later
3900          */
3901 
3902         dr = (struct T_discon_req *)mp->b_rptr;
3903         msz = MBLKL(mp);
3904 
3905         /*
3906          * validate the state
3907          */
3908         save_state = new_state = tep->te_state;
3909         if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3910             ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3911                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3912                     SL_TRACE|SL_ERROR,
3913                     "tl_wput:T_DISCON_REQ:out of state, state=%d",
3914                     tep->te_state));
3915                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3916                 freemsg(mp);
3917                 return;
3918         }
3919         /*
3920          * Defer committing the state change until it is determined if
3921          * the message will be queued with the tl_icon or not.
3922          */
3923         new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3924 
3925         /* validate the message */
3926         if (msz < sizeof (struct T_discon_req)) {
3927                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3928                     "tl_discon_req:invalid message"));
3929                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3930                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3931                 freemsg(mp);
3932                 return;
3933         }
3934 
3935         /*
3936          * if server, then validate that client exists
3937          * by connection sequence number etc.
3938          */
3939         if (tep->te_nicon > 0) { /* server */
3940 
3941                 /*
3942                  * search server list for disconnect client
3943                  */
3944                 tip = tl_icon_find(tep, dr->SEQ_number);
3945                 if (tip == NULL) {
3946                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
3947                             SL_TRACE|SL_ERROR,
3948                             "tl_discon_req:no disconnect endpoint"));
3949                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3950                         tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3951                         freemsg(mp);
3952                         return;
3953                 }
3954                 /*
3955                  * If ti_tep is NULL the client has already closed. In this case
3956                  * the code below will avoid any action on the client side.
3957                  */
3958 
3959                 IMPLY(tip->ti_tep != NULL,
3960                     tip->ti_tep->te_seqno == dr->SEQ_number);
3961                 peer_tep = tip->ti_tep;
3962         }
3963 
3964         /*
3965          * preallocate now for T_DISCON_IND
3966          * ack validity of request (T_OK_ACK) after memory committed
3967          */
3968         size = sizeof (struct T_discon_ind);
3969         if ((respmp = reallocb(mp, size, 0)) == NULL) {
3970                 tl_memrecover(wq, mp, size);
3971                 freemsg(ackmp);
3972                 return;
3973         }
3974 
3975         /*
3976          * prepare message to ack validity of request
3977          */
3978         if (tep->te_nicon == 0)
3979                 new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3980         else
3981                 if (tep->te_nicon == 1)
3982                         new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3983                 else
3984                         new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3985 
3986         /*
3987          * Flushing queues according to TPI. Using the old state.
3988          */
3989         if ((tep->te_nicon <= 1) &&
3990             ((save_state == TS_DATA_XFER) ||
3991             (save_state == TS_WIND_ORDREL) ||
3992             (save_state == TS_WREQ_ORDREL)))
3993                 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3994 
3995         /* send T_OK_ACK up  */
3996         tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3997 
3998         /*
3999          * now do disconnect business
4000          */
4001         if (tep->te_nicon > 0) { /* listener */
4002                 if (peer_tep != NULL && !peer_tep->te_closing) {
4003                         /*
4004                          * disconnect incoming connect request pending to tep
4005                          */
4006                         if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4007                                 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4008                                     SL_TRACE|SL_ERROR,
4009                                     "tl_discon_req: reallocb failed"));
4010                                 tep->te_state = new_state;
4011                                 tl_merror(wq, respmp, ENOMEM);
4012                                 return;
4013                         }
4014                         di = (struct T_discon_ind *)dimp->b_rptr;
4015                         di->SEQ_number = BADSEQNUM;
4016                         save_state = peer_tep->te_state;
4017                         peer_tep->te_state = TS_IDLE;
4018 
4019                         TL_REMOVE_PEER(peer_tep->te_oconp);
4020                         enableok(peer_tep->te_wq);
4021                         TL_QENABLE(peer_tep);
4022                 } else {
4023                         freemsg(respmp);
4024                         dimp = NULL;
4025                 }
4026 
4027                 /*
4028                  * remove endpoint from incoming connection list
4029                  * - remove disconnect client from list on server
4030                  */
4031                 tl_freetip(tep, tip);
4032         } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4033                 /*
4034                  * disconnect an outgoing request pending from tep
4035                  */
4036 
4037                 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4038                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
4039                             SL_TRACE|SL_ERROR,
4040                             "tl_discon_req: reallocb failed"));
4041                         tep->te_state = new_state;
4042                         tl_merror(wq, respmp, ENOMEM);
4043                         return;
4044                 }
4045                 di = (struct T_discon_ind *)dimp->b_rptr;
4046                 DB_TYPE(dimp) = M_PROTO;
4047                 di->PRIM_type  = T_DISCON_IND;
4048                 di->DISCON_reason = ECONNRESET;
4049                 di->SEQ_number = tep->te_seqno;
4050 
4051                 /*
4052                  * If this is a socket the T_DISCON_IND is queued with
4053                  * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4054                  * from the list of pending connections.
4055                  * Note that when te_oconp is set the peer better have
4056                  * a t_connind_t for the client.
4057                  */
4058                 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4059                         /*
4060                          * No need to check that
4061                          * ti_tep == NULL since the T_DISCON_IND
4062                          * takes precedence over other queued
4063                          * messages.
4064                          */
4065                         tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4066                         peer_tep = NULL;
4067                         dimp = NULL;
4068                         /*
4069                          * Can't clear te_oconp since tl_co_unconnect needs
4070                          * it as a hint not to free the tep.
4071                          * Keep the state unchanged since tl_conn_res inspects
4072                          * it.
4073                          */
4074                         new_state = tep->te_state;
4075                 } else {
4076                         /* Found - delete it */
4077                         tip = tl_icon_find(peer_tep, tep->te_seqno);
4078                         if (tip != NULL) {
4079                                 ASSERT(tep == tip->ti_tep);
4080                                 save_state = peer_tep->te_state;
4081                                 if (peer_tep->te_nicon == 1)
4082                                         peer_tep->te_state =
4083                                             NEXTSTATE(TE_DISCON_IND2,
4084                                             peer_tep->te_state);
4085                                 else
4086                                         peer_tep->te_state =
4087                                             NEXTSTATE(TE_DISCON_IND3,
4088                                             peer_tep->te_state);
4089                                 tl_freetip(peer_tep, tip);
4090                         }
4091                         ASSERT(tep->te_oconp != NULL);
4092                         TL_UNCONNECT(tep->te_oconp);
4093                 }
4094         } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4095                 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4096                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
4097                             SL_TRACE|SL_ERROR,
4098                             "tl_discon_req: reallocb failed"));
4099                         tep->te_state = new_state;
4100                         tl_merror(wq, respmp, ENOMEM);
4101                         return;
4102                 }
4103                 di = (struct T_discon_ind *)dimp->b_rptr;
4104                 di->SEQ_number = BADSEQNUM;
4105 
4106                 save_state = peer_tep->te_state;
4107                 peer_tep->te_state = TS_IDLE;
4108         } else {
4109                 /* Not connected */
4110                 tep->te_state = new_state;
4111                 freemsg(respmp);
4112                 return;
4113         }
4114 
4115         /* Commit state changes */
4116         tep->te_state = new_state;
4117 
4118         if (peer_tep == NULL) {
4119                 ASSERT(dimp == NULL);
4120                 goto done;
4121         }
4122         /*
4123          * Flush queues on peer before sending up
4124          * T_DISCON_IND according to TPI
4125          */
4126 
4127         if ((save_state == TS_DATA_XFER) ||
4128             (save_state == TS_WIND_ORDREL) ||
4129             (save_state == TS_WREQ_ORDREL))
4130                 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4131 
4132         DB_TYPE(dimp) = M_PROTO;
4133         di->PRIM_type  = T_DISCON_IND;
4134         di->DISCON_reason = ECONNRESET;
4135 
4136         /*
4137          * data blocks already linked into dimp by reallocb()
4138          */
4139         /*
4140          * send indication message to peer user module
4141          */
4142         ASSERT(dimp != NULL);
4143         putnext(peer_tep->te_rq, dimp);
4144 done:
4145         if (tep->te_conp) {  /* disconnect pointers if connected */
4146                 ASSERT(! peer_tep->te_closing);
4147 
4148                 /*
4149                  * Messages may be queued on peer's write queue
4150                  * waiting to be processed by its write service
4151                  * procedure. Before the pointer to the peer transport
4152                  * structure is set to NULL, qenable the peer's write
4153                  * queue so that the queued up messages are processed.
4154                  */
4155                 if ((save_state == TS_DATA_XFER) ||
4156                     (save_state == TS_WIND_ORDREL) ||
4157                     (save_state == TS_WREQ_ORDREL))
4158                         TL_QENABLE(peer_tep);
4159                 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4160                 TL_UNCONNECT(peer_tep->te_conp);
4161                 if (! IS_SOCKET(tep)) {
4162                         /*
4163                          * unlink the streams
4164                          */
4165                         tep->te_wq->q_next = NULL;
4166                         peer_tep->te_wq->q_next = NULL;
4167                 }
4168                 TL_UNCONNECT(tep->te_conp);
4169         }
4170 }
4171 
/*
 * Serializer-side wrapper for T_ADDR_REQ processing.
 *
 * If the endpoint is not closing, mp is handed to tl_addr_req(); otherwise
 * mp is freed here.  In both cases the function finishes by calling
 * tl_serializer_exit() and tl_refrele() on tep.
 *
 * NOTE(review): presumably those two calls undo a serializer entry and a
 * reference taken by whoever scheduled this callback - confirm at the caller.
 */
4172 static void
4173 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4174 {
4175         if (!tep->te_closing)
4176                 tl_addr_req(mp, tep);
4177         else
4178                 freemsg(mp);
4179 
4180         tl_serializer_exit(tep);
4181         tl_refrele(tep);
4182 }
4183 
/*
 * Answer a T_ADDR_REQ received on tep.
 *
 * mp is the request message.  It is disposed of on every path: freed when
 * the endpoint is closing, reused through reallocb() as the T_ADDR_ACK,
 * handed to tl_memrecover() when that reallocation fails, or passed on to
 * tl_connected_cots_addr_req().
 *
 * Connectionless endpoints, and connection oriented endpoints whose state
 * lies outside TS_DATA_XFER..TS_WREQ_ORDREL, are answered here with an ack
 * that carries only the local address (and only when te_state >= TS_IDLE).
 * All other endpoints are delegated to tl_connected_cots_addr_req().
 */
4184 static void
4185 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4186 {
4187         queue_t                 *wq;
4188         size_t                  ack_sz;
4189         mblk_t                  *ackmp;
4190         struct T_addr_ack       *taa;
4191 
4192         if (tep->te_closing) {
4193                 freemsg(mp);
4194                 return;
4195         }
4196 
4197         wq = tep->te_wq;
4198 
4199         /*
4200          * Note: T_ADDR_REQ message has only PRIM_type field
4201          * so it is already validated earlier.
4202          */
4203 
4204         if (IS_CLTS(tep) ||
4205             (tep->te_state > TS_WREQ_ORDREL) ||
4206             (tep->te_state < TS_DATA_XFER)) {
4207                 /*
4208                  * Either connectionless or connection oriented but not
4209                  * in connected data transfer state or half-closed states.
4210                  */
4211                 ack_sz = sizeof (struct T_addr_ack);
4212                 if (tep->te_state >= TS_IDLE)
4213                         /* is bound */
4214                         ack_sz += tep->te_alen;
                     /* Build the ack in the request's own message block. */
4215                 ackmp = reallocb(mp, ack_sz, 0);
4216                 if (ackmp == NULL) {
4217                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4218                             SL_TRACE|SL_ERROR,
4219                             "tl_addr_req: reallocb failed"));
                             /* The request itself is given to tl_memrecover(). */
4220                         tl_memrecover(wq, mp, ack_sz);
4221                         return;
4222                 }
4223 
4224                 taa = (struct T_addr_ack *)ackmp->b_rptr;
4225 
                     /*
                      * Clear the whole fixed header; only PRIM_type and (when
                      * bound) LOCADDR_* are set below, so the remaining fields
                      * stay zero in this ack.
                      */
4226                 bzero(taa, sizeof (struct T_addr_ack));
4227 
4228                 taa->PRIM_type = T_ADDR_ACK;
4229                 ackmp->b_datap->db_type = M_PCPROTO;
                     /* For now the message ends right after the fixed header. */
4230                 ackmp->b_wptr = (uchar_t *)&taa[1];
4231 
4232                 if (tep->te_state >= TS_IDLE) {
4233                         /* endpoint is bound */
4234                         taa->LOCADDR_length = tep->te_alen;
4235                         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4236 
                             /* Local address goes directly behind the header. */
4237                         bcopy(tep->te_abuf, ackmp->b_wptr,
4238                             tep->te_alen);
4239                         ackmp->b_wptr += tep->te_alen;
4240                         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4241                 }
4242 
4243                 (void) qreply(wq, ackmp);
4244         } else {
4245                 ASSERT(tep->te_state == TS_DATA_XFER ||
4246                     tep->te_state == TS_WIND_ORDREL ||
4247                     tep->te_state == TS_WREQ_ORDREL);
4248                 /* connection oriented in data transfer */
4249                 tl_connected_cots_addr_req(mp, tep);
4250         }
4251 }
4252 
4253 
/*
 * Answer a T_ADDR_REQ for an endpoint that has a connected peer: the
 * T_ADDR_ACK carries both tep's address (LOCADDR_*) and the address of
 * tep->te_conp (REMADDR_*).
 *
 * mp is the request message.  It is freed when tep is closing, passed to
 * tl_error_ack() (TSYSERR/ECONNRESET) when there is no peer or the peer is
 * closing, passed to tpi_ack_alloc() to obtain the ack, and handed to
 * tl_memrecover() when that allocation fails.
 *
 * Layout written below:
 *   [struct T_addr_ack][local address][remote address]
 * with the remote address starting at
 * T_ALIGN(LOCADDR_offset + LOCADDR_length).
 */
4254 static void
4255 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4256 {
4257         tl_endpt_t              *peer_tep = tep->te_conp;
4258         size_t                  ack_sz;
4259         mblk_t                  *ackmp;
4260         struct T_addr_ack       *taa;
4261         uchar_t                 *addr_startp;
4262 
4263         if (tep->te_closing) {
4264                 freemsg(mp);
4265                 return;
4266         }
4267 
4268         if (peer_tep == NULL || peer_tep->te_closing) {
4269                 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4270                 return;
4271         }
4272 
4273         ASSERT(tep->te_state >= TS_IDLE);
4274 
             /*
              * Header, plus the local address rounded up with T_ALIGN(),
              * plus the peer's address.
              * NOTE(review): this equals REMADDR_offset + peer te_alen only if
              * sizeof (struct T_addr_ack) is itself T_ALIGN-aligned - confirm.
              */
4275         ack_sz = sizeof (struct T_addr_ack);
4276         ack_sz += T_ALIGN(tep->te_alen);
4277         ack_sz += peer_tep->te_alen;
4278 
4279         ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4280         if (ackmp == NULL) {
                     /*
                      * The log text below says "reallocb" although the call
                      * that failed is tpi_ack_alloc().
                      */
4281                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4282                     "tl_connected_cots_addr_req: reallocb failed"));
4283                 tl_memrecover(tep->te_wq, mp, ack_sz);
4284                 return;
4285         }
4286 
4287         taa = (struct T_addr_ack *)ackmp->b_rptr;
4288 
4289         /* endpoint is bound */
4290         taa->LOCADDR_length = tep->te_alen;
4291         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4292 
             /* Local address is placed directly behind the fixed header. */
4293         addr_startp = (uchar_t *)&taa[1];
4294 
4295         bcopy(tep->te_abuf, addr_startp,
4296             tep->te_alen);
4297 
             /*
              * NOTE(review): any alignment padding between the end of the
              * local address and REMADDR_offset is not written by this
              * function; whether tpi_ack_alloc() clears the buffer is not
              * visible here - confirm.
              */
4298         taa->REMADDR_length = peer_tep->te_alen;
4299         taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4300             taa->LOCADDR_length);
4301         addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4302         bcopy(peer_tep->te_abuf, addr_startp,
4303             peer_tep->te_alen);
             /* The message ends right after the remote address. */
4304         ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4305             taa->REMADDR_offset + peer_tep->te_alen;
4306         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4307 
             /* The ack is sent up tep's own read queue. */
4308         putnext(tep->te_rq, ackmp);
4309 }
4310 
/*
 * Fill *ia with the T_info_ack contents describing tep.
 *
 * The structure is first copied wholesale from a template
 * (tl_clts_info_ack for connectionless endpoints, tl_cots_info_ack
 * otherwise) and then adjusted:
 *   - connectionless: TSDU_size is set to tl_tidusz;
 *   - IS_COTSORD(tep): SERV_type is set to T_COTS_ORD;
 *   - always: TIDU_size = tl_tidusz and CURRENT_state = tep->te_state.
 */
4311 static void
4312 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4313 {
4314         if (IS_CLTS(tep)) {
4315                 *ia = tl_clts_info_ack;
4316                 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4317         } else {
4318                 *ia = tl_cots_info_ack;
4319                 if (IS_COTSORD(tep))
4320                         ia->SERV_type = T_COTS_ORD;
4321         }
4322         ia->TIDU_size = tl_tidusz;
4323         ia->CURRENT_state = tep->te_state;
4324 }
4325 
4326 /*
4327  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4328  * tl_wput.
      *
      * mp is the request.  It is freed when tep is closing, otherwise it is
      * passed to tpi_ack_alloc() to obtain the T_CAPABILITY_ACK, and handed to
      * tl_memrecover() if that allocation fails.
      *
      * Only the TC1_INFO and TC1_ACCEPTOR_ID bits of the request are acted on;
      * CAP_bits1 in the ack contains exactly the bits that were filled in.
      * The ack is sent up tep's read queue.
4329  */
4330 static void
4331 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4332 {
4333         mblk_t                  *ackmp;
4334         t_uscalar_t             cap_bits1;
4335         struct T_capability_ack *tcap;
4336 
4337         if (tep->te_closing) {
4338                 freemsg(mp);
4339                 return;
4340         }
4341 
             /*
              * Read the requested bits first: mp is passed to tpi_ack_alloc()
              * next and the ack is built in the message it returns.
              */
4342         cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4343 
4344         ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4345             M_PCPROTO, T_CAPABILITY_ACK);
4346         if (ackmp == NULL) {
4347                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4348                     "tl_capability_req: reallocb failed"));
4349                 tl_memrecover(tep->te_wq, mp,
4350                     sizeof (struct T_capability_ack));
4351                 return;
4352         }
4353 
4354         tcap = (struct T_capability_ack *)ackmp->b_rptr;
             /* Start with no capabilities and add each one that is answered. */
4355         tcap->CAP_bits1 = 0;
4356 
4357         if (cap_bits1 & TC1_INFO) {
4358                 tl_copy_info(&tcap->INFO_ack, tep);
4359                 tcap->CAP_bits1 |= TC1_INFO;
4360         }
4361 
4362         if (cap_bits1 & TC1_ACCEPTOR_ID) {
4363                 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4364                 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4365         }
4366 
4367         putnext(tep->te_rq, ackmp);
4368 }
4369 
/*
 * Serializer-side wrapper for T_INFO_REQ processing.
 *
 * If the endpoint is not closing, mp is handed to tl_info_req(); otherwise
 * mp is freed here.  In both cases the function finishes by calling
 * tl_serializer_exit() and tl_refrele() on tep.
 *
 * NOTE(review): presumably those two calls undo a serializer entry and a
 * reference taken by whoever scheduled this callback - confirm at the caller.
 */
4370 static void
4371 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4372 {
4373         if (! tep->te_closing)
4374                 tl_info_req(mp, tep);
4375         else
4376                 freemsg(mp);
4377 
4378         tl_serializer_exit(tep);
4379         tl_refrele(tep);
4380 }
4381 
/*
 * Answer a T_INFO_REQ on tep with a T_INFO_ACK.
 *
 * mp is the request; it is passed to tpi_ack_alloc() to obtain the ack and
 * handed to tl_memrecover() if that allocation fails.  The ack contents come
 * from tl_copy_info() and the ack is sent up tep's read queue.
 *
 * This function does not test te_closing itself; tl_info_req_ser() performs
 * that test before calling it.
 */
4382 static void
4383 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4384 {
4385         mblk_t *ackmp;
4386 
4387         ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4388             M_PCPROTO, T_INFO_ACK);
4389         if (ackmp == NULL) {
4390                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4391                     "tl_info_req: reallocb failed"));
4392                 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4393                 return;
4394         }
4395 
4396         /*
4397          * fill in T_INFO_ACK contents
4398          */
4399         tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4400 
4401         /*
4402          * send ack message
4403          */
4404         putnext(tep->te_rq, ackmp);
4405 }
4406 
4407 /*
4408  * Handle M_DATA, T_data_req and T_optdata_req.
4409  * If this is a socket pass through T_optdata_req options unmodified.
      *
      * mp is disposed of on every path (freed, passed to tl_merror(), put back
      * with TL_PUTBQ(), queued with tl_icon_queuemsg(), or sent to the peer).
      *
      * Outline of the checks, in order:
      *   - connectionless endpoint: EPROTO via tl_merror(), or free if closing;
      *   - closing endpoint not in TS_DATA_XFER/TS_WREQ_ORDREL: free;
      *   - M_PROTO that is a short T_DATA_REQ, a short T_OPTDATA_REQ, or a
      *     T_OPTDATA_REQ on a non-socket: EPROTO, or free if closing;
      *   - per-state handling of tep (see the switch below);
      *   - peer missing or closing: free;
      *   - peer flow controlled and tep not closing: put the message back;
      *   - peer not in TS_DATA_XFER/TS_WIND_ORDREL: EPROTO on the peer;
      *   - otherwise the primitive is rewritten from REQ to IND in place and
      *     the message is sent up the peer's read queue.
4410  */
4411 static void
4412 tl_data(mblk_t *mp, tl_endpt_t *tep)
4413 {
4414         queue_t                 *wq = tep->te_wq;
4415         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4416         ssize_t                 msz = MBLKL(mp);
4417         tl_endpt_t              *peer_tep;
4418         queue_t                 *peer_rq;
4419         boolean_t               closing = tep->te_closing;
4420 
4421         if (IS_CLTS(tep)) {
4422                 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4423                     SL_TRACE|SL_ERROR,
4424                     "tl_wput:clts:unattached M_DATA"));
4425                 if (!closing) {
4426                         tl_merror(wq, mp, EPROTO);
4427                 } else {
4428                         freemsg(mp);
4429                 }
4430                 return;
4431         }
4432 
4433         /*
4434          * If the endpoint is closing it should still forward any data to the
4435          * peer (if it has one). If it is not allowed to forward it can just
4436          * free the message.
4437          */
4438         if (closing &&
4439             (tep->te_state != TS_DATA_XFER) &&
4440             (tep->te_state != TS_WREQ_ORDREL)) {
4441                 freemsg(mp);
4442                 return;
4443         }
4444 
             /* Only M_PROTO messages carry a primitive to validate. */
4445         if (DB_TYPE(mp) == M_PROTO) {
4446                 if (prim->type == T_DATA_REQ &&
4447                     msz < sizeof (struct T_data_req)) {
4448                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4449                                 SL_TRACE|SL_ERROR,
4450                                 "tl_data:T_DATA_REQ:invalid message"));
4451                         if (!closing) {
4452                                 tl_merror(wq, mp, EPROTO);
4453                         } else {
4454                                 freemsg(mp);
4455                         }
4456                         return;
4457                 } else if (prim->type == T_OPTDATA_REQ &&
4458                     (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4459                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4460                             SL_TRACE|SL_ERROR,
4461                             "tl_data:T_OPTDATA_REQ:invalid message"));
4462                         if (!closing) {
4463                                 tl_merror(wq, mp, EPROTO);
4464                         } else {
4465                                 freemsg(mp);
4466                         }
4467                         return;
4468                 }
4469         }
4470 
4471         /*
4472          * connection oriented provider
4473          */
             /*
              * A closing endpoint reaches this switch only in TS_DATA_XFER or
              * TS_WREQ_ORDREL (see the early return above), so the TS_IDLE and
              * default arms are only taken when the endpoint is not closing.
              */
4474         switch (tep->te_state) {
4475         case TS_IDLE:
4476                 /*
4477                  * Other end not here - do nothing.
4478                  */
4479                 freemsg(mp);
4480                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4481                     "tl_data:cots with endpoint idle"));
4482                 return;
4483 
4484         case TS_DATA_XFER:
4485                 /* valid states */
4486                 if (tep->te_conp != NULL)
4487                         break;
4488 
                     /* Neither connected nor an outgoing connect pending. */
4489                 if (tep->te_oconp == NULL) {
4490                         if (!closing) {
4491                                 tl_merror(wq, mp, EPROTO);
4492                         } else {
4493                                 freemsg(mp);
4494                         }
4495                         return;
4496                 }
4497                 /*
4498                  * For a socket the T_CONN_CON is sent early thus
4499                  * the peer might not yet have accepted the connection.
4500                  * If we are closing queue the packet with the T_CONN_IND.
4501                  * Otherwise defer processing the packet until the peer
4502                  * accepts the connection.
4503                  * Note that the queue is noenabled when we go into this
4504                  * state.
4505                  */
4506                 if (!closing) {
4507                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4508                             SL_TRACE|SL_ERROR,
4509                             "tl_data: ocon"));
4510                         TL_PUTBQ(tep, mp);
4511                         return;
4512                 }
4513                 if (DB_TYPE(mp) == M_PROTO) {
                             /*
                              * NOTE(review): prim->type was already read in the
                              * validation block above, before this length
                              * check - presumably the caller guarantees at
                              * least a t_scalar_t; confirm.
                              */
4514                         if (msz < sizeof (t_scalar_t)) {
4515                                 freemsg(mp);
4516                                 return;
4517                         }
4518                         /* reuse message block - just change REQ to IND */
4519                         if (prim->type == T_DATA_REQ)
4520                                 prim->type = T_DATA_IND;
4521                         else
4522                                 prim->type = T_OPTDATA_IND;
4523                 }
                     /* Queue it on the endpoint we are connecting to. */
4524                 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4525                 return;
4526 
4527         case TS_WREQ_ORDREL:
4528                 if (tep->te_conp == NULL) {
4529                         /*
4530                          * Other end closed - generate discon_ind
4531                          * with reason 0 to cause an EPIPE but no
4532                          * read side error on AF_UNIX sockets.
4533                          */
4534                         freemsg(mp);
4535                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
4536                             SL_TRACE|SL_ERROR,
4537                             "tl_data: WREQ_ORDREL and no peer"));
4538                         tl_discon_ind(tep, 0);
4539                         return;
4540                 }
4541                 break;
4542 
4543         default:
4544                 /* invalid state for event TE_DATA_REQ */
4545                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4546                     "tl_data:cots:out of state"));
4547                 tl_merror(wq, mp, EPROTO);
4548                 return;
4549         }
4550         /*
4551          * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4552          * (State stays same on this event)
4553          */
4554 
4555         /*
4556          * get connected endpoint
4557          */
4558         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4559                 freemsg(mp);
4560                 /* Peer closed */
4561                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4562                     "tl_data: peer gone"));
4563                 return;
4564         }
4565 
4566         ASSERT(tep->te_serializer == peer_tep->te_serializer);
4567         peer_rq = peer_tep->te_rq;
4568 
4569         /*
4570          * Put it back if flow controlled
4571          * Note: Messages already on queue when we are closing is bounded
4572          * so we can ignore flow control.
4573          */
4574         if (!canputnext(peer_rq) && !closing) {
4575                 TL_PUTBQ(tep, mp);
4576                 return;
4577         }
4578 
4579         /*
4580          * validate peer state
4581          */
4582         switch (peer_tep->te_state) {
4583         case TS_DATA_XFER:
4584         case TS_WIND_ORDREL:
4585                 /* valid states */
4586                 break;
4587         default:
4588                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4589                     "tl_data:rx side:invalid state"));
                     /* The error is raised on the peer's write queue, not tep's. */
4590                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4591                 return;
4592         }
4593         if (DB_TYPE(mp) == M_PROTO) {
4594                 /* reuse message block - just change REQ to IND */
4595                 if (prim->type == T_DATA_REQ)
4596                         prim->type = T_DATA_IND;
4597                 else
4598                         prim->type = T_OPTDATA_IND;
4599         }
4600         /*
4601          * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4602          * (peer state stays same on this event)
4603          */
4604         /*
4605          * send data to connected peer
4606          */
4607         putnext(peer_rq, mp);
4608 }
4609 
4610 
4611 
/*
 * Handle a T_EXDATA_REQ on tep.
 *
 * mp is disposed of on every path (freed, passed to tl_merror(), put back
 * with TL_PUTBQ(), queued with tl_icon_queuemsg(), or sent to the peer).
 *
 * Outline of the checks, in order:
 *   - message shorter than struct T_exdata_req: EPROTO via tl_merror(), or
 *     free if closing;
 *   - closing endpoint not in TS_DATA_XFER/TS_WREQ_ORDREL: free;
 *   - per-state handling of tep (see the switch below);
 *   - peer missing or closing: free;
 *   - peer flow controlled and tep not closing: put the message back;
 *   - peer not in TS_DATA_XFER/TS_WIND_ORDREL: EPROTO on the peer;
 *   - otherwise the primitive is rewritten to T_EXDATA_IND in place and the
 *     message is sent up the peer's read queue.
 */
4612 static void
4613 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4614 {
4615         queue_t                 *wq = tep->te_wq;
4616         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4617         ssize_t                 msz = MBLKL(mp);
4618         tl_endpt_t              *peer_tep;
4619         queue_t                 *peer_rq;
4620         boolean_t               closing = tep->te_closing;
4621 
4622         if (msz < sizeof (struct T_exdata_req)) {
4623                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4624                     "tl_exdata:invalid message"));
4625                 if (!closing) {
4626                         tl_merror(wq, mp, EPROTO);
4627                 } else {
4628                         freemsg(mp);
4629                 }
4630                 return;
4631         }
4632 
4633         /*
4634          * If the endpoint is closing it should still forward any data to the
4635          * peer (if it has one). If it is not allowed to forward it can just
4636          * free the message.
4637          */
4638         if (closing &&
4639             (tep->te_state != TS_DATA_XFER) &&
4640             (tep->te_state != TS_WREQ_ORDREL)) {
4641                 freemsg(mp);
4642                 return;
4643         }
4644 
4645         /*
4646          * validate state
4647          */
             /*
              * A closing endpoint reaches this switch only in TS_DATA_XFER or
              * TS_WREQ_ORDREL (see the early return above), so the TS_IDLE and
              * default arms are only taken when the endpoint is not closing.
              */
4648         switch (tep->te_state) {
4649         case TS_IDLE:
4650                 /*
4651                  * Other end not here - do nothing.
4652                  */
4653                 freemsg(mp);
4654                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4655                     "tl_exdata:cots with endpoint idle"));
4656                 return;
4657 
4658         case TS_DATA_XFER:
4659                 /* valid states */
4660                 if (tep->te_conp != NULL)
4661                         break;
4662 
                     /* Neither connected nor an outgoing connect pending. */
4663                 if (tep->te_oconp == NULL) {
4664                         if (!closing) {
4665                                 tl_merror(wq, mp, EPROTO);
4666                         } else {
4667                                 freemsg(mp);
4668                         }
4669                         return;
4670                 }
4671                 /*
4672                  * For a socket the T_CONN_CON is sent early thus
4673                  * the peer might not yet have accepted the connection.
4674                  * If we are closing queue the packet with the T_CONN_IND.
4675                  * Otherwise defer processing the packet until the peer
4676                  * accepts the connection.
4677                  * Note that the queue is noenabled when we go into this
4678                  * state.
4679                  */
4680                 if (!closing) {
4681                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4682                             SL_TRACE|SL_ERROR,
4683                             "tl_exdata: ocon"));
4684                         TL_PUTBQ(tep, mp);
4685                         return;
4686                 }
4687                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4688                     "tl_exdata: closing socket ocon"));
                     /* Convert to an indication and queue it on te_oconp. */
4689                 prim->type = T_EXDATA_IND;
4690                 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4691                 return;
4692 
4693         case TS_WREQ_ORDREL:
4694                 if (tep->te_conp == NULL) {
4695                         /*
4696                          * Other end closed - generate discon_ind
4697                          * with reason 0 to cause an EPIPE but no
4698                          * read side error on AF_UNIX sockets.
4699                          */
4700                         freemsg(mp);
4701                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
4702                             SL_TRACE|SL_ERROR,
4703                             "tl_exdata: WREQ_ORDREL and no peer"));
4704                         tl_discon_ind(tep, 0);
4705                         return;
4706                 }
4707                 break;
4708 
4709         default:
4710                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4711                     SL_TRACE|SL_ERROR,
4712                     "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4713                     tep->te_state));
4714                 tl_merror(wq, mp, EPROTO);
4715                 return;
4716         }
4717         /*
4718          * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4719          * (state stays same on this event)
4720          */
4721 
4722         /*
4723          * get connected endpoint
4724          */
4725         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4726                 freemsg(mp);
4727                 /* Peer closed */
4728                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4729                     "tl_exdata: peer gone"));
4730                 return;
4731         }
4732 
4733         peer_rq = peer_tep->te_rq;
4734 
4735         /*
4736          * Put it back if flow controlled
4737          * Note: Messages already on queue when we are closing is bounded
4738          * so we can ignore flow control.
4739          */
4740         if (!canputnext(peer_rq) && !closing) {
4741                 TL_PUTBQ(tep, mp);
4742                 return;
4743         }
4744 
4745         /*
4746          * validate state on peer
4747          */
4748         switch (peer_tep->te_state) {
4749         case TS_DATA_XFER:
4750         case TS_WIND_ORDREL:
4751                 /* valid states */
4752                 break;
4753         default:
4754                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4755                     "tl_exdata:rx side:invalid state"));
                     /* The error is raised on the peer's write queue, not tep's. */
4756                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4757                 return;
4758         }
4759         /*
4760          * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4761          * (peer state stays same on this event)
4762          */
4763         /*
4764          * reuse message block
4765          */
4766         prim->type = T_EXDATA_IND;
4767 
4768         /*
4769          * send data to connected peer
4770          */
4771         putnext(peer_rq, mp);
4772 }
4773 
4774 
4775 
4776 static void
4777 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4778 {
4779         queue_t                 *wq =  tep->te_wq;
4780         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4781         ssize_t                 msz = MBLKL(mp);
4782         tl_endpt_t              *peer_tep;
4783         queue_t                 *peer_rq;
4784         boolean_t               closing = tep->te_closing;
4785 
4786         if (msz < sizeof (struct T_ordrel_req)) {
4787                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4788                     "tl_ordrel:invalid message"));
4789                 if (!closing) {
4790                         tl_merror(wq, mp, EPROTO);
4791                 } else {
4792                         freemsg(mp);
4793                 }
4794                 return;
4795         }
4796 
4797         /*
4798          * validate state
4799          */
4800         switch (tep->te_state) {
4801         case TS_DATA_XFER:
4802         case TS_WREQ_ORDREL:
4803                 /* valid states */
4804                 if (tep->te_conp != NULL)
4805                         break;
4806 
4807                 if (tep->te_oconp == NULL)
4808                         break;
4809 
4810                 /*
4811                  * For a socket the T_CONN_CON is sent early thus
4812                  * the peer might not yet have accepted the connection.
4813                  * If we are closing queue the packet with the T_CONN_IND.
4814                  * Otherwise defer processing the packet until the peer
4815                  * accepts the connection.
4816                  * Note that the queue is noenabled when we go into this
4817                  * state.
4818                  */
4819                 if (!closing) {
4820                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4821                             SL_TRACE|SL_ERROR,
4822                             "tl_ordlrel: ocon"));
4823                         TL_PUTBQ(tep, mp);
4824                         return;
4825                 }
4826                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4827                     "tl_ordlrel: closing socket ocon"));
4828                 prim->type = T_ORDREL_IND;
4829                 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4830                 return;
4831 
4832         default:
4833                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4834                     SL_TRACE|SL_ERROR,
4835                     "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4836                     tep->te_state));
4837                 if (!closing) {
4838                         tl_merror(wq, mp, EPROTO);
4839                 } else {
4840                         freemsg(mp);
4841                 }
4842                 return;
4843         }
4844         tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4845 
4846         /*
4847          * get connected endpoint
4848          */
4849         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4850                 /* Peer closed */
4851                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4852                     "tl_ordrel: peer gone"));
4853                 freemsg(mp);
4854                 return;
4855         }
4856 
4857         peer_rq = peer_tep->te_rq;
4858 
4859         /*
4860          * Put it back if flow controlled except when we are closing.
4861          * Note: Messages already on queue when we are closing is bounded
4862          * so we can ignore flow control.
4863          */
4864         if (! canputnext(peer_rq) && !closing) {
4865                 TL_PUTBQ(tep, mp);
4866                 return;
4867         }
4868 
4869         /*
4870          * validate state on peer
4871          */
4872         switch (peer_tep->te_state) {
4873         case TS_DATA_XFER:
4874         case TS_WIND_ORDREL:
4875                 /* valid states */
4876                 break;
4877         default:
4878                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4879                     "tl_ordrel:rx side:invalid state"));
4880                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4881                 return;
4882         }
4883         peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4884 
4885         /*
4886          * reuse message block
4887          */
4888         prim->type = T_ORDREL_IND;
4889         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4890             "tl_ordrel: send ordrel_ind"));
4891 
4892         /*
4893          * send data to connected peer
4894          */
4895         putnext(peer_rq, mp);
4896 }
4897 
4898 
4899 /*
4900  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4901  */
4902 static void
4903 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4904 {
4905         size_t                  err_sz;
4906         tl_endpt_t              *tep;
4907         struct T_unitdata_req   *udreq;
4908         mblk_t                  *err_mp;
4909         t_scalar_t              alen;
4910         t_scalar_t              olen;
4911         struct T_uderror_ind    *uderr;
4912         uchar_t                 *addr_startp;
4913 
4914         err_sz = sizeof (struct T_uderror_ind);
4915         tep = (tl_endpt_t *)wq->q_ptr;
4916         udreq = (struct T_unitdata_req *)mp->b_rptr;
4917         alen = udreq->DEST_length;
4918         olen = udreq->OPT_length;
4919 
4920         if (alen > 0)
4921                 err_sz = T_ALIGN(err_sz + alen);
4922         if (olen > 0)
4923                 err_sz += olen;
4924 
4925         err_mp = allocb(err_sz, BPRI_MED);
4926         if (! err_mp) {
4927                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4928                     "tl_uderr:allocb failure"));
4929                 /*
4930                  * Note: no rollback of state needed as it does
4931                  * not change in connectionless transport
4932                  */
4933                 tl_memrecover(wq, mp, err_sz);
4934                 return;
4935         }
4936 
4937         DB_TYPE(err_mp) = M_PROTO;
4938         err_mp->b_wptr = err_mp->b_rptr + err_sz;
4939         uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4940         uderr->PRIM_type = T_UDERROR_IND;
4941         uderr->ERROR_type = err;
4942         uderr->DEST_length = alen;
4943         uderr->OPT_length = olen;
4944         if (alen <= 0) {
4945                 uderr->DEST_offset = 0;
4946         } else {
4947                 uderr->DEST_offset =
4948                     (t_scalar_t)sizeof (struct T_uderror_ind);
4949                 addr_startp  = mp->b_rptr + udreq->DEST_offset;
4950                 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4951                     (size_t)alen);
4952         }
4953         if (olen <= 0) {
4954                 uderr->OPT_offset = 0;
4955         } else {
4956                 uderr->OPT_offset =
4957                     (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4958                     uderr->DEST_length);
4959                 addr_startp  = mp->b_rptr + udreq->OPT_offset;
4960                 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4961                     (size_t)olen);
4962         }
4963         freemsg(mp);
4964 
4965         /*
4966          * send indication message
4967          */
4968         tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4969 
4970         qreply(wq, err_mp);
4971 }
4972 
4973 static void
4974 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4975 {
4976         queue_t *wq = tep->te_wq;
4977 
4978         if (!tep->te_closing && (wq->q_first != NULL)) {
4979                 TL_PUTQ(tep, mp);
4980         } else if (tep->te_rq != NULL)
4981                 tl_unitdata(mp, tep);
4982         else
4983                 freemsg(mp);
4984 
4985         tl_serializer_exit(tep);
4986         tl_refrele(tep);
4987 }
4988 
4989 /*
4990  * Handle T_unitdata_req.
4991  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4992  * If this is a socket pass through options unmodified.
4993  */
4994 static void
4995 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4996 {
4997         queue_t                 *wq = tep->te_wq;
4998         soux_addr_t             ux_addr;
4999         tl_addr_t               destaddr;
5000         uchar_t                 *addr_startp;
5001         tl_endpt_t              *peer_tep;
5002         struct T_unitdata_ind   *udind;
5003         struct T_unitdata_req   *udreq;
5004         ssize_t                 msz, ui_sz, reuse_mb_sz;
5005         t_scalar_t              alen, aoff, olen, ooff;
5006         t_scalar_t              oldolen = 0;
5007         cred_t                  *cr = NULL;
5008         pid_t                   cpid;
5009 
5010         udreq = (struct T_unitdata_req *)mp->b_rptr;
5011         msz = MBLKL(mp);
5012 
5013         /*
5014          * validate the state
5015          */
5016         if (tep->te_state != TS_IDLE) {
5017                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5018                     SL_TRACE|SL_ERROR,
5019                     "tl_wput:T_CONN_REQ:out of state"));
5020                 tl_merror(wq, mp, EPROTO);
5021                 return;
5022         }
5023         /*
5024          * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5025          * (state does not change on this event)
5026          */
5027 
5028         /*
5029          * validate the message
5030          * Note: dereference fields in struct inside message only
5031          * after validating the message length.
5032          */
5033         if (msz < sizeof (struct T_unitdata_req)) {
5034                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5035                     "tl_unitdata:invalid message length"));
5036                 tl_merror(wq, mp, EINVAL);
5037                 return;
5038         }
5039         alen = udreq->DEST_length;
5040         aoff = udreq->DEST_offset;
5041         oldolen = olen = udreq->OPT_length;
5042         ooff = udreq->OPT_offset;
5043         if (olen == 0)
5044                 ooff = 0;
5045 
5046         if (IS_SOCKET(tep)) {
5047                 if ((alen != TL_SOUX_ADDRLEN) ||
5048                     (aoff < 0) ||
5049                     (aoff + alen > msz) ||
5050                     (olen < 0) || (ooff < 0) ||
5051                     ((olen > 0) && ((ooff + olen) > msz))) {
5052                         (void) (STRLOG(TL_ID, tep->te_minor,
5053                             1, SL_TRACE|SL_ERROR,
5054                             "tl_unitdata_req: invalid socket addr "
5055                             "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5056                             (int)msz, alen, aoff, olen, ooff));
5057                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5058                         return;
5059                 }
5060                 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5061 
5062                 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5063                     (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5064                         (void) (STRLOG(TL_ID, tep->te_minor,
5065                             1, SL_TRACE|SL_ERROR,
5066                             "tl_conn_req: invalid socket magic"));
5067                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5068                         return;
5069                 }
5070         } else {
5071                 if ((alen < 0) ||
5072                     (aoff < 0) ||
5073                     ((alen > 0) && ((aoff + alen) > msz)) ||
5074                     ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5075                     ((aoff + alen) < 0) ||
5076                     ((olen > 0) && ((ooff + olen) > msz)) ||
5077                     (olen < 0) ||
5078                     (ooff < 0) ||
5079                     ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5080                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
5081                                     SL_TRACE|SL_ERROR,
5082                                     "tl_unitdata:invalid unit data message"));
5083                         tl_merror(wq, mp, EINVAL);
5084                         return;
5085                 }
5086         }
5087 
5088         /* Options not supported unless it's a socket */
5089         if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5090                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5091                     "tl_unitdata:option use(unsupported) or zero len addr"));
5092                 tl_uderr(wq, mp, EPROTO);
5093                 return;
5094         }
5095 #ifdef DEBUG
5096         /*
5097          * Mild form of ASSERT()ion to detect broken TPI apps.
5098          * if (! assertion)
5099          *      log warning;
5100          */
5101         if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5102                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5103                     "tl_unitdata:addr overlaps TPI message"));
5104         }
5105 #endif
5106         /*
5107          * get destination endpoint
5108          */
5109         destaddr.ta_alen = alen;
5110         destaddr.ta_abuf = mp->b_rptr + aoff;
5111         destaddr.ta_zoneid = tep->te_zoneid;
5112 
5113         /*
5114          * Check whether the destination is the same that was used previously
5115          * and the destination endpoint is in the right state. If something is
5116          * wrong, find destination again and cache it.
5117          */
5118         peer_tep = tep->te_lastep;
5119 
5120         if ((peer_tep == NULL) || peer_tep->te_closing ||
5121             (peer_tep->te_state != TS_IDLE) ||
5122             !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5123                 /*
5124                  * Not the same as cached destination , need to find the right
5125                  * destination.
5126                  */
5127                 peer_tep = (IS_SOCKET(tep) ?
5128                     tl_sock_find_peer(tep, &ux_addr) :
5129                     tl_find_peer(tep, &destaddr));
5130 
5131                 if (peer_tep == NULL) {
5132                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5133                             SL_TRACE|SL_ERROR,
5134                             "tl_unitdata:no one at destination address"));
5135                         tl_uderr(wq, mp, ECONNRESET);
5136                         return;
5137                 }
5138 
5139                 /*
5140                  * Cache the new peer.
5141                  */
5142                 if (tep->te_lastep != NULL)
5143                         tl_refrele(tep->te_lastep);
5144 
5145                 tep->te_lastep = peer_tep;
5146         }
5147 
5148         if (peer_tep->te_state != TS_IDLE) {
5149                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5150                     "tl_unitdata:provider in invalid state"));
5151                 tl_uderr(wq, mp, EPROTO);
5152                 return;
5153         }
5154 
5155         ASSERT(peer_tep->te_rq != NULL);
5156 
5157         /*
5158          * Put it back if flow controlled except when we are closing.
5159          * Note: Messages already on queue when we are closing is bounded
5160          * so we can ignore flow control.
5161          */
5162         if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5163                 /* record what we are flow controlled on */
5164                 if (tep->te_flowq != NULL) {
5165                         list_remove(&tep->te_flowq->te_flowlist, tep);
5166                 }
5167                 list_insert_head(&peer_tep->te_flowlist, tep);
5168                 tep->te_flowq = peer_tep;
5169                 TL_PUTBQ(tep, mp);
5170                 return;
5171         }
5172         /*
5173          * prepare indication message
5174          */
5175 
5176         /*
5177          * calculate length of message
5178          */
5179         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5180                 cr = msg_getcred(mp, &cpid);
5181                 ASSERT(cr != NULL);
5182 
5183                 if (peer_tep->te_flag & TL_SETCRED) {
5184                         ASSERT(olen == 0);
5185                         olen = (t_scalar_t)sizeof (struct opthdr) +
5186                             OPTLEN(sizeof (tl_credopt_t));
5187                                                 /* 1 option only */
5188                 } else if (peer_tep->te_flag & TL_SETUCRED) {
5189                         ASSERT(olen == 0);
5190                         olen = (t_scalar_t)sizeof (struct opthdr) +
5191                             OPTLEN(ucredminsize(cr));
5192                                                 /* 1 option only */
5193                 } else {
5194                         /* Possibly more than one option */
5195                         olen += (t_scalar_t)sizeof (struct T_opthdr) +
5196                             OPTLEN(ucredminsize(cr));
5197                 }
5198         }
5199 
5200         ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5201         reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5202 
5203         /*
5204          * If the unitdata_ind fits and we are not adding options
5205          * reuse the udreq mblk.
5206          *
5207          * Otherwise, it is possible we need to append an option if one of the
5208          * te_flag bits is set. This requires extra space in the data block for
5209          * the additional option but the traditional technique used below to
5210          * allocate a new block and copy into it will not work when there is a
5211          * message block with a free pointer (since we don't know anything
5212          * about the layout of the data, pointers referencing or within the
5213          * data, etc.). To handle this possibility the upper layers may have
5214          * preallocated some space to use for appending an option. We check the
5215          * overall mblock size against the size we need ('reuse_mb_sz' with the
5216          * original address length [alen] to ensure we won't overrun the
5217          * current mblk data size) to see if there is free space and thus
5218          * avoid allocating a new message block.
5219          */
5220         if (msz >= ui_sz && alen >= tep->te_alen &&
5221             !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5222                 /*
5223                  * Reuse the original mblk. Leave options in place.
5224                  */
5225                 udind =  (struct T_unitdata_ind *)mp->b_rptr;
5226                 udind->PRIM_type = T_UNITDATA_IND;
5227                 udind->SRC_length = tep->te_alen;
5228                 addr_startp = mp->b_rptr + udind->SRC_offset;
5229                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5230 
5231         } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5232             mp->b_datap->db_frtnp != NULL) {
5233                 /*
5234                  * We have a message block with a free pointer, but extra space
5235                  * has been pre-allocated for us in case we need to append an
5236                  * option. Reuse the original mblk, leaving existing options in
5237                  * place.
5238                  */
5239                 udind =  (struct T_unitdata_ind *)mp->b_rptr;
5240                 udind->PRIM_type = T_UNITDATA_IND;
5241                 udind->SRC_length = tep->te_alen;
5242                 addr_startp = mp->b_rptr + udind->SRC_offset;
5243                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5244 
5245                 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5246                         ASSERT(cr != NULL);
5247                         /*
5248                          * We're appending one new option here after the
5249                          * original ones.
5250                          */
5251                         tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5252                             cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5253                 }
5254 
5255         } else if (mp->b_datap->db_frtnp != NULL) {
5256                 /*
5257                  * The next block creates a new mp and tries to copy the data
5258                  * block into it, but that cannot handle a message with a free
5259                  * pointer (for more details see the comment in kstrputmsg()
5260                  * where dupmsg() is called). Since we can never properly
5261                  * duplicate the mp while also extending the data, just error
5262                  * out now.
5263                  */
5264                 tl_uderr(wq, mp, EPROTO);
5265                 return;
5266         } else {
5267                 /* Allocate a new T_unitdata_ind message */
5268                 mblk_t *ui_mp;
5269 
5270                 ui_mp = allocb(ui_sz, BPRI_MED);
5271                 if (! ui_mp) {
5272                         (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5273                             "tl_unitdata:allocb failure:message queued"));
5274                         tl_memrecover(wq, mp, ui_sz);
5275                         return;
5276                 }
5277 
5278                 /*
5279                  * fill in T_UNITDATA_IND contents
5280                  */
5281                 DB_TYPE(ui_mp) = M_PROTO;
5282                 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5283                 udind =  (struct T_unitdata_ind *)ui_mp->b_rptr;
5284                 udind->PRIM_type = T_UNITDATA_IND;
5285                 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5286                 udind->SRC_length = tep->te_alen;
5287                 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5288                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5289                 udind->OPT_offset =
5290                     (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5291                 udind->OPT_length = olen;
5292                 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5293 
5294                         if (oldolen != 0) {
5295                                 bcopy((void *)((uintptr_t)udreq + ooff),
5296                                     (void *)((uintptr_t)udind +
5297                                     udind->OPT_offset),
5298                                     oldolen);
5299                         }
5300                         ASSERT(cr != NULL);
5301 
5302                         tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5303                             oldolen, cr, cpid,
5304                             peer_tep->te_flag, peer_tep->te_credp);
5305                 } else {
5306                         bcopy((void *)((uintptr_t)udreq + ooff),
5307                             (void *)((uintptr_t)udind + udind->OPT_offset),
5308                             olen);
5309                 }
5310 
5311                 /*
5312                  * relink data blocks from mp to ui_mp
5313                  */
5314                 ui_mp->b_cont = mp->b_cont;
5315                 freeb(mp);
5316                 mp = ui_mp;
5317         }
5318         /*
5319          * send indication message
5320          */
5321         peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5322         putnext(peer_tep->te_rq, mp);
5323 }
5324 
5325 
5326 
5327 /*
5328  * Check if a given addr is in use.
5329  * Endpoint ptr returned or NULL if not found.
5330  * The name space is separate for each mode. This implies that
5331  * sockets get their own name space.
5332  */
5333 static tl_endpt_t *
5334 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5335 {
5336         tl_endpt_t *peer_tep = NULL;
5337         int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5338             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5339 
5340         ASSERT(! IS_SOCKET(tep));
5341 
5342         ASSERT(ap != NULL && ap->ta_alen > 0);
5343         ASSERT(ap->ta_zoneid == tep->te_zoneid);
5344         ASSERT(ap->ta_abuf != NULL);
5345         EQUIV(rc == 0, peer_tep != NULL);
5346         IMPLY(rc == 0,
5347             (tep->te_zoneid == peer_tep->te_zoneid) &&
5348             (tep->te_transport == peer_tep->te_transport));
5349 
5350         if ((rc == 0) && (peer_tep->te_closing)) {
5351                 tl_refrele(peer_tep);
5352                 peer_tep = NULL;
5353         }
5354 
5355         return (peer_tep);
5356 }
5357 
5358 /*
5359  * Find peer for a socket based on unix domain address.
5360  * For implicit addresses our peer can be found by minor number in ai hash. For
5361  * explicit binds we look vnode address at addr_hash.
5362  */
5363 static tl_endpt_t *
5364 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5365 {
5366         tl_endpt_t *peer_tep = NULL;
5367         mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5368             tep->te_aihash : tep->te_addrhash;
5369         int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5370             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5371 
5372         ASSERT(IS_SOCKET(tep));
5373         EQUIV(rc == 0, peer_tep != NULL);
5374         IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5375 
5376         if (peer_tep != NULL) {
5377                 /* Don't attempt to use closing peer. */
5378                 if (peer_tep->te_closing)
5379                         goto errout;
5380 
5381                 /*
5382                  * Cross-zone unix sockets are permitted, but for Trusted
5383                  * Extensions only, the "server" for these must be in the
5384                  * global zone.
5385                  */
5386                 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5387                     is_system_labeled() &&
5388                     (peer_tep->te_zoneid != GLOBAL_ZONEID))
5389                         goto errout;
5390         }
5391 
5392         return (peer_tep);
5393 
5394 errout:
5395         tl_refrele(peer_tep);
5396         return (NULL);
5397 }
5398 
5399 /*
5400  * Generate a free addr and return it in struct pointed by ap
5401  * but allocating space for address buffer.
5402  * The generated address will be at least 4 bytes long and, if req->ta_alen
5403  * exceeds 4 bytes, be req->ta_alen bytes long.
5404  *
5405  * If address is found it will be inserted in the hash.
5406  *
5407  * If req->ta_alen is larger than the default alen (4 bytes) the last
5408  * alen-4 bytes will always be the same as in req.
5409  *
5410  * Return 0 for failure.
5411  * Return non-zero for success.
5412  */
5413 static boolean_t
5414 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5415 {
5416         t_scalar_t      alen;
5417         uint32_t        loopcnt;        /* Limit loop to 2^32 */
5418 
5419         ASSERT(tep->te_hash_hndl != NULL);
5420         ASSERT(! IS_SOCKET(tep));
5421 
5422         if (tep->te_hash_hndl == NULL)
5423                 return (B_FALSE);
5424 
5425         /*
5426          * check if default addr is in use
5427          * if it is - bump it and try again
5428          */
5429         if (req == NULL) {
5430                 alen = sizeof (uint32_t);
5431         } else {
5432                 alen = max(req->ta_alen, sizeof (uint32_t));
5433                 ASSERT(tep->te_zoneid == req->ta_zoneid);
5434         }
5435 
5436         if (tep->te_alen < alen) {
5437                 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5438 
5439                 /*
5440                  * Not enough space in tep->ta_ap to hold the address,
5441                  * allocate a bigger space.
5442                  */
5443                 if (abuf == NULL)
5444                         return (B_FALSE);
5445 
5446                 if (tep->te_alen > 0)
5447                         kmem_free(tep->te_abuf, tep->te_alen);
5448 
5449                 tep->te_alen = alen;
5450                 tep->te_abuf = abuf;
5451         }
5452 
5453         /* Copy in the address in req */
5454         if (req != NULL) {
5455                 ASSERT(alen >= req->ta_alen);
5456                 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5457         }
5458 
5459         /*
5460          * First try minor number then try default addresses.
5461          */
5462         bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5463 
5464         for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5465                 if (mod_hash_insert_reserve(tep->te_addrhash,
5466                     (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5467                     tep->te_hash_hndl) == 0) {
5468                         /*
5469                          * found free address
5470                          */
5471                         tep->te_flag |= TL_ADDRHASHED;
5472                         tep->te_hash_hndl = NULL;
5473 
5474                         return (B_TRUE); /* successful return */
5475                 }
5476                 /*
5477                  * Use default address.
5478                  */
5479                 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5480                 atomic_inc_32(&tep->te_defaddr);
5481         }
5482 
5483         /*
5484          * Failed to find anything.
5485          */
5486         (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5487             "tl_get_any_addr:looped 2^32 times"));
5488         return (B_FALSE);
5489 }
5490 
5491 /*
5492  * reallocb + set r/w ptrs to reflect size.
5493  */
5494 static mblk_t *
5495 tl_resizemp(mblk_t *mp, ssize_t new_size)
5496 {
5497         if ((mp = reallocb(mp, new_size, 0)) == NULL)
5498                 return (NULL);
5499 
5500         mp->b_rptr = DB_BASE(mp);
5501         mp->b_wptr = mp->b_rptr + new_size;
5502         return (mp);
5503 }
5504 
5505 static void
5506 tl_cl_backenable(tl_endpt_t *tep)
5507 {
5508         list_t *l = &tep->te_flowlist;
5509         tl_endpt_t *elp;
5510 
5511         ASSERT(IS_CLTS(tep));
5512 
5513         for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5514                 ASSERT(tep->te_ser == elp->te_ser);
5515                 ASSERT(elp->te_flowq == tep);
5516                 if (! elp->te_closing)
5517                         TL_QENABLE(elp);
5518                 elp->te_flowq = NULL;
5519                 list_remove(l, elp);
5520         }
5521 }
5522 
5523 /*
5524  * Unconnect endpoints.
5525  */
5526 static void
5527 tl_co_unconnect(tl_endpt_t *tep)
5528 {
5529         tl_endpt_t      *peer_tep = tep->te_conp;
5530         tl_endpt_t      *srv_tep = tep->te_oconp;
5531         list_t          *l;
5532         tl_icon_t       *tip;
5533         tl_endpt_t      *cl_tep;
5534         mblk_t          *d_mp;
5535 
5536         ASSERT(IS_COTS(tep));
5537         /*
5538          * If our peer is closing, don't use it.
5539          */
5540         if ((peer_tep != NULL) && peer_tep->te_closing) {
5541                 TL_UNCONNECT(tep->te_conp);
5542                 peer_tep = NULL;
5543         }
5544         if ((srv_tep != NULL) && srv_tep->te_closing) {
5545                 TL_UNCONNECT(tep->te_oconp);
5546                 srv_tep = NULL;
5547         }
5548 
5549         if (tep->te_nicon > 0) {
5550                 l = &tep->te_iconp;
5551                 /*
5552                  * If incoming requests pending, change state
5553                  * of clients on disconnect ind event and send
5554                  * discon_ind pdu to modules above them
5555                  * for server: all clients get disconnect
5556                  */
5557 
5558                 while (tep->te_nicon > 0) {
5559                         tip    = list_head(l);
5560                         cl_tep = tip->ti_tep;
5561 
5562                         if (cl_tep == NULL) {
5563                                 tl_freetip(tep, tip);
5564                                 continue;
5565                         }
5566 
5567                         if (cl_tep->te_oconp != NULL) {
5568                                 ASSERT(cl_tep != cl_tep->te_oconp);
5569                                 TL_UNCONNECT(cl_tep->te_oconp);
5570                         }
5571 
5572                         if (cl_tep->te_closing) {
5573                                 tl_freetip(tep, tip);
5574                                 continue;
5575                         }
5576 
5577                         enableok(cl_tep->te_wq);
5578                         TL_QENABLE(cl_tep);
5579                         d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5580                         if (d_mp != NULL) {
5581                                 cl_tep->te_state = TS_IDLE;
5582                                 putnext(cl_tep->te_rq, d_mp);
5583                         } else {
5584                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5585                                     SL_TRACE|SL_ERROR,
5586                                     "tl_co_unconnect:icmng: "
5587                                     "allocb failure"));
5588                         }
5589                         tl_freetip(tep, tip);
5590                 }
5591         } else if (srv_tep != NULL) {
5592                 /*
5593                  * If outgoing request pending, change state
5594                  * of server on discon ind event
5595                  */
5596 
5597                 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5598                     IS_COTSORD(srv_tep) &&
5599                     !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5600                         /*
5601                          * Queue ordrel_ind for server to be picked up
5602                          * when the connection is accepted.
5603                          */
5604                         d_mp = tl_ordrel_ind_alloc();
5605                 } else {
5606                         /*
5607                          * send discon_ind to server
5608                          */
5609                         d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5610                 }
5611                 if (d_mp == NULL) {
5612                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5613                             SL_TRACE|SL_ERROR,
5614                             "tl_co_unconnect:outgoing:allocb failure"));
5615                         TL_UNCONNECT(tep->te_oconp);
5616                         goto discon_peer;
5617                 }
5618 
5619                 /*
5620                  * If this is a socket the T_DISCON_IND is queued with
5621                  * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5622                  * from the list of pending connections.
5623                  * Note that when te_oconp is set the peer better have
5624                  * a t_connind_t for the client.
5625                  */
5626                 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5627                         /*
5628                          * Queue the disconnection message.
5629                          */
5630                         tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5631                 } else {
5632                         tip = tl_icon_find(srv_tep, tep->te_seqno);
5633                         if (tip == NULL) {
5634                                 freemsg(d_mp);
5635                         } else {
5636                                 ASSERT(tep == tip->ti_tep);
5637                                 ASSERT(tep->te_ser == srv_tep->te_ser);
5638                                 /*
5639                                  * Delete tip from the server list.
5640                                  */
5641                                 if (srv_tep->te_nicon == 1) {
5642                                         srv_tep->te_state =
5643                                             NEXTSTATE(TE_DISCON_IND2,
5644                                             srv_tep->te_state);
5645                                 } else {
5646                                         srv_tep->te_state =
5647                                             NEXTSTATE(TE_DISCON_IND3,
5648                                             srv_tep->te_state);
5649                                 }
5650                                 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5651                                     T_DISCON_IND);
5652                                 putnext(srv_tep->te_rq, d_mp);
5653                                 tl_freetip(srv_tep, tip);
5654                         }
5655                         TL_UNCONNECT(tep->te_oconp);
5656                         srv_tep = NULL;
5657                 }
5658         } else if (peer_tep != NULL) {
5659                 /*
5660                  * unconnect existing connection
5661                  * If connected, change state of peer on
5662                  * discon ind event and send discon ind pdu
5663                  * to module above it
5664                  */
5665 
5666                 ASSERT(tep->te_ser == peer_tep->te_ser);
5667                 if (IS_COTSORD(peer_tep) &&
5668                     (peer_tep->te_state == TS_WIND_ORDREL ||
5669                     peer_tep->te_state == TS_DATA_XFER)) {
5670                         /*
5671                          * send ordrel ind
5672                          */
5673                         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5674                         "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5675                             peer_tep->te_state,
5676                             NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5677                         d_mp = tl_ordrel_ind_alloc();
5678                         if (! d_mp) {
5679                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5680                                     SL_TRACE|SL_ERROR,
5681                                     "tl_co_unconnect:connected:"
5682                                     "allocb failure"));
5683                                 /*
5684                                  * Continue with cleaning up peer as
5685                                  * this side may go away with the close
5686                                  */
5687                                 TL_QENABLE(peer_tep);
5688                                 goto discon_peer;
5689                         }
5690                         peer_tep->te_state =
5691                             NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5692 
5693                         putnext(peer_tep->te_rq, d_mp);
5694                         /*
5695                          * Handle flow control case.  This will generate
5696                          * a t_discon_ind message with reason 0 if there
5697                          * is data queued on the write side.
5698                          */
5699                         TL_QENABLE(peer_tep);
5700                 } else if (IS_COTSORD(peer_tep) &&
5701                     peer_tep->te_state == TS_WREQ_ORDREL) {
5702                         /*
5703                          * Sent an ordrel_ind. We send a discon with
5704                          * with error 0 to inform that the peer is gone.
5705                          */
5706                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5707                             SL_TRACE|SL_ERROR,
5708                             "tl_co_unconnect: discon in state %d",
5709                             tep->te_state));
5710                         tl_discon_ind(peer_tep, 0);
5711                 } else {
5712                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5713                             SL_TRACE|SL_ERROR,
5714                             "tl_co_unconnect: state %d", tep->te_state));
5715                         tl_discon_ind(peer_tep, ECONNRESET);
5716                 }
5717 
5718 discon_peer:
5719                 /*
5720                  * Disconnect cross-pointers only for close
5721                  */
5722                 if (tep->te_closing) {
5723                         peer_tep = tep->te_conp;
5724                         TL_REMOVE_PEER(peer_tep->te_conp);
5725                         TL_REMOVE_PEER(tep->te_conp);
5726                 }
5727         }
5728 }
5729 
5730 /*
5731  * Note: The following routine does not recover from allocb()
5732  * failures
5733  * The reason should be from the <sys/errno.h> space.
5734  */
5735 static void
5736 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5737 {
5738         mblk_t *d_mp;
5739 
5740         if (tep->te_closing)
5741                 return;
5742 
5743         /*
5744          * flush the queues.
5745          */
5746         flushq(tep->te_rq, FLUSHDATA);
5747         (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5748 
5749         /*
5750          * send discon ind
5751          */
5752         d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5753         if (! d_mp) {
5754                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5755                     "tl_discon_ind:allocb failure"));
5756                 return;
5757         }
5758         tep->te_state = TS_IDLE;
5759         putnext(tep->te_rq, d_mp);
5760 }
5761 
5762 /*
5763  * Note: The following routine does not recover from allocb()
5764  * failures
5765  * The reason should be from the <sys/errno.h> space.
5766  */
5767 static mblk_t *
5768 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5769 {
5770         mblk_t *mp;
5771         struct T_discon_ind *tdi;
5772 
5773         if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5774                 DB_TYPE(mp) = M_PROTO;
5775                 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5776                 tdi = (struct T_discon_ind *)mp->b_rptr;
5777                 tdi->PRIM_type = T_DISCON_IND;
5778                 tdi->DISCON_reason = reason;
5779                 tdi->SEQ_number = seqnum;
5780         }
5781         return (mp);
5782 }
5783 
5784 
5785 /*
5786  * Note: The following routine does not recover from allocb()
5787  * failures
5788  */
5789 static mblk_t *
5790 tl_ordrel_ind_alloc(void)
5791 {
5792         mblk_t *mp;
5793         struct T_ordrel_ind *toi;
5794 
5795         if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5796                 DB_TYPE(mp) = M_PROTO;
5797                 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5798                 toi = (struct T_ordrel_ind *)mp->b_rptr;
5799                 toi->PRIM_type = T_ORDREL_IND;
5800         }
5801         return (mp);
5802 }
5803 
5804 
5805 /*
5806  * Lookup the seqno in the list of queued connections.
5807  */
5808 static tl_icon_t *
5809 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5810 {
5811         list_t *l = &tep->te_iconp;
5812         tl_icon_t *tip = list_head(l);
5813 
5814         ASSERT(seqno != 0);
5815 
5816         for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5817                 ;
5818 
5819         return (tip);
5820 }
5821 
5822 /*
5823  * Queue data for a given T_CONN_IND while verifying that redundant
5824  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5825  * Used when the originator of the connection closes.
5826  */
5827 static void
5828 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5829 {
5830         tl_icon_t               *tip;
5831         mblk_t                  **mpp, *mp;
5832         int                     prim, nprim;
5833 
5834         if (nmp->b_datap->db_type == M_PROTO)
5835                 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5836         else
5837                 nprim = -1;     /* M_DATA */
5838 
5839         tip = tl_icon_find(tep, seqno);
5840         if (tip == NULL) {
5841                 freemsg(nmp);
5842                 return;
5843         }
5844 
5845         ASSERT(tip->ti_seqno != 0);
5846         mpp = &tip->ti_mp;
5847         while (*mpp != NULL) {
5848                 mp = *mpp;
5849 
5850                 if (mp->b_datap->db_type == M_PROTO)
5851                         prim = ((union T_primitives *)mp->b_rptr)->type;
5852                 else
5853                         prim = -1;      /* M_DATA */
5854 
5855                 /*
5856                  * Allow nothing after a T_DISCON_IND
5857                  */
5858                 if (prim == T_DISCON_IND) {
5859                         freemsg(nmp);
5860                         return;
5861                 }
5862                 /*
5863                  * Only allow a T_DISCON_IND after an T_ORDREL_IND
5864                  */
5865                 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5866                         freemsg(nmp);
5867                         return;
5868                 }
5869                 mpp = &(mp->b_next);
5870         }
5871         *mpp = nmp;
5872 }
5873 
5874 /*
5875  * Verify if a certain TPI primitive exists on the connind queue.
5876  * Use prim -1 for M_DATA.
5877  * Return non-zero if found.
5878  */
5879 static boolean_t
5880 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5881 {
5882         tl_icon_t *tip = tl_icon_find(tep, seqno);
5883         boolean_t found = B_FALSE;
5884 
5885         if (tip != NULL) {
5886                 mblk_t *mp;
5887                 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5888                         found = (DB_TYPE(mp) == M_PROTO &&
5889                             ((union T_primitives *)mp->b_rptr)->type == prim);
5890                 }
5891         }
5892         return (found);
5893 }
5894 
5895 /*
5896  * Send the b_next mblk chain that has accumulated before the connection
5897  * was accepted. Perform the necessary state transitions.
5898  */
5899 static void
5900 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5901 {
5902         mblk_t                  *mp;
5903         union T_primitives      *primp;
5904 
5905         if (tep->te_closing) {
5906                 tl_icon_freemsgs(mpp);
5907                 return;
5908         }
5909 
5910         ASSERT(tep->te_state == TS_DATA_XFER);
5911         ASSERT(tep->te_rq->q_first == NULL);
5912 
5913         while ((mp = *mpp) != NULL) {
5914                 *mpp = mp->b_next;
5915                 mp->b_next = NULL;
5916 
5917                 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5918                 switch (DB_TYPE(mp)) {
5919                 default:
5920                         freemsg(mp);
5921                         break;
5922                 case M_DATA:
5923                         putnext(tep->te_rq, mp);
5924                         break;
5925                 case M_PROTO:
5926                         primp = (union T_primitives *)mp->b_rptr;
5927                         switch (primp->type) {
5928                         case T_UNITDATA_IND:
5929                         case T_DATA_IND:
5930                         case T_OPTDATA_IND:
5931                         case T_EXDATA_IND:
5932                                 putnext(tep->te_rq, mp);
5933                                 break;
5934                         case T_ORDREL_IND:
5935                                 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5936                                     tep->te_state);
5937                                 putnext(tep->te_rq, mp);
5938                                 break;
5939                         case T_DISCON_IND:
5940                                 tep->te_state = TS_IDLE;
5941                                 putnext(tep->te_rq, mp);
5942                                 break;
5943                         default:
5944 #ifdef DEBUG
5945                                 cmn_err(CE_PANIC,
5946                                     "tl_icon_sendmsgs: unknown primitive");
5947 #endif /* DEBUG */
5948                                 freemsg(mp);
5949                                 break;
5950                         }
5951                         break;
5952                 }
5953         }
5954 }
5955 
5956 /*
5957  * Free the b_next mblk chain that has accumulated before the connection
5958  * was accepted.
5959  */
5960 static void
5961 tl_icon_freemsgs(mblk_t **mpp)
5962 {
5963         mblk_t *mp;
5964 
5965         while ((mp = *mpp) != NULL) {
5966                 *mpp = mp->b_next;
5967                 mp->b_next = NULL;
5968                 freemsg(mp);
5969         }
5970 }
5971 
5972 /*
5973  * Send M_ERROR
5974  * Note: assumes caller ensured enough space in mp or enough
5975  *      memory available. Does not attempt recovery from allocb()
5976  *      failures
5977  */
5978 
5979 static void
5980 tl_merror(queue_t *wq, mblk_t *mp, int error)
5981 {
5982         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5983 
5984         if (tep->te_closing) {
5985                 freemsg(mp);
5986                 return;
5987         }
5988 
5989         (void) (STRLOG(TL_ID, tep->te_minor, 1,
5990             SL_TRACE|SL_ERROR,
5991             "tl_merror: tep=%p, err=%d", (void *)tep, error));
5992 
5993         /*
5994          * flush all messages on queue. we are shutting
5995          * the stream down on fatal error
5996          */
5997         flushq(wq, FLUSHALL);
5998         if (IS_COTS(tep)) {
5999                 /* connection oriented - unconnect endpoints */
6000                 tl_co_unconnect(tep);
6001         }
6002         if (mp->b_cont) {
6003                 freemsg(mp->b_cont);
6004                 mp->b_cont = NULL;
6005         }
6006 
6007         if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6008                 freemsg(mp);
6009                 mp = allocb(1, BPRI_HI);
6010                 if (!mp) {
6011                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
6012                             SL_TRACE|SL_ERROR,
6013                             "tl_merror:M_PROTO: out of memory"));
6014                         return;
6015                 }
6016         }
6017         if (mp) {
6018                 DB_TYPE(mp) = M_ERROR;
6019                 mp->b_rptr = DB_BASE(mp);
6020                 *mp->b_rptr = (char)error;
6021                 mp->b_wptr = mp->b_rptr + sizeof (char);
6022                 qreply(wq, mp);
6023         } else {
6024                 (void) putnextctl1(tep->te_rq, M_ERROR, error);
6025         }
6026 }
6027 
6028 static void
6029 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
6030 {
6031         ASSERT(cr != NULL);
6032 
6033         if (flag & TL_SETCRED) {
6034                 struct opthdr *opt = (struct opthdr *)buf;
6035                 tl_credopt_t *tlcred;
6036 
6037                 opt->level = TL_PROT_LEVEL;
6038                 opt->name = TL_OPT_PEER_CRED;
6039                 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
6040 
6041                 tlcred = (tl_credopt_t *)(opt + 1);
6042                 tlcred->tc_uid = crgetuid(cr);
6043                 tlcred->tc_gid = crgetgid(cr);
6044                 tlcred->tc_ruid = crgetruid(cr);
6045                 tlcred->tc_rgid = crgetrgid(cr);
6046                 tlcred->tc_suid = crgetsuid(cr);
6047                 tlcred->tc_sgid = crgetsgid(cr);
6048                 tlcred->tc_ngroups = crgetngroups(cr);
6049         } else if (flag & TL_SETUCRED) {
6050                 struct opthdr *opt = (struct opthdr *)buf;
6051 
6052                 opt->level = TL_PROT_LEVEL;
6053                 opt->name = TL_OPT_PEER_UCRED;
6054                 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6055 
6056                 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6057         } else {
6058                 struct T_opthdr *topt = (struct T_opthdr *)buf;
6059                 ASSERT(flag & TL_SOCKUCRED);
6060 
6061                 topt->level = SOL_SOCKET;
6062                 topt->name = SCM_UCRED;
6063                 topt->len = ucredminsize(cr) + sizeof (*topt);
6064                 topt->status = 0;
6065                 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6066         }
6067 }
6068 
6069 /* ARGSUSED */
6070 static int
6071 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6072 {
6073         /* no default value processed in protocol specific code currently */
6074         return (-1);
6075 }
6076 
6077 /* ARGSUSED */
6078 static int
6079 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6080 {
6081         int len;
6082         tl_endpt_t *tep;
6083         int *valp;
6084 
6085         tep = (tl_endpt_t *)wq->q_ptr;
6086 
6087         len = 0;
6088 
6089         /*
6090          * Assumes: option level and name sanity check done elsewhere
6091          */
6092 
6093         switch (level) {
6094         case SOL_SOCKET:
6095                 if (! IS_SOCKET(tep))
6096                         break;
6097                 switch (name) {
6098                 case SO_RECVUCRED:
6099                         len = sizeof (int);
6100                         valp = (int *)ptr;
6101                         *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6102                         break;
6103                 default:
6104                         break;
6105                 }
6106                 break;
6107         case TL_PROT_LEVEL:
6108                 switch (name) {
6109                 case TL_OPT_PEER_CRED:
6110                 case TL_OPT_PEER_UCRED:
6111                         /*
6112                          * option not supposed to retrieved directly
6113                          * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6114                          * when some internal flags set by other options
6115                          * Direct retrieval always designed to fail(ignored)
6116                          * for this option.
6117                          */
6118                         break;
6119                 }
6120         }
6121         return (len);
6122 }
6123 
6124 /* ARGSUSED */
6125 static int
6126 tl_set_opt(
6127         queue_t         *wq,
6128         uint_t          mgmt_flags,
6129         int             level,
6130         int             name,
6131         uint_t          inlen,
6132         uchar_t         *invalp,
6133         uint_t          *outlenp,
6134         uchar_t         *outvalp,
6135         void            *thisdg_attrs,
6136         cred_t          *cr)
6137 {
6138         int error;
6139         tl_endpt_t *tep;
6140 
6141         tep = (tl_endpt_t *)wq->q_ptr;
6142 
6143         error = 0;              /* NOERROR */
6144 
6145         /*
6146          * Assumes: option level and name sanity checks done elsewhere
6147          */
6148 
6149         switch (level) {
6150         case SOL_SOCKET:
6151                 if (! IS_SOCKET(tep)) {
6152                         error = EINVAL;
6153                         break;
6154                 }
6155                 /*
6156                  * TBD: fill in other AF_UNIX socket options and then stop
6157                  * returning error.
6158                  */
6159                 switch (name) {
6160                 case SO_RECVUCRED:
6161                         /*
6162                          * We only support this for datagram sockets;
6163                          * getpeerucred handles the connection oriented
6164                          * transports.
6165                          */
6166                         if (! IS_CLTS(tep)) {
6167                                 error = EINVAL;
6168                                 break;
6169                         }
6170                         if (*(int *)invalp == 0)
6171                                 tep->te_flag &= ~TL_SOCKUCRED;
6172                         else
6173                                 tep->te_flag |= TL_SOCKUCRED;
6174                         break;
6175                 default:
6176                         error = EINVAL;
6177                         break;
6178                 }
6179                 break;
6180         case TL_PROT_LEVEL:
6181                 switch (name) {
6182                 case TL_OPT_PEER_CRED:
6183                 case TL_OPT_PEER_UCRED:
6184                         /*
6185                          * option not supposed to be set directly
6186                          * Its value in initialized for each endpoint at
6187                          * driver open time.
6188                          * Direct setting always designed to fail for this
6189                          * option.
6190                          */
6191                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
6192                             SL_TRACE|SL_ERROR,
6193                             "tl_set_opt: option is not supported"));
6194                         error = EPROTO;
6195                         break;
6196                 }
6197         }
6198         return (error);
6199 }
6200 
6201 
6202 static void
6203 tl_timer(void *arg)
6204 {
6205         queue_t *wq = arg;
6206         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6207 
6208         ASSERT(tep);
6209 
6210         tep->te_timoutid = 0;
6211 
6212         enableok(wq);
6213         /*
6214          * Note: can call wsrv directly here and save context switch
6215          * Consider change when qtimeout (not timeout) is active
6216          */
6217         qenable(wq);
6218 }
6219 
6220 static void
6221 tl_buffer(void *arg)
6222 {
6223         queue_t *wq = arg;
6224         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6225 
6226         ASSERT(tep);
6227 
6228         tep->te_bufcid = 0;
6229         tep->te_nowsrv = B_FALSE;
6230 
6231         enableok(wq);
6232         /*
6233          *  Note: can call wsrv directly here and save context switch
6234          * Consider change when qbufcall (not bufcall) is active
6235          */
6236         qenable(wq);
6237 }
6238 
6239 static void
6240 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6241 {
6242         tl_endpt_t *tep;
6243 
6244         tep = (tl_endpt_t *)wq->q_ptr;
6245 
6246         if (tep->te_closing) {
6247                 freemsg(mp);
6248                 return;
6249         }
6250         noenable(wq);
6251 
6252         (void) insq(wq, wq->q_first, mp);
6253 
6254         if (tep->te_bufcid || tep->te_timoutid) {
6255                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6256                     "tl_memrecover:recover %p pending", (void *)wq));
6257                 return;
6258         }
6259 
6260         if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6261                 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6262                     drv_usectohz(TL_BUFWAIT));
6263         }
6264 }
6265 
6266 static void
6267 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6268 {
6269         ASSERT(tip->ti_seqno != 0);
6270 
6271         if (tip->ti_mp != NULL) {
6272                 tl_icon_freemsgs(&tip->ti_mp);
6273                 tip->ti_mp = NULL;
6274         }
6275         if (tip->ti_tep != NULL) {
6276                 tl_refrele(tip->ti_tep);
6277                 tip->ti_tep = NULL;
6278         }
6279         list_remove(&tep->te_iconp, tip);
6280         kmem_free(tip, sizeof (tl_icon_t));
6281         tep->te_nicon--;
6282 }
6283 
6284 /*
6285  * Remove address from address hash.
6286  */
6287 static void
6288 tl_addr_unbind(tl_endpt_t *tep)
6289 {
6290         tl_endpt_t *elp;
6291 
6292         if (tep->te_flag & TL_ADDRHASHED) {
6293                 if (IS_SOCKET(tep)) {
6294                         (void) mod_hash_remove(tep->te_addrhash,
6295                             (mod_hash_key_t)tep->te_vp,
6296                             (mod_hash_val_t *)&elp);
6297                         tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6298                         tep->te_magic = SOU_MAGIC_IMPLICIT;
6299                 } else {
6300                         (void) mod_hash_remove(tep->te_addrhash,
6301                             (mod_hash_key_t)&tep->te_ap,
6302                             (mod_hash_val_t *)&elp);
6303                         (void) kmem_free(tep->te_abuf, tep->te_alen);
6304                         tep->te_alen = -1;
6305                         tep->te_abuf = NULL;
6306                 }
6307                 tep->te_flag &= ~TL_ADDRHASHED;
6308         }
6309 }