1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  27  * Copyright (c) 2012 by Delphix. All rights reserved.
  28  * Copyright (c) 2018, Joyent, Inc.
  29  */
  30 
  31 /*
  32  * Multithreaded STREAMS Local Transport Provider.
  33  *
  34  * OVERVIEW
  35  * ========
  36  *
  37  * This driver provides TLI as well as socket semantics.  It provides
  38  * connectionless, connection oriented, and connection oriented with orderly
  39  * release transports for TLI and sockets. Each transport type has separate name
  40  * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
  41  * this removes any name space conflicts when binding to socket style transport
  42  * addresses.
  43  *
  44  * NOTE: There is one exception: Socket ticots and ticotsord transports share
  45  * the same namespace. In fact, sockets always use ticotsord type transport.
  46  *
  47  * The driver mode is specified during open() by the minor number used for
  48  * open.
  49  *
  50  *  The sockets in addition have the following semantic differences:
  51  *  No support for passing up credentials (TL_SET[U]CRED).
  52  *
  53  *      Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
  54  *      from T_UNITDATA_REQ to T_UNIDATA_IND, and from T_OPTDATA_REQ to
  55  *      T_OPTDATA_IND.
  56  *
  57  *      The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
  58  *      a T_CONN_RES is received from the acceptor. This means that a socket
  59  *      connect will complete before the peer has called accept.
  60  *
  61  *
  62  * MULTITHREADING
  63  * ==============
  64  *
  65  * The driver does not use STREAMS protection mechanisms. Instead it uses a
  66  * generic "serializer" abstraction. Most of the operations are executed behind
  67  * the serializer and are, essentially single-threaded. All functions executed
  68  * behind the same serializer are strictly serialized. So if one thread calls
 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
 * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
 * or bar(mp2, arg2); foo(mp1, arg1); but foo() and bar() will never run at the
 * same time.
  74  *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
  78  *
  79  * All COTS-type endpoints start their life with private serializers. During
  80  * connection request processing the endpoint serializer is switched to the
  81  * listener's serializer and the rest of T_CONN_REQ processing is done on the
  82  * listener serializer. During T_CONN_RES processing the eager serializer is
  83  * switched from listener to acceptor serializer and after that point all
  84  * processing for eager and acceptor happens on this serializer. To avoid races
  85  * with endpoint closes while its serializer may be changing closes are blocked
  86  * while serializers are manipulated.
  87  *
  88  * References accounting
  89  * ---------------------
  90  *
  91  * Endpoints are reference counted and freed when the last reference is
  92  * dropped. Functions within the serializer may access an endpoint state even
  93  * after an endpoint closed. The te_closing being set on the endpoint indicates
  94  * that the endpoint entered its close routine.
  95  *
  96  * One reference is held for each opened endpoint instance. The reference
  97  * counter is incremented when the endpoint is linked to another endpoint and
  98  * decremented when the link disappears. It is also incremented when the
  99  * endpoint is found by the hash table lookup. This increment is atomic with the
 100  * lookup itself and happens while the hash table read lock is held.
 101  *
 102  * Close synchronization
 103  * ---------------------
 104  *
 * During close the endpoint is marked as closing using te_closing flag. It is
 106  * usually enough to check for te_closing flag since all other state changes
 107  * happen after this flag is set and the close entered serializer. Immediately
 108  * after setting te_closing flag tl_close() enters serializer and waits until
 109  * the callback finishes. This allows all functions called within serializer to
 110  * simply check te_closing without any locks.
 111  *
 112  * Serializer management.
 113  * ---------------------
 114  *
 115  * For COTS transports serializers are created when the endpoint is constructed
 116  * and destroyed when the endpoint is destructed. CLTS transports use global
 117  * serializers - one for sockets and one for TLI.
 118  *
 119  * COTS serializers have separate reference counts to deal with several
 120  * endpoints sharing the same serializer. There is a subtle problem related to
 121  * the serializer destruction. The serializer should never be destroyed by any
 122  * function executed inside serializer. This means that close has to wait till
 123  * all serializer activity for this endpoint is finished before it can drop the
 124  * last reference on the endpoint (which may as well free the serializer).  This
 125  * is only relevant for COTS transports which manage serializers
 126  * dynamically. For CLTS transports close may complete without waiting for all
 127  * serializer activity to finish since serializer is only destroyed at driver
 128  * detach time.
 129  *
 130  * COTS endpoints keep track of the number of outstanding requests on the
 131  * serializer for the endpoint. The code handling accept() avoids changing
 132  * client serializer if it has any pending messages on the serializer and
 133  * instead moves acceptor to listener's serializer.
 134  *
 135  *
 136  * Use of hash tables
 137  * ------------------
 138  *
 139  * The driver uses modhash hash table implementation. Each transport uses two
 140  * hash tables - one for finding endpoints by acceptor ID and another one for
 141  * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
 142  * pair of hash tables since sockets only use TICOTSORD.
 143  *
 144  * All hash tables lookups increment a reference count for returned endpoints,
 145  * so we may safely check the endpoint state even when the endpoint is removed
 146  * from the hash by another thread immediately after it is found.
 147  *
 148  *
 149  * CLOSE processing
 150  * ================
 151  *
 152  * The driver enters serializer twice on close(). The close sequence is the
 153  * following:
 154  *
 * 1) Wait until closing is safe (te_closewait becomes zero).
 *      This step is needed to prevent close during serializer switches. In most
 *      cases (close happening after connection establishment) te_closewait is
 *      zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within serializer and wait for it to complete.
 *
 *      tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 *      It also needs to clear write-side q_next pointers - this should be done
 *      before qprocsoff().
 *
 *    This synchronous serializer entry during close is needed to ensure that
 *    the queue is valid everywhere inside the serializer.
 *
 *    Note that in many cases close will execute tl_close_ser() synchronously,
 *    so it will not wait at all.
 *
 * 4) Calls qprocsoff().
 * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
 *      complete (for COTS transports). For CLTS transport there is no wait.
 *
 *      tl_close_finish_ser() finishes the close process and wakes up waiting
 *      close if there is any.
 *
 *    Note that in most cases close will enter tl_close_finish_ser()
 *    synchronously and will not wait at all.
 181  *
 182  *
 183  * Flow Control
 184  * ============
 185  *
 186  * The driver implements both read and write side service routines. No one calls
 187  * putq() on the read queue. The read side service routine tl_rsrv() is called
 188  * when the read side stream is back-enabled. It enters serializer synchronously
 189  * (waits till serializer processing is complete). Within serializer it
 190  * back-enables all endpoints blocked by the queue for connection-less
 191  * transports and enables write side service processing for the peer for
 192  * connection-oriented transports.
 193  *
 194  * Read and write side service routines use special mblk_sized space in the
 195  * endpoint structure to enter perimeter.
 196  *
 197  * Write-side flow control
 198  * -----------------------
 199  *
 200  * Write side flow control is a bit tricky. The driver needs to deal with two
 201  * message queues - the explicit STREAMS message queue maintained by
 202  * putq()/getq()/putbq() and the implicit queue within the serializer. These two
 203  * queues should be synchronized to preserve message ordering and should
 204  * maintain a single order determined by the order in which messages enter
 205  * tl_wput(). In order to maintain the ordering between these two queues the
 206  * STREAMS queue is only manipulated within the serializer, so the ordering is
 207  * provided by the serializer.
 208  *
 209  * Functions called from the tl_wsrv() sometimes may call putbq(). To
 210  * immediately stop any further processing of the STREAMS message queues the
 211  * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
 212  * side service processing stops when the flag is set.
 213  *
 214  * The tl_wsrv() function enters serializer synchronously and waits for it to
 215  * complete. The serializer call-back tl_wsrv_ser() either drains all messages
 216  * on the STREAMS queue or terminates when it notices the te_nowsrv flag
 217  * set. Note that the maximum amount of messages processed by tl_wput_ser() is
 218  * always bounded by the amount of messages on the STREAMS queue at the time
 219  * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
 220  * queue from another serialized entry which can't happen in parallel. This
 221  * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
 222  * of it draining forever while writer places new messages on the STREAMS
 223  * queue).
 224  *
 225  * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
 226  *
 227  *
 228  * Unix Domain Sockets
 229  * ===================
 230  *
 231  * The driver knows the structure of Unix Domain sockets addresses and treats
 232  * them differently from generic TLI addresses. For sockets implicit binds are
 233  * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
 234  * instead of using address length of zero. Explicit binds specify
 235  * SOU_MAGIC_EXPLICIT as magic.
 236  *
 237  * For implicit binds we always use minor number as soua_vp part of the address
 238  * and avoid any hash table lookups. This saves two hash tables lookups per
 239  * anonymous bind.
 240  *
 * For explicit addresses we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
 244  *
 245  * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 246  * tep structure, so it should be never freed.
 247  *
 248  * Also for sockets the driver always uses minor number as acceptor id.
 249  *
 250  * TPI VIOLATIONS
 251  * --------------
 252  *
 253  * This driver violates TPI in several respects for Unix Domain Sockets:
 254  *
 255  * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
 256  *      is requested and the endpoint is already in use. There is no point in
 257  *      generating an unused address since this address will be rejected by
 258  *      sockfs anyway. For implicit binds it always generates a new address
 259  *      (sets soua_vp to its minor number).
 260  *
 261  * 2) It always uses minor number as acceptor ID and never uses queue
 262  *      pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
 263  *      message and they do not use the queue pointer.
 264  *
 265  * 3) For Listener sockets the usual sequence is to issue bind() zero backlog
 266  *      followed by listen(). The listen() should be issued with non-zero
 267  *      backlog, so sotpi_listen() issues unbind request followed by bind
 268  *      request to the same address but with a non-zero qlen value. Both
 269  *      tl_bind() and tl_unbind() require write lock on the hash table to
 270  *      insert/remove the address. The driver does not remove the address from
 271  *      the hash for endpoints that are bound to the explicit address and have
 272  *      backlog of zero. During T_BIND_REQ processing if the address requested
 273  *      is equal to the address the endpoint already has it updates the backlog
 274  *      without reinserting the address in the hash table. This optimization
 275  *      avoids two hash table updates for each listener created. It always
 276  *      avoids the problem of a "stolen" address when another listener may use
 277  *      the same address between the unbind and bind and suddenly listen() fails
 278  *      because address is in use even though the bind() succeeded.
 279  *
 280  *
 281  * CONNECTIONLESS TRANSPORTS
 282  * =========================
 283  *
 284  * Connectionless transports all share the same serializer (one for TLI and one
 285  * for Sockets). Functions executing behind serializer can check or modify state
 286  * of any endpoint.
 287  *
 288  * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
 289  * te_lastep field. The next time X talks to some address A it checks whether A
 290  * is the same as Y's address and if it is there is no need to lookup Y. If the
 291  * address is different or the state of Y is not appropriate (e.g. closed or not
 292  * idle) X does a lookup using tl_find_peer() and caches the new address.
 293  * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
 294  * on the endpoint found.
 295  *
 296  * During close of endpoint Y it doesn't try to remove itself from other
 297  * endpoints caches. They will detect that Y is gone and will search the peer
 298  * endpoint again.
 299  *
 300  * Flow Control Handling.
 301  * ----------------------
 302  *
 303  * Each connectionless endpoint keeps a list of endpoints which are
 304  * flow-controlled by its queue. It also keeps a pointer to the queue which
 305  * flow-controls itself.  Whenever flow control releases for endpoint X it
 306  * enables all queues from the list. During close it also back-enables everyone
 307  * in the list. If X is flow-controlled when it is closing it removes it from
 308  * the peers list.
 309  *
 310  * DATA STRUCTURES
 311  * ===============
 312  *
 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
 * of pending connections (tl_icon_t). For connectionless transports it keeps a
 * list of endpoints flow controlled by this one.
 317  *
 318  * Each transport type is represented by a per-transport data structure
 319  * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
 320  * endpoint address hash tables for each transport. It also contains pointer to
 321  * transport serializer for connectionless transports.
 322  *
 323  * Each endpoint keeps a link to its transport structure, so the code can find
 324  * all per-transport information quickly.
 325  */
 326 
 327 #include        <sys/types.h>
 328 #include        <sys/inttypes.h>
 329 #include        <sys/stream.h>
 330 #include        <sys/stropts.h>
 331 #define _SUN_TPI_VERSION 2
 332 #include        <sys/tihdr.h>
 333 #include        <sys/strlog.h>
 334 #include        <sys/debug.h>
 335 #include        <sys/cred.h>
 336 #include        <sys/errno.h>
 337 #include        <sys/kmem.h>
 338 #include        <sys/id_space.h>
 339 #include        <sys/modhash.h>
 340 #include        <sys/mkdev.h>
 341 #include        <sys/tl.h>
 342 #include        <sys/stat.h>
 343 #include        <sys/conf.h>
 344 #include        <sys/modctl.h>
 345 #include        <sys/strsun.h>
 346 #include        <sys/socket.h>
 347 #include        <sys/socketvar.h>
 348 #include        <sys/sysmacros.h>
 349 #include        <sys/xti_xtiopt.h>
 350 #include        <sys/ddi.h>
 351 #include        <sys/sunddi.h>
 352 #include        <sys/zone.h>
 353 #include        <inet/common.h>   /* typedef int (*pfi_t)() for inet/optcom.h */
 354 #include        <inet/optcom.h>
 355 #include        <sys/strsubr.h>
 356 #include        <sys/ucred.h>
 357 #include        <sys/suntpi.h>
 358 #include        <sys/list.h>
 359 #include        <sys/serializer.h>
 360 
 361 /*
 362  * TBD List
 363  * 14 Eliminate state changes through table
 364  * 16. AF_UNIX socket options
 365  * 17. connect() for ticlts
 366  * 18. support for "netstat" to show AF_UNIX plus TLI local
 367  *      transport connections
 368  * 21. sanity check to flushing on sending M_ERROR
 369  */
 370 
 371 /*
 372  * CONSTANT DECLARATIONS
 373  * --------------------
 374  */
 375 
 376 /*
 377  * Local declarations
 378  */
/*
 * Look up the next TPI state for event EV when the endpoint is in state ST,
 * using the standard TLI/TPI state transition table.
 */
#define NEXTSTATE(EV, ST)       ti_statetbl[EV][ST]

#define BADSEQNUM       (-1)    /* initial seq number used by T_DISCON_IND */
#define TL_BUFWAIT      (10000) /* usecs to wait for allocb buffer timeout */
#define TL_TIDUSZ (64*1024)     /* tidu size when "strmsgz" is unlimited (0) */
/*
 * Hash tables size (buckets for both the acceptor-id and address hashes).
 */
#define TL_HASH_SIZE 311
 388 
 389 /*
 390  * Definitions for module_info
 391  */
#define         TL_ID           (104)           /* module ID number */
#define         TL_NAME         "tl"            /* module name */
#define         TL_MINPSZ       (0)             /* min packet size */
#define         TL_MAXPSZ       INFPSZ          /* max packet size ZZZ */
#define         TL_HIWAT        (16*1024)       /* hi water mark */
#define         TL_LOWAT        (256)           /* lo water mark */
/*
 * Definition of minor numbers/modes for new transport provider modes.
 * We view the socket use as a separate mode to get a separate name space.
 * TL_SOCKET is a bit OR-ed into the base transport type for socket modes.
 */
#define         TL_TICOTS       0       /* connection oriented transport */
#define         TL_TICOTSORD    1       /* COTS w/ orderly release */
#define         TL_TICLTS       2       /* connectionless transport */
#define         TL_UNUSED       3       /* hole ("undefined" transport) */
#define         TL_SOCKET       4       /* Socket */
#define         TL_SOCK_COTS    (TL_SOCKET | TL_TICOTS)
#define         TL_SOCK_COTSORD (TL_SOCKET | TL_TICOTSORD)
#define         TL_SOCK_CLTS    (TL_SOCKET | TL_TICLTS)

/* Mask covering the transport-mode bits above. */
#define         TL_MINOR_MASK   0x7
/* NOTE(review): presumably the first dynamically-assigned minor; verify */
#define         TL_MINOR_START  (TL_TICLTS + 1)

/*
 * LOCAL MACROS
 */
/* Round p up to a multiple of sizeof (t_scalar_t). */
#define T_ALIGN(p)      P2ROUNDUP((p), sizeof (t_scalar_t))
 418 
 419 /*
 420  * EXTERNAL VARIABLE DECLARATIONS
 421  * -----------------------------
 422  */
 423 /*
 424  * state table defined in the OS space.c
 425  */
extern  char    ti_statetbl[TE_NOEVENTS][TS_NOSTATES];

/*
 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
 */
/* Queue open/close and put/service procedures (wired up in tl_rinit/tl_winit). */
static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
static int tl_close(queue_t *, int, cred_t *);
static int tl_wput(queue_t *, mblk_t *);
static int tl_wsrv(queue_t *);
static int tl_rsrv(queue_t *);

/* Autoconfiguration entry points (used by tl_devops below). */
static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 440 
 441 
 442 /*
 443  * GLOBAL DATA STRUCTURES AND VARIABLES
 444  * -----------------------------------
 445  */
 446 
 447 /*
 448  * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
 449  * For now, we only manage the SO_RECVUCRED option but we also have
 450  * harmless dummy options to make things work with some common code we access.
 451  */
 452 opdes_t tl_opt_arr[] = {
 453         /* The SO_TYPE is needed for the hack below */
 454         {
 455                 SO_TYPE,
 456                 SOL_SOCKET,
 457                 OA_R,
 458                 OA_R,
 459                 OP_NP,
 460                 0,
 461                 sizeof (t_scalar_t),
 462                 0
 463         },
 464         {
 465                 SO_RECVUCRED,
 466                 SOL_SOCKET,
 467                 OA_RW,
 468                 OA_RW,
 469                 OP_NP,
 470                 0,
 471                 sizeof (int),
 472                 0
 473         }
 474 };
 475 
 476 /*
 477  * Table of all supported levels
 478  * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 479  * any supported options so we need this info separately.
 480  *
 481  * This is needed only for topmost tpi providers.
 482  */
optlevel_t      tl_valid_levels_arr[] = {
        XTI_GENERIC,            /* generic XTI-level options */
        SOL_SOCKET,             /* socket-level options (SO_TYPE etc.) */
        TL_PROT_LEVEL           /* TL driver's own protocol level */
};

/* Number of entries in tl_valid_levels_arr. */
#define TL_VALID_LEVELS_CNT     A_CNT(tl_valid_levels_arr)
 490 /*
 491  * Current upper bound on the amount of space needed to return all options.
 492  * Additional options with data size of sizeof(long) are handled automatically.
 493  * Others need hand job.
 494  */
 495 #define TL_MAX_OPT_BUF_LEN                                              \
 496                 ((A_CNT(tl_opt_arr) << 2) +                               \
 497                 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +          \
 498                 + 64 + sizeof (struct T_optmgmt_ack))
 499 
 500 #define TL_OPT_ARR_CNT  A_CNT(tl_opt_arr)
 501 
 502 /*
 503  *      transport addr structure
 504  */
typedef struct tl_addr {
        zoneid_t        ta_zoneid;              /* Zone scope of address */
        t_scalar_t      ta_alen;                /* length (bytes) of ta_abuf */
        void            *ta_abuf;               /* the addr itself */
} tl_addr_t;
 510 
 511 /*
 512  * Refcounted version of serializer.
 513  */
typedef struct tl_serializer {
        uint_t          ts_refcnt;      /* endpoints sharing this serializer */
        serializer_t    *ts_serializer; /* the underlying serializer */
} tl_serializer_t;
 518 
 519 /*
 520  * Each transport type has a separate state.
 521  * Per-transport state.
 522  */
typedef struct tl_transport_state {
        char            *tr_name;       /* transport name ("ticots", ...) */
        minor_t         tr_minor;       /* minor number / mode of transport */
        uint32_t        tr_defaddr;     /* presumably seed for generated */
                                        /* default addresses - verify */
        mod_hash_t      *tr_ai_hash;    /* acceptor ID -> endpoint hash */
        mod_hash_t      *tr_addr_hash;  /* address -> endpoint hash */
        tl_serializer_t *tr_serializer; /* serializer (CLTS transports only) */
} tl_transport_state_t;
 531 
 532 #define TL_DFADDR 0x1000
 533 
 534 static tl_transport_state_t tl_transports[] = {
 535         { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
 536         { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
 537         { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
 538         { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
 539         { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
 540         { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL },
 541         { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
 542 };
 543 
/* Number of entries in tl_transports. */
#define TL_MAXTRANSPORT A_CNT(tl_transports)

/* Forward declaration of the endpoint structure defined below. */
struct tl_endpt;
typedef struct tl_endpt tl_endpt_t;

/* Signature of callback functions executed behind a serializer. */
typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
 550 
 551 /*
 552  * Data structure used to represent pending connects.
 553  * Records enough information so that the connecting peer can close
 554  * before the connection gets accepted.
 555  */
typedef struct tl_icon {
        list_node_t     ti_node;        /* linkage on pending-conn list */
        struct tl_endpt *ti_tep;        /* NULL if peer has already closed */
        mblk_t          *ti_mp;         /* b_next list of data + ordrel_ind */
        t_scalar_t      ti_seqno;       /* Sequence number */
} tl_icon_t;
 562 
/* Shorthand for the Unix Domain socket address structure. */
typedef struct so_ux_addr soux_addr_t;
#define TL_SOUX_ADDRLEN sizeof (soux_addr_t)

/*
 * Maximum number of unaccepted connection indications allowed per listener.
 */
#define TL_MAXQLEN      4096
int tl_maxqlen = TL_MAXQLEN;    /* tunable cap for listener qlen */
 571 
 572 /*
 573  *      transport endpoint structure
 574  */
struct tl_endpt {
        queue_t         *te_rq;         /* stream read queue */
        queue_t         *te_wq;         /* stream write queue */
        uint32_t        te_refcnt;      /* endpoint reference count */
        int32_t         te_state;       /* TPI state of endpoint */
        minor_t         te_minor;       /* minor number */
/* NOTE(review): presumably the conn sequence number aliases the minor */
#define te_seqno        te_minor
        uint_t          te_flag;        /* flag field */
        boolean_t       te_nowsrv;      /* stop wsrv processing (see TL_PUTBQ) */
        tl_serializer_t *te_ser;        /* Serializer to use */
#define te_serializer   te_ser->ts_serializer

        soux_addr_t     te_uxaddr;      /* Socket address */
#define te_magic        te_uxaddr.soua_magic
#define te_vp           te_uxaddr.soua_vp
        tl_addr_t       te_ap;          /* addr bound to this endpt */
#define te_zoneid te_ap.ta_zoneid
#define te_alen te_ap.ta_alen
#define te_abuf te_ap.ta_abuf

        tl_transport_state_t *te_transport;     /* per-transport state link */
#define te_addrhash     te_transport->tr_addr_hash
#define te_aihash       te_transport->tr_ai_hash
#define te_defaddr      te_transport->tr_defaddr
        cred_t          *te_credp;      /* endpoint user credentials */
        mod_hash_hndl_t te_hash_hndl;   /* Handle for address hash */

        /*
         * State specific for connection-oriented and connectionless transports.
         */
        union {
                /* Connection-oriented state. */
                struct {
                        t_uscalar_t _te_nicon;  /* count of conn requests */
                        t_uscalar_t _te_qlen;   /* max conn requests */
                        tl_endpt_t  *_te_oconp; /* conn request pending */
                        tl_endpt_t  *_te_conp;  /* connected endpt */
#ifndef _ILP32
                        void        *_te_pad;
#endif
                        list_t  _te_iconp;      /* list of conn ind. pending */
                } _te_cots_state;
                /* Connection-less state. */
                struct {
                        tl_endpt_t *_te_lastep; /* last dest. endpoint */
                        tl_endpt_t *_te_flowq;  /* flow controlled on whom */
                        list_node_t _te_flows;  /* lists of connections */
                        list_t  _te_flowlist;   /* Who flowcontrols on me */
                } _te_clts_state;
        } _te_transport_state;
#define te_nicon        _te_transport_state._te_cots_state._te_nicon
#define te_qlen         _te_transport_state._te_cots_state._te_qlen
#define te_oconp        _te_transport_state._te_cots_state._te_oconp
#define te_conp         _te_transport_state._te_cots_state._te_conp
#define te_iconp        _te_transport_state._te_cots_state._te_iconp
#define te_lastep       _te_transport_state._te_clts_state._te_lastep
#define te_flowq        _te_transport_state._te_clts_state._te_flowq
#define te_flowlist     _te_transport_state._te_clts_state._te_flowlist
#define te_flows        _te_transport_state._te_clts_state._te_flows

        bufcall_id_t    te_bufcid;      /* outstanding bufcall id */
        timeout_id_t    te_timoutid;    /* outstanding timeout id */
        pid_t           te_cpid;        /* cached pid of endpoint */
        t_uscalar_t     te_acceptor_id; /* acceptor id for T_CONN_RES */
        /*
         * Pieces of the endpoint state needed for closing.
         */
        kmutex_t        te_closelock;
        kcondvar_t      te_closecv;
        uint8_t         te_closing;     /* The endpoint started closing */
        uint8_t         te_closewait;   /* Wait in close until zero */
        mblk_t          te_closemp;     /* for entering serializer on close */
        mblk_t          te_rsrvmp;      /* for entering serializer on rsrv */
        mblk_t          te_wsrvmp;      /* for entering serializer on wsrv */
        kmutex_t        te_srv_lock;
        kcondvar_t      te_srv_cv;
        uint8_t         te_rsrv_active; /* Running in tl_rsrv() */
        uint8_t         te_wsrv_active; /* Running in tl_wsrv() */
        /*
         * Pieces of the endpoint state needed for serializer transitions.
         */
        kmutex_t        te_ser_lock;    /* Protects the count below */
        uint_t          te_ser_count;   /* Number of messages on serializer */
};
 659 
 660 /*
 661  * Flag values. Lower 4 bits specify that transport used.
 662  * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
 663  * they allow to identify the endpoint more easily.
 664  */
#define TL_LISTENER     0x00010 /* the listener endpoint */
#define TL_ACCEPTOR     0x00020 /* the accepting endpoint */
#define TL_EAGER        0x00040 /* connecting endpoint */
#define TL_ACCEPTED     0x00080 /* accepted connection */
#define TL_SETCRED      0x00100 /* flag to indicate sending of credentials */
#define TL_SETUCRED     0x00200 /* flag to indicate sending of ucred */
#define TL_SOCKUCRED    0x00400 /* flag to indicate sending of SCM_UCRED */
#define TL_ADDRHASHED   0x01000 /* Endpoint address is stored in te_addrhash */
#define TL_CLOSE_SER    0x10000 /* Endpoint close has entered the serializer */
/*
 * Boolean checks for the endpoint type.  The lower 4 bits of te_flag hold
 * the transport mode (TL_TICOTS/TL_TICOTSORD/TL_TICLTS/TL_SOCKET), so these
 * tests operate directly on te_flag.
 */
#define         IS_CLTS(x)      (((x)->te_flag & TL_TICLTS) != 0)
#define         IS_COTS(x)      (((x)->te_flag & TL_TICLTS) == 0)
#define         IS_COTSORD(x)   (((x)->te_flag & TL_TICOTSORD) != 0)
#define         IS_SOCKET(x)    (((x)->te_flag & TL_SOCKET) != 0)

/*
 * Certain operations are always used together. These macros reduce the chance
 * of missing a part of a combination.
 *
 * NOTE(review): these brace-block macros are not wrapped in do { } while (0),
 * so they are not safe in unbraced if/else bodies; call sites must use them
 * as complete statements.
 */
#define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
#define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }

/* Requeue mp at the head of the write queue and stop tl_wsrv() processing. */
#define TL_PUTBQ(x, mp) {               \
        ASSERT(!((x)->te_flag & TL_CLOSE_SER));  \
        (x)->te_nowsrv = B_TRUE;     \
        (void) putbq((x)->te_wq, mp);        \
}

/* Re-allow write-side service processing (and kick it, for TL_QENABLE). */
#define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
#define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
 697 
 698 /*
 699  * STREAMS driver glue data structures.
 700  */
/* Module information shared by the read- and write-side qinit structures. */
static	struct	module_info	tl_minfo = {
	TL_ID,			/* mi_idnum */
	TL_NAME,		/* mi_idname */
	TL_MINPSZ,		/* mi_minpsz */
	TL_MAXPSZ,		/* mi_maxpsz */
	TL_HIWAT,		/* mi_hiwat */
	TL_LOWAT		/* mi_lowat */
};
 709 
/* Read side: no put procedure; service procedure handles backenabling. */
static	struct	qinit	tl_rinit = {
	NULL,			/* qi_putp */
	tl_rsrv,		/* qi_srvp */
	tl_open,		/* qi_qopen */
	tl_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};
 719 
/* Write side: all downstream TPI traffic enters through tl_wput/tl_wsrv. */
static	struct	qinit	tl_winit = {
	tl_wput,		/* qi_putp */
	tl_wsrv,		/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};
 729 
/* Stream table; tl is a plain driver, so no multiplexor qinits. */
static	struct streamtab	tlinfo = {
	&tl_rinit,		/* st_rdinit */
	&tl_winit,		/* st_wrinit */
	NULL,			/* st_muxrinit */
	NULL			/* st_muxwrinit */
};
 736 
/* DDI device operations; quiesce is not supported by this pseudo driver. */
DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
 739 
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module -- pseudo driver here */
	"TPI Local Transport (tl)",	/* Module description string */
	&tl_devops,		/* driver ops */
};
 745 
 746 /*
 747  * Module linkage information for the kernel.
 748  */
static struct modlinkage modlinkage = {
	MODREV_1,	/* ml_rev */
	&modldrv,	/* ml_linkage[0] - the single driver module */
	NULL		/* NULL-terminated linkage list */
};
 754 
 755 /*
 756  * Templates for response to info request
 757  * Check sanity of unlimited connect data etc.
 758  */
 759 
/* Both provider types are XPG4.1-conformant and accept zero-length TSDUs. */
#define		TL_CLTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)
#define		TL_COTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)
 762 
/*
 * COTS template: all sizes unlimited except TIDU_size, which is patched in
 * from tl_tidusz when the template is copied for a T_INFO_ACK reply.
 */
static struct T_info_ack tl_cots_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		T_INFINITE,	/* TSDU size */
		T_INFINITE,	/* ETSDU size */
		T_INFINITE,	/* CDATA_size */
		T_INFINITE,	/* DDATA_size */
		T_INFINITE,	/* ADDR_size  */
		T_INFINITE,	/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_COTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};
 777 
/*
 * CLTS template: datagram-only, so expedited/connect/disconnect data are
 * not supported; TSDU_size and TIDU_size are patched in at run time.
 */
static struct T_info_ack tl_clts_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		0,		/* TSDU_size - fill at run time */
		-2,		/* ETSDU_size -2 => not supported */
		-2,		/* CDATA_size -2 => not supported */
		-2,		/* DDATA_size  -2 => not supported */
		-1,		/* ADDR_size -1 => infinite */
		-1,		/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_CLTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
	};
 792 
 793 /*
 794  * private copy of devinfo pointer used in tl_info
 795  */
 796 static dev_info_t *tl_dip;
 797 
 798 /*
 799  * Endpoints cache.
 800  */
 801 static kmem_cache_t *tl_cache;
 802 /*
 803  * Minor number space.
 804  */
 805 static id_space_t *tl_minors;
 806 
 807 /*
 808  * Default Data Unit size.
 809  */
 810 static t_scalar_t tl_tidusz;
 811 
 812 /*
 813  * Size of hash tables.
 814  */
 815 static size_t tl_hash_size = TL_HASH_SIZE;
 816 
 817 /*
 818  * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 819  * for sockets.
 820  */
 821 static int tl_disable_early_connect = 0;
 822 static int tl_client_closing_when_accepting;
 823 
 824 static int tl_serializer_noswitch;
 825 
 826 /*
 827  * LOCAL FUNCTION PROTOTYPES
 828  * -------------------------
 829  */
 830 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
 831 static void tl_do_proto(mblk_t *, tl_endpt_t *);
 832 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
 833 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
 834 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
 835     t_scalar_t);
 836 static void tl_bind(mblk_t *, tl_endpt_t *);
 837 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
 838 static void tl_ok_ack(queue_t *, mblk_t  *mp, t_scalar_t);
 839 static void tl_unbind(mblk_t *, tl_endpt_t *);
 840 static void tl_optmgmt(queue_t *, mblk_t *);
 841 static void tl_conn_req(queue_t *, mblk_t *);
 842 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
 843 static void tl_conn_res(mblk_t *, tl_endpt_t *);
 844 static void tl_discon_req(mblk_t *, tl_endpt_t *);
 845 static void tl_capability_req(mblk_t *, tl_endpt_t *);
 846 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
 847 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
 848 static void tl_info_req(mblk_t *, tl_endpt_t *);
 849 static void tl_addr_req(mblk_t *, tl_endpt_t *);
 850 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
 851 static void tl_data(mblk_t  *, tl_endpt_t *);
 852 static void tl_exdata(mblk_t *, tl_endpt_t *);
 853 static void tl_ordrel(mblk_t *, tl_endpt_t *);
 854 static void tl_unitdata(mblk_t *, tl_endpt_t *);
 855 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
 856 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
 857 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
 858 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
 859 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
 860 static void tl_cl_backenable(tl_endpt_t *);
 861 static void tl_co_unconnect(tl_endpt_t *);
 862 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
 863 static void tl_discon_ind(tl_endpt_t *, uint32_t);
 864 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
 865 static mblk_t *tl_ordrel_ind_alloc(void);
 866 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
 867 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
 868 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
 869 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
 870 static void tl_icon_freemsgs(mblk_t **);
 871 static void tl_merror(queue_t *, mblk_t *, int);
 872 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
 873 static int tl_default_opt(queue_t *, int, int, uchar_t *);
 874 static int tl_get_opt(queue_t *, int, int, uchar_t *);
 875 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
 876     uchar_t *, void *, cred_t *);
 877 static void tl_memrecover(queue_t *, mblk_t *, size_t);
 878 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
 879 static void tl_free(tl_endpt_t *);
 880 static int  tl_constructor(void *, void *, int);
 881 static void tl_destructor(void *, void *);
 882 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
 883 static tl_serializer_t *tl_serializer_alloc(int);
 884 static void tl_serializer_refhold(tl_serializer_t *);
 885 static void tl_serializer_refrele(tl_serializer_t *);
 886 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
 887 static void tl_serializer_exit(tl_endpt_t *);
 888 static boolean_t tl_noclose(tl_endpt_t *);
 889 static void tl_closeok(tl_endpt_t *);
 890 static void tl_refhold(tl_endpt_t *);
 891 static void tl_refrele(tl_endpt_t *);
 892 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
 893 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
 894 static void tl_close_ser(mblk_t *, tl_endpt_t *);
 895 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
 896 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
 897 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
 898 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
 899 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
 900 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
 901 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
 902 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
 903 static void tl_addr_unbind(tl_endpt_t *);
 904 
 905 /*
 906  * Intialize option database object for TL
 907  */
 908 
/* Option database handed to the generic options framework. */
optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};
 918 
 919 /*
 920  * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
 921  * ---------------------------------------
 922  */
 923 
 924 /*
 925  * Loadable module routines
 926  */
/* Install the module; all per-attach setup happens in tl_attach(). */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
 932 
/* Remove the module; fails while any instance is still attached/open. */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
 938 
/* Report module information. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
 944 
 945 /*
 946  * Driver Entry Points and Other routines
 947  */
/*
 * Attach: create per-transport minor nodes, the endpoint kmem cache, the
 * clone-minor ID space, and per-transport hash tables/serializers.
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	char name[32];

	/*
	 * Resume from a checkpoint state.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Deduce TIDU size to use.  Note: "strmsgsz" being 0 has semantics that
	 * streams message sizes can be unlimited. We use a defined constant
	 * instead.
	 */
	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

	/*
	 * Create subdevices for each transport.
	 */
	for (i = 0; i < TL_UNUSED; i++) {
		if (ddi_create_minor_node(devi,
		    tl_transports[i].tr_name,
		    S_IFCHR, tl_transports[i].tr_minor,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			/* Undo any minor nodes created so far. */
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

	if (tl_cache == NULL) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	/* Create the ID space used to clone per-open minor numbers. */
	tl_minors = id_space_create("tl_minor_space",
	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

	/*
	 * Create hash tables (and shared CLTS serializers) per transport.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if (i == TL_UNUSED)
			continue;

		/* Socket COTSORD shares namespace with COTS */
		if (i == TL_SOCK_COTSORD) {
			t->tr_ai_hash =
			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
			ASSERT(t->tr_ai_hash != NULL);
			t->tr_addr_hash =
			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
			ASSERT(t->tr_addr_hash != NULL);
			continue;
		}

		/*
		 * Create hash tables.
		 */
		(void) snprintf(name, sizeof (name), "%s_ai_hash",
		    t->tr_name);
#ifdef _ILP32
		if (i & TL_SOCKET)
			t->tr_ai_hash =
			    mod_hash_create_idhash(name, tl_hash_size - 1,
			    mod_hash_null_valdtor);
		else
			t->tr_ai_hash =
			    mod_hash_create_ptrhash(name, tl_hash_size,
			    mod_hash_null_valdtor, sizeof (queue_t));
#else
		t->tr_ai_hash =
		    mod_hash_create_idhash(name, tl_hash_size - 1,
		    mod_hash_null_valdtor);
#endif /* _ILP32 */

		if (i & TL_SOCKET) {
			(void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_ptrhash(name,
			    tl_hash_size, mod_hash_null_valdtor,
			    sizeof (uintptr_t));
		} else {
			(void) snprintf(name, sizeof (name), "%s_addr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_extended(name,
			    tl_hash_size, mod_hash_null_keydtor,
			    mod_hash_null_valdtor,
			    tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
		}

		/* Create serializer for connectionless transports. */
		if (i & TL_TICLTS)
			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
	}

	tl_dip = devi;

	return (DDI_SUCCESS);
}
1058 
/*
 * Detach: tear down everything tl_attach() created, in reverse order.
 */
static int
tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_SUSPEND)
		return (DDI_SUCCESS);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	/*
	 * Destroy arenas and hash tables.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		/* TL_SOCK_COTSORD shares its hashes with TL_SOCK_COTS. */
		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
			continue;

		/* Only CLTS transports own a shared serializer. */
		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
		if (t->tr_serializer != NULL) {
			tl_serializer_refrele(t->tr_serializer);
			t->tr_serializer = NULL;
		}

#ifdef _ILP32
		if (i & TL_SOCKET)
			mod_hash_destroy_idhash(t->tr_ai_hash);
		else
			mod_hash_destroy_ptrhash(t->tr_ai_hash);
#else
		mod_hash_destroy_idhash(t->tr_ai_hash);
#endif /* _ILP32 */
		t->tr_ai_hash = NULL;
		if (i & TL_SOCKET)
			mod_hash_destroy_ptrhash(t->tr_addr_hash);
		else
			mod_hash_destroy_hash(t->tr_addr_hash);
		t->tr_addr_hash = NULL;
	}

	kmem_cache_destroy(tl_cache);
	tl_cache = NULL;
	id_space_destroy(tl_minors);
	tl_minors = NULL;
	ddi_remove_minor_node(devi, NULL);
	return (DDI_SUCCESS);
}
1108 
1109 /* ARGSUSED */
1110 static int
1111 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1112 {
1113 
1114         int retcode = DDI_FAILURE;
1115 
1116         switch (infocmd) {
1117 
1118         case DDI_INFO_DEVT2DEVINFO:
1119                 if (tl_dip != NULL) {
1120                         *result = (void *)tl_dip;
1121                         retcode = DDI_SUCCESS;
1122                 }
1123                 break;
1124 
1125         case DDI_INFO_DEVT2INSTANCE:
1126                 *result = NULL;
1127                 retcode = DDI_SUCCESS;
1128                 break;
1129 
1130         default:
1131                 break;
1132         }
1133         return (retcode);
1134 }
1135 
1136 /*
1137  * Endpoint reference management.
1138  */
/*
 * Take a reference on an endpoint; balanced by tl_refrele().
 */
static void
tl_refhold(tl_endpt_t *tep)
{
	atomic_inc_32(&tep->te_refcnt);
}
1144 
/*
 * Release a reference on an endpoint; the last release frees it.
 */
static void
tl_refrele(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt != 0);

	if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
		tl_free(tep);
}
1153 
/*
 * kmem cache constructor: zero the endpoint and initialize its locks and
 * condition variables; mirrored by tl_destructor().
 */
/*ARGSUSED*/
static int
tl_constructor(void *buf, void *cdrarg, int kmflags)
{
	tl_endpt_t *tep = buf;

	bzero(tep, sizeof (tl_endpt_t));
	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}
1169 
/*
 * kmem cache destructor: tear down the synchronization primitives set up by
 * tl_constructor().
 */
/*ARGSUSED*/
static void
tl_destructor(void *buf, void *cdrarg)
{
	tl_endpt_t *tep = buf;

	mutex_destroy(&tep->te_closelock);
	cv_destroy(&tep->te_closecv);
	mutex_destroy(&tep->te_srv_lock);
	cv_destroy(&tep->te_srv_cv);
	mutex_destroy(&tep->te_ser_lock);
}
1182 
/*
 * Free an endpoint once its last reference is dropped (called from
 * tl_refrele()). Verifies the endpoint is fully quiesced, releases its
 * address storage, minor number, peers and serializer, then returns it to
 * the cache in constructed (reusable) state.
 */
static void
tl_free(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt == 0);
	ASSERT(tep->te_transport != NULL);
	ASSERT(tep->te_rq == NULL);
	ASSERT(tep->te_wq == NULL);
	ASSERT(tep->te_ser != NULL);
	ASSERT(tep->te_ser_count == 0);
	ASSERT(!(tep->te_flag & TL_ADDRHASHED));

	if (IS_SOCKET(tep)) {
		/* Socket addresses live inside the endpoint itself. */
		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
		ASSERT(tep->te_abuf == &tep->te_uxaddr);
		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
	} else if (tep->te_abuf != NULL) {
		kmem_free(tep->te_abuf, tep->te_alen);
		tep->te_alen = -1; /* uninitialized */
		tep->te_abuf = NULL;
	} else {
		ASSERT(tep->te_alen == -1);
	}

	/* Return the cloned minor number to the ID space. */
	id_free(tl_minors, tep->te_minor);
	ASSERT(tep->te_credp == NULL);

	/* Drop any hash handle still reserved for a bind that never came. */
	if (tep->te_hash_hndl != NULL)
		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

	if (IS_COTS(tep)) {
		/* COTS endpoints own a private serializer; release it. */
		TL_REMOVE_PEER(tep->te_conp);
		TL_REMOVE_PEER(tep->te_oconp);
		tl_serializer_refrele(tep->te_ser);
		tep->te_ser = NULL;
		ASSERT(tep->te_nicon == 0);
		ASSERT(list_head(&tep->te_iconp) == NULL);
	} else {
		ASSERT(tep->te_lastep == NULL);
		ASSERT(list_head(&tep->te_flowlist) == NULL);
		ASSERT(tep->te_flowq == NULL);
	}

	ASSERT(tep->te_bufcid == 0);
	ASSERT(tep->te_timoutid == 0);
	bzero(&tep->te_ap, sizeof (tep->te_ap));
	tep->te_acceptor_id = 0;

	ASSERT(tep->te_closewait == 0);
	ASSERT(!tep->te_rsrv_active);
	ASSERT(!tep->te_wsrv_active);
	/* Reset mutable state so the cached object is clean for reuse. */
	tep->te_closing = 0;
	tep->te_nowsrv = B_FALSE;
	tep->te_flag = 0;

	kmem_cache_free(tl_cache, tep);
}
1240 
1241 /*
1242  * Allocate/free reference-counted wrappers for serializers.
1243  */
1244 static tl_serializer_t *
1245 tl_serializer_alloc(int flags)
1246 {
1247         tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1248         serializer_t *ser;
1249 
1250         if (s == NULL)
1251                 return (NULL);
1252 
1253         ser = serializer_create(flags);
1254 
1255         if (ser == NULL) {
1256                 kmem_free(s, sizeof (tl_serializer_t));
1257                 return (NULL);
1258         }
1259 
1260         s->ts_refcnt = 1;
1261         s->ts_serializer = ser;
1262         return (s);
1263 }
1264 
/*
 * Take a reference on a serializer wrapper.
 */
static void
tl_serializer_refhold(tl_serializer_t *s)
{
	atomic_inc_32(&s->ts_refcnt);
}
1270 
/*
 * Release a reference on a serializer wrapper; the last release destroys
 * both the underlying serializer and the wrapper.
 */
static void
tl_serializer_refrele(tl_serializer_t *s)
{
	if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
		serializer_destroy(s->ts_serializer);
		kmem_free(s, sizeof (tl_serializer_t));
	}
}
1279 
1280 /*
1281  * Post a request on the endpoint serializer. For COTS transports keep track of
1282  * the number of pending requests.
1283  */
1284 static void
1285 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1286 {
1287         if (IS_COTS(tep)) {
1288                 mutex_enter(&tep->te_ser_lock);
1289                 tep->te_ser_count++;
1290                 mutex_exit(&tep->te_ser_lock);
1291         }
1292         serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1293 }
1294 
1295 /*
1296  * Complete processing the request on the serializer. Decrement the counter for
1297  * pending requests for COTS transports.
1298  */
/*
 * Complete processing the request on the serializer. Decrement the counter
 * for pending requests for COTS transports (pairs with tl_serializer_enter).
 */
static void
tl_serializer_exit(tl_endpt_t *tep)
{
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_ser_lock);
		ASSERT(tep->te_ser_count != 0);
		tep->te_ser_count--;
		mutex_exit(&tep->te_ser_lock);
	}
}
1309 
1310 /*
1311  * Hash management functions.
1312  */
1313 
1314 /*
1315  * Return TRUE if two addresses are equal, false otherwise.
1316  */
1317 static boolean_t
1318 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1319 {
1320         return ((ap1->ta_alen > 0) &&
1321             (ap1->ta_alen == ap2->ta_alen) &&
1322             (ap1->ta_zoneid == ap2->ta_zoneid) &&
1323             (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1324 }
1325 
1326 /*
1327  * This function is called whenever an endpoint is found in the hash table.
1328  */
/* ARGSUSED0 */
static void
tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
{
	/* Take a reference on behalf of the hash-lookup caller. */
	tl_refhold((tl_endpt_t *)val);
}
1335 
1336 /*
1337  * Address hash function.
1338  */
1339 /* ARGSUSED */
1340 static uint_t
1341 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1342 {
1343         tl_addr_t *ap = (tl_addr_t *)key;
1344         size_t  len = ap->ta_alen;
1345         uchar_t *p = ap->ta_abuf;
1346         uint_t i, g;
1347 
1348         ASSERT((len > 0) && (p != NULL));
1349 
1350         for (i = ap->ta_zoneid; len -- != 0; p++) {
1351                 i = (i << 4) + (*p);
1352                 if ((g = (i & 0xf0000000U)) != 0) {
1353                         i ^= (g >> 24);
1354                         i ^= g;
1355                 }
1356         }
1357         return (i);
1358 }
1359 
1360 /*
1361  * This function is used by hash lookups. It compares two generic addresses.
1362  */
/*
 * mod_hash key comparator: returns 0 when the two addresses are equal,
 * nonzero otherwise (the inverse of tl_eqaddr's boolean result).
 */
static int
tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
{
#ifdef	DEBUG
	tl_addr_t *ap1 = (tl_addr_t *)key1;
	tl_addr_t *ap2 = (tl_addr_t *)key2;

	ASSERT(key1 != NULL);
	ASSERT(key2 != NULL);

	ASSERT(ap1->ta_abuf != NULL);
	ASSERT(ap2->ta_abuf != NULL);
	ASSERT(ap1->ta_alen > 0);
	ASSERT(ap2->ta_alen > 0);
#endif

	return (!tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
}
1381 
1382 /*
1383  * Prevent endpoint from closing if possible.
1384  * Return B_TRUE on success, B_FALSE on failure.
1385  */
1386 static boolean_t
1387 tl_noclose(tl_endpt_t *tep)
1388 {
1389         boolean_t rc = B_FALSE;
1390 
1391         mutex_enter(&tep->te_closelock);
1392         if (!tep->te_closing) {
1393                 ASSERT(tep->te_closewait == 0);
1394                 tep->te_closewait++;
1395                 rc = B_TRUE;
1396         }
1397         mutex_exit(&tep->te_closelock);
1398         return (rc);
1399 }
1400 
1401 /*
1402  * Allow endpoint to close if needed.
1403  */
/*
 * Drop the close hold taken by tl_noclose() and wake any thread blocked in
 * tl_close() waiting on te_closecv.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	/* Unlocked pre-check; re-asserted precisely under the lock below. */
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}
1414 
1415 /*
1416  * STREAMS open entry point.
1417  */
1418 /* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t	*credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	/* Re-open of an already initialized stream is a no-op. */
	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	/* Socket opens select the socket flavor of the same transport. */
	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;

	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		/* -1 means "no address allocated yet" (see tl_free()). */
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

#ifdef	_ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */


	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}
1525 
1526 /* ARGSUSED1 */
1527 static int
1528 tl_close(queue_t *rq, int flag, cred_t *credp)
1529 {
1530         tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1531         tl_endpt_t *elp = NULL;
1532         queue_t *wq = tep->te_wq;
1533         int rc;
1534 
1535         ASSERT(wq == WR(rq));
1536 
1537         /*
1538          * Remove the endpoint from acceptor hash.
1539          */
1540         rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1541             (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1542             (mod_hash_val_t *)&elp);
1543         ASSERT(rc == 0 && tep == elp);
1544         if ((rc != 0) || (tep != elp)) {
1545                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1546                     SL_TRACE | SL_ERROR,
1547                     "tl_close:inconsistency in AI hash"));
1548         }
1549 
1550         /*
1551          * Wait till close is safe, then mark endpoint as closing.
1552          */
1553         mutex_enter(&tep->te_closelock);
1554         while (tep->te_closewait)
1555                 cv_wait(&tep->te_closecv, &tep->te_closelock);
1556         tep->te_closing = B_TRUE;
1557         /*
1558          * Will wait for the serializer part of the close to finish, so set
1559          * te_closewait now.
1560          */
1561         tep->te_closewait = 1;
1562         tep->te_nowsrv = B_FALSE;
1563         mutex_exit(&tep->te_closelock);
1564 
1565         /*
1566          * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1567          * It is safe because close will wait for tl_close_ser to finish.
1568          */
1569         tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1570 
1571         /*
1572          * Wait for the first phase of close to complete before qprocsoff().
1573          */
1574         mutex_enter(&tep->te_closelock);
1575         while (tep->te_closewait)
1576                 cv_wait(&tep->te_closecv, &tep->te_closelock);
1577         mutex_exit(&tep->te_closelock);
1578 
1579         qprocsoff(rq);
1580 
1581         if (tep->te_bufcid) {
1582                 qunbufcall(rq, tep->te_bufcid);
1583                 tep->te_bufcid = 0;
1584         }
1585         if (tep->te_timoutid) {
1586                 (void) quntimeout(rq, tep->te_timoutid);
1587                 tep->te_timoutid = 0;
1588         }
1589 
1590         /*
1591          * Finish close behind serializer.
1592          *
1593          * For a CLTS endpoint increase a refcount and continue close processing
1594          * with serializer protection. This processing may happen asynchronously
1595          * with the completion of tl_close().
1596          *
1597          * Fot a COTS endpoint wait before destroying tep since the serializer
1598          * may go away together with tep and we need to destroy serializer
1599          * outside of serializer context.
1600          */
1601         ASSERT(tep->te_closewait == 0);
1602         if (IS_COTS(tep))
1603                 tep->te_closewait = 1;
1604         else
1605                 tl_refhold(tep);
1606 
1607         tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1608 
1609         /*
1610          * For connection-oriented transports wait for all serializer activity
1611          * to settle down.
1612          */
1613         if (IS_COTS(tep)) {
1614                 mutex_enter(&tep->te_closelock);
1615                 while (tep->te_closewait)
1616                         cv_wait(&tep->te_closecv, &tep->te_closelock);
1617                 mutex_exit(&tep->te_closelock);
1618         }
1619 
1620         crfree(tep->te_credp);
1621         tep->te_credp = NULL;
1622         tep->te_wq = NULL;
1623         tl_refrele(tep);
1624         /*
1625          * tep is likely to be destroyed now, so can't reference it any more.
1626          */
1627 
1628         rq->q_ptr = wq->q_ptr = NULL;
1629         return (0);
1630 }
1631 
1632 /*
1633  * First phase of close processing done behind the serializer.
1634  *
1635  * Do not drop the reference in the end - tl_close() wants this reference to
1636  * stay.
1637  */
1638 /* ARGSUSED0 */
1639 static void
1640 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1641 {
1642         ASSERT(tep->te_closing);
1643         ASSERT(tep->te_closewait == 1);
1644         ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1645 
1646         tep->te_flag |= TL_CLOSE_SER;
1647 
1648         /*
1649          * Drain out all messages on queue except for TL_TICOTS where the
1650          * abortive release semantics permit discarding of data on close
1651          */
1652         if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1653                 tl_wsrv_ser(NULL, tep);
1654         }
1655 
1656         /* Remove address from hash table. */
1657         tl_addr_unbind(tep);
1658         /*
1659          * qprocsoff() gets confused when q->q_next is not NULL on the write
1660          * queue of the driver, so clear these before qprocsoff() is called.
1661          * Also clear q_next for the peer since this queue is going away.
1662          */
1663         if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1664                 tl_endpt_t *peer_tep = tep->te_conp;
1665 
1666                 tep->te_wq->q_next = NULL;
1667                 if ((peer_tep != NULL) && !peer_tep->te_closing)
1668                         peer_tep->te_wq->q_next = NULL;
1669         }
1670 
1671         tep->te_rq = NULL;
1672 
1673         /* wake up tl_close() */
1674         tl_closeok(tep);
1675         tl_serializer_exit(tep);
1676 }
1677 
1678 /*
1679  * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1680  * the reference for CLTS.
1681  *
1682  * Called from serializer. Should drop reference count for CLTS only.
1683  */
1684 /* ARGSUSED0 */
1685 static void
1686 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1687 {
1688         ASSERT(tep->te_closing);
1689         IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1690         IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1691 
1692         tep->te_state = -1;  /* Uninitialized */
1693         if (IS_COTS(tep)) {
1694                 tl_co_unconnect(tep);
1695         } else {
1696                 /* Connectionless specific cleanup */
1697                 TL_REMOVE_PEER(tep->te_lastep);
1698                 /*
1699                  * Backenable anybody that is flow controlled waiting for
1700                  * this endpoint.
1701                  */
1702                 tl_cl_backenable(tep);
1703                 if (tep->te_flowq != NULL) {
1704                         list_remove(&(tep->te_flowq->te_flowlist), tep);
1705                         tep->te_flowq = NULL;
1706                 }
1707         }
1708 
1709         tl_serializer_exit(tep);
1710         if (IS_COTS(tep))
1711                 tl_closeok(tep);
1712         else
1713                 tl_refrele(tep);
1714 }
1715 
1716 /*
1717  * STREAMS write-side put procedure.
1718  * Enter serializer for most of the processing.
1719  *
1720  * The T_CONN_REQ is processed outside of serializer.
1721  */
1722 static int
1723 tl_wput(queue_t *wq, mblk_t *mp)
1724 {
1725         tl_endpt_t              *tep = (tl_endpt_t *)wq->q_ptr;
1726         ssize_t                 msz = MBLKL(mp);
1727         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
1728         tlproc_t                *tl_proc = NULL;
1729 
1730         switch (DB_TYPE(mp)) {
1731         case M_DATA:
1732                 /* Only valid for connection-oriented transports */
1733                 if (IS_CLTS(tep)) {
1734                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1735                             SL_TRACE | SL_ERROR,
1736                             "tl_wput:M_DATA invalid for ticlts driver"));
1737                         tl_merror(wq, mp, EPROTO);
1738                         return (0);
1739                 }
1740                 tl_proc = tl_wput_data_ser;
1741                 break;
1742 
1743         case M_IOCTL:
1744                 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1745                 case TL_IOC_CREDOPT:
1746                         /* FALLTHROUGH */
1747                 case TL_IOC_UCREDOPT:
1748                         /*
1749                          * Serialize endpoint state change.
1750                          */
1751                         tl_proc = tl_do_ioctl_ser;
1752                         break;
1753 
1754                 default:
1755                         miocnak(wq, mp, 0, EINVAL);
1756                         return (0);
1757                 }
1758                 break;
1759 
1760         case M_FLUSH:
1761                 /*
1762                  * do canonical M_FLUSH processing
1763                  */
1764                 if (*mp->b_rptr & FLUSHW) {
1765                         flushq(wq, FLUSHALL);
1766                         *mp->b_rptr &= ~FLUSHW;
1767                 }
1768                 if (*mp->b_rptr & FLUSHR) {
1769                         flushq(RD(wq), FLUSHALL);
1770                         qreply(wq, mp);
1771                 } else {
1772                         freemsg(mp);
1773                 }
1774                 return (0);
1775 
1776         case M_PROTO:
1777                 if (msz < sizeof (prim->type)) {
1778                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1779                             SL_TRACE | SL_ERROR,
1780                             "tl_wput:M_PROTO data too short"));
1781                         tl_merror(wq, mp, EPROTO);
1782                         return (0);
1783                 }
1784                 switch (prim->type) {
1785                 case T_OPTMGMT_REQ:
1786                 case T_SVR4_OPTMGMT_REQ:
1787                         /*
1788                          * Process TPI option management requests immediately
1789                          * in put procedure regardless of in-order processing
1790                          * of already queued messages.
1791                          * (Note: This driver supports AF_UNIX socket
1792                          * implementation.  Unless we implement this processing,
1793                          * setsockopt() on socket endpoint will block on flow
1794                          * controlled endpoints which it should not. That is
1795                          * required for successful execution of VSU socket tests
1796                          * and is consistent with BSD socket behavior).
1797                          */
1798                         tl_optmgmt(wq, mp);
1799                         return (0);
1800                 case O_T_BIND_REQ:
1801                 case T_BIND_REQ:
1802                         tl_proc = tl_bind_ser;
1803                         break;
1804                 case T_CONN_REQ:
1805                         if (IS_CLTS(tep)) {
1806                                 tl_merror(wq, mp, EPROTO);
1807                                 return (0);
1808                         }
1809                         tl_conn_req(wq, mp);
1810                         return (0);
1811                 case T_DATA_REQ:
1812                 case T_OPTDATA_REQ:
1813                 case T_EXDATA_REQ:
1814                 case T_ORDREL_REQ:
1815                         tl_proc = tl_putq_ser;
1816                         break;
1817                 case T_UNITDATA_REQ:
1818                         if (IS_COTS(tep) ||
1819                             (msz < sizeof (struct T_unitdata_req))) {
1820                                 tl_merror(wq, mp, EPROTO);
1821                                 return (0);
1822                         }
1823                         if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1824                                 tl_proc = tl_unitdata_ser;
1825                         } else {
1826                                 tl_proc = tl_putq_ser;
1827                         }
1828                         break;
1829                 default:
1830                         /*
1831                          * process in service procedure if message already
1832                          * queued (maintain in-order processing)
1833                          */
1834                         if (wq->q_first != NULL) {
1835                                 tl_proc = tl_putq_ser;
1836                         } else {
1837                                 tl_proc = tl_wput_ser;
1838                         }
1839                         break;
1840                 }
1841                 break;
1842 
1843         case M_PCPROTO:
1844                 /*
1845                  * Check that the message has enough data to figure out TPI
1846                  * primitive.
1847                  */
1848                 if (msz < sizeof (prim->type)) {
1849                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1850                             SL_TRACE | SL_ERROR,
1851                             "tl_wput:M_PCROTO data too short"));
1852                         tl_merror(wq, mp, EPROTO);
1853                         return (0);
1854                 }
1855                 switch (prim->type) {
1856                 case T_CAPABILITY_REQ:
1857                         tl_capability_req(mp, tep);
1858                         return (0);
1859                 case T_INFO_REQ:
1860                         tl_proc = tl_info_req_ser;
1861                         break;
1862                 case T_ADDR_REQ:
1863                         tl_proc = tl_addr_req_ser;
1864                         break;
1865 
1866                 default:
1867                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
1868                             SL_TRACE | SL_ERROR,
1869                             "tl_wput:unknown TPI msg primitive"));
1870                         tl_merror(wq, mp, EPROTO);
1871                         return (0);
1872                 }
1873                 break;
1874         default:
1875                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
1876                     "tl_wput:default:unexpected Streams message"));
1877                 freemsg(mp);
1878                 return (0);
1879         }
1880 
1881         /*
1882          * Continue processing via serializer.
1883          */
1884         ASSERT(tl_proc != NULL);
1885         tl_refhold(tep);
1886         tl_serializer_enter(tep, tl_proc, mp);
1887         return (0);
1888 }
1889 
1890 /*
1891  * Place message on the queue while preserving order.
1892  */
1893 static void
1894 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1895 {
1896         if (tep->te_closing) {
1897                 tl_wput_ser(mp, tep);
1898         } else {
1899                 TL_PUTQ(tep, mp);
1900                 tl_serializer_exit(tep);
1901                 tl_refrele(tep);
1902         }
1903 
1904 }
1905 
1906 static void
1907 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1908 {
1909         ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1910 
1911         switch (DB_TYPE(mp)) {
1912         case M_DATA:
1913                 tl_data(mp, tep);
1914                 break;
1915         case M_PROTO:
1916                 tl_do_proto(mp, tep);
1917                 break;
1918         default:
1919                 freemsg(mp);
1920                 break;
1921         }
1922 }
1923 
1924 /*
1925  * Write side put procedure called from serializer.
1926  */
1927 static void
1928 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1929 {
1930         tl_wput_common_ser(mp, tep);
1931         tl_serializer_exit(tep);
1932         tl_refrele(tep);
1933 }
1934 
1935 /*
1936  * M_DATA processing. Called from serializer.
1937  */
1938 static void
1939 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1940 {
1941         tl_endpt_t      *peer_tep = tep->te_conp;
1942         queue_t         *peer_rq;
1943 
1944         ASSERT(DB_TYPE(mp) == M_DATA);
1945         ASSERT(IS_COTS(tep));
1946 
1947         IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1948 
1949         /*
1950          * fastpath for data. Ignore flow control if tep is closing.
1951          */
1952         if ((peer_tep != NULL) &&
1953             !peer_tep->te_closing &&
1954             ((tep->te_state == TS_DATA_XFER) ||
1955             (tep->te_state == TS_WREQ_ORDREL)) &&
1956             (tep->te_wq != NULL) &&
1957             (tep->te_wq->q_first == NULL) &&
1958             ((peer_tep->te_state == TS_DATA_XFER) ||
1959             (peer_tep->te_state == TS_WREQ_ORDREL))  &&
1960             ((peer_rq = peer_tep->te_rq) != NULL) &&
1961             (canputnext(peer_rq) || tep->te_closing)) {
1962                 putnext(peer_rq, mp);
1963         } else if (tep->te_closing) {
1964                 /*
1965                  * It is possible that by the time we got here tep started to
1966                  * close. If the write queue is not empty, and the state is
1967                  * TS_DATA_XFER the data should be delivered in order, so we
1968                  * call putq() instead of freeing the data.
1969                  */
1970                 if ((tep->te_wq != NULL) &&
1971                     ((tep->te_state == TS_DATA_XFER) ||
1972                     (tep->te_state == TS_WREQ_ORDREL))) {
1973                         TL_PUTQ(tep, mp);
1974                 } else {
1975                         freemsg(mp);
1976                 }
1977         } else {
1978                 TL_PUTQ(tep, mp);
1979         }
1980 
1981         tl_serializer_exit(tep);
1982         tl_refrele(tep);
1983 }
1984 
1985 /*
1986  * Write side service routine.
1987  *
1988  * All actual processing happens within serializer which is entered
1989  * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1990  * messages that need processing may have arrived, so tl_wsrv repeats until
1991  * queue is empty or te_nowsrv is set.
1992  */
1993 static int
1994 tl_wsrv(queue_t *wq)
1995 {
1996         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1997 
1998         while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1999                 mutex_enter(&tep->te_srv_lock);
2000                 ASSERT(tep->te_wsrv_active == B_FALSE);
2001                 tep->te_wsrv_active = B_TRUE;
2002                 mutex_exit(&tep->te_srv_lock);
2003 
2004                 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2005 
2006                 /*
2007                  * Wait for serializer job to complete.
2008                  */
2009                 mutex_enter(&tep->te_srv_lock);
2010                 while (tep->te_wsrv_active) {
2011                         cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2012                 }
2013                 cv_signal(&tep->te_srv_cv);
2014                 mutex_exit(&tep->te_srv_lock);
2015         }
2016         return (0);
2017 }
2018 
2019 /*
2020  * Serialized write side processing of the STREAMS queue.
2021  * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2022  * is NULL.
2023  */
2024 static void
2025 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2026 {
2027         mblk_t *mp;
2028         queue_t *wq = tep->te_wq;
2029 
2030         ASSERT(wq != NULL);
2031         while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2032                 tl_wput_common_ser(mp, tep);
2033         }
2034 
2035         /*
2036          * Wakeup service routine unless called from close.
2037          * If ser_mp is specified, the caller is tl_wsrv().
2038          * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2039          * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2040          * be no matching tl_serializer_exit() in this case.
2041          * Also, there is no need to wakeup anyone since tl_close_ser() is not
2042          * waiting on te_srv_cv.
2043          */
2044         if (ser_mp != NULL) {
2045                 /*
2046                  * We are called from tl_wsrv.
2047                  */
2048                 mutex_enter(&tep->te_srv_lock);
2049                 ASSERT(tep->te_wsrv_active);
2050                 tep->te_wsrv_active = B_FALSE;
2051                 cv_signal(&tep->te_srv_cv);
2052                 mutex_exit(&tep->te_srv_lock);
2053                 tl_serializer_exit(tep);
2054         }
2055 }
2056 
2057 /*
2058  * Called when the stream is backenabled. Enter serializer and qenable everyone
2059  * flow controlled by tep.
2060  *
2061  * NOTE: The service routine should enter serializer synchronously. Otherwise it
2062  * is possible that two instances of tl_rsrv will be running reusing the same
2063  * rsrv mblk.
2064  */
2065 static int
2066 tl_rsrv(queue_t *rq)
2067 {
2068         tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2069 
2070         ASSERT(rq->q_first == NULL);
2071         ASSERT(tep->te_rsrv_active == 0);
2072 
2073         tep->te_rsrv_active = B_TRUE;
2074         tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2075         /*
2076          * Wait for serializer job to complete.
2077          */
2078         mutex_enter(&tep->te_srv_lock);
2079         while (tep->te_rsrv_active) {
2080                 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2081         }
2082         cv_signal(&tep->te_srv_cv);
2083         mutex_exit(&tep->te_srv_lock);
2084         return (0);
2085 }
2086 
2087 /* ARGSUSED */
2088 static void
2089 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2090 {
2091         tl_endpt_t *peer_tep;
2092 
2093         if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2094                 tl_cl_backenable(tep);
2095         } else if (
2096             IS_COTS(tep) &&
2097             ((peer_tep = tep->te_conp) != NULL) &&
2098             !peer_tep->te_closing &&
2099             ((tep->te_state == TS_DATA_XFER) ||
2100             (tep->te_state == TS_WIND_ORDREL)||
2101             (tep->te_state == TS_WREQ_ORDREL))) {
2102                 TL_QENABLE(peer_tep);
2103         }
2104 
2105         /*
2106          * Wakeup read side service routine.
2107          */
2108         mutex_enter(&tep->te_srv_lock);
2109         ASSERT(tep->te_rsrv_active);
2110         tep->te_rsrv_active = B_FALSE;
2111         cv_signal(&tep->te_srv_cv);
2112         mutex_exit(&tep->te_srv_lock);
2113         tl_serializer_exit(tep);
2114 }
2115 
2116 /*
2117  * process M_PROTO messages. Always called from serializer.
2118  */
2119 static void
2120 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2121 {
2122         ssize_t                 msz = MBLKL(mp);
2123         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
2124 
2125         /* Message size was validated by tl_wput(). */
2126         ASSERT(msz >= sizeof (prim->type));
2127 
2128         switch (prim->type) {
2129         case T_UNBIND_REQ:
2130                 tl_unbind(mp, tep);
2131                 break;
2132 
2133         case T_ADDR_REQ:
2134                 tl_addr_req(mp, tep);
2135                 break;
2136 
2137         case O_T_CONN_RES:
2138         case T_CONN_RES:
2139                 if (IS_CLTS(tep)) {
2140                         tl_merror(tep->te_wq, mp, EPROTO);
2141                         break;
2142                 }
2143                 tl_conn_res(mp, tep);
2144                 break;
2145 
2146         case T_DISCON_REQ:
2147                 if (IS_CLTS(tep)) {
2148                         tl_merror(tep->te_wq, mp, EPROTO);
2149                         break;
2150                 }
2151                 tl_discon_req(mp, tep);
2152                 break;
2153 
2154         case T_DATA_REQ:
2155                 if (IS_CLTS(tep)) {
2156                         tl_merror(tep->te_wq, mp, EPROTO);
2157                         break;
2158                 }
2159                 tl_data(mp, tep);
2160                 break;
2161 
2162         case T_OPTDATA_REQ:
2163                 if (IS_CLTS(tep)) {
2164                         tl_merror(tep->te_wq, mp, EPROTO);
2165                         break;
2166                 }
2167                 tl_data(mp, tep);
2168                 break;
2169 
2170         case T_EXDATA_REQ:
2171                 if (IS_CLTS(tep)) {
2172                         tl_merror(tep->te_wq, mp, EPROTO);
2173                         break;
2174                 }
2175                 tl_exdata(mp, tep);
2176                 break;
2177 
2178         case T_ORDREL_REQ:
2179                 if (!IS_COTSORD(tep)) {
2180                         tl_merror(tep->te_wq, mp, EPROTO);
2181                         break;
2182                 }
2183                 tl_ordrel(mp, tep);
2184                 break;
2185 
2186         case T_UNITDATA_REQ:
2187                 if (IS_COTS(tep)) {
2188                         tl_merror(tep->te_wq, mp, EPROTO);
2189                         break;
2190                 }
2191                 tl_unitdata(mp, tep);
2192                 break;
2193 
2194         default:
2195                 tl_merror(tep->te_wq, mp, EPROTO);
2196                 break;
2197         }
2198 }
2199 
2200 /*
2201  * Process ioctl from serializer.
2202  * This is a wrapper around tl_do_ioctl().
2203  */
2204 static void
2205 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2206 {
2207         if (!tep->te_closing)
2208                 tl_do_ioctl(mp, tep);
2209         else
2210                 freemsg(mp);
2211 
2212         tl_serializer_exit(tep);
2213         tl_refrele(tep);
2214 }
2215 
2216 static void
2217 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2218 {
2219         struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2220         int cmd = iocbp->ioc_cmd;
2221         queue_t *wq = tep->te_wq;
2222         int error;
2223         int thisopt, otheropt;
2224 
2225         ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2226 
2227         switch (cmd) {
2228         case TL_IOC_CREDOPT:
2229                 if (cmd == TL_IOC_CREDOPT) {
2230                         thisopt = TL_SETCRED;
2231                         otheropt = TL_SETUCRED;
2232                 } else {
2233                         /* FALLTHROUGH */
2234         case TL_IOC_UCREDOPT:
2235                         thisopt = TL_SETUCRED;
2236                         otheropt = TL_SETCRED;
2237                 }
2238                 /*
2239                  * The credentials passing does not apply to sockets.
2240                  * Only one of the cred options can be set at a given time.
2241                  */
2242                 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2243                         miocnak(wq, mp, 0, EINVAL);
2244                         return;
2245                 }
2246 
2247                 /*
2248                  * Turn on generation of credential options for
2249                  * T_conn_req, T_conn_con, T_unidata_ind.
2250                  */
2251                 error = miocpullup(mp, sizeof (uint32_t));
2252                 if (error != 0) {
2253                         miocnak(wq, mp, 0, error);
2254                         return;
2255                 }
2256                 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2257                         miocnak(wq, mp, 0, EINVAL);
2258                         return;
2259                 }
2260 
2261                 if (*(uint32_t *)mp->b_cont->b_rptr)
2262                         tep->te_flag |= thisopt;
2263                 else
2264                         tep->te_flag &= ~thisopt;
2265 
2266                 miocack(wq, mp, 0, 0);
2267                 break;
2268 
2269         default:
2270                 /* Should not be here */
2271                 miocnak(wq, mp, 0, EINVAL);
2272                 break;
2273         }
2274 }
2275 
2276 
2277 /*
2278  * send T_ERROR_ACK
2279  * Note: assumes enough memory or caller passed big enough mp
2280  *      - no recovery from allocb failures
2281  */
2282 
2283 static void
2284 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2285     t_scalar_t unix_err, t_scalar_t type)
2286 {
2287         struct T_error_ack *err_ack;
2288         mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2289             M_PCPROTO, T_ERROR_ACK);
2290 
2291         if (ackmp == NULL) {
2292                 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE | SL_ERROR,
2293                     "tl_error_ack:out of mblk memory"));
2294                 tl_merror(wq, NULL, ENOSR);
2295                 return;
2296         }
2297         err_ack = (struct T_error_ack *)ackmp->b_rptr;
2298         err_ack->ERROR_prim = type;
2299         err_ack->TLI_error = tli_err;
2300         err_ack->UNIX_error = unix_err;
2301 
2302         /*
2303          * send error ack message
2304          */
2305         qreply(wq, ackmp);
2306 }
2307 
2308 
2309 
2310 /*
2311  * send T_OK_ACK
2312  * Note: assumes enough memory or caller passed big enough mp
2313  *      - no recovery from allocb failures
2314  */
2315 static void
2316 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2317 {
2318         struct T_ok_ack *ok_ack;
2319         mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2320             M_PCPROTO, T_OK_ACK);
2321 
2322         if (ackmp == NULL) {
2323                 tl_merror(wq, NULL, ENOMEM);
2324                 return;
2325         }
2326 
2327         ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2328         ok_ack->CORRECT_prim = type;
2329 
2330         (void) qreply(wq, ackmp);
2331 }
2332 
2333 /*
2334  * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2335  * This is a wrapper around tl_bind().
2336  */
2337 static void
2338 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2339 {
2340         if (!tep->te_closing)
2341                 tl_bind(mp, tep);
2342         else
2343                 freemsg(mp);
2344 
2345         tl_serializer_exit(tep);
2346         tl_refrele(tep);
2347 }
2348 
2349 /*
2350  * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2351  * Assumes that the endpoint is in the unbound.
2352  */
2353 static void
2354 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2355 {
2356         queue_t                 *wq = tep->te_wq;
2357         struct T_bind_ack       *b_ack;
2358         struct T_bind_req       *bind = (struct T_bind_req *)mp->b_rptr;
2359         mblk_t                  *ackmp, *bamp;
2360         soux_addr_t             ux_addr;
2361         t_uscalar_t             qlen = 0;
2362         t_scalar_t              alen, aoff;
2363         tl_addr_t               addr_req;
2364         void                    *addr_startp;
2365         ssize_t                 msz = MBLKL(mp), basize;
2366         t_scalar_t              tli_err = 0, unix_err = 0;
2367         t_scalar_t              save_prim_type = bind->PRIM_type;
2368         t_scalar_t              save_state = tep->te_state;
2369 
2370         if (tep->te_state != TS_UNBND) {
2371                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2372                     SL_TRACE | SL_ERROR,
2373                     "tl_wput:bind_request:out of state, state=%d",
2374                     tep->te_state));
2375                 tli_err = TOUTSTATE;
2376                 goto error;
2377         }
2378 
2379         if (msz < sizeof (struct T_bind_req)) {
2380                 tli_err = TSYSERR;
2381                 unix_err = EINVAL;
2382                 goto error;
2383         }
2384 
2385         tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2386 
2387         ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2388             (bind->PRIM_type == T_BIND_REQ));
2389 
2390         alen = bind->ADDR_length;
2391         aoff = bind->ADDR_offset;
2392 
2393         /* negotiate max conn req pending */
2394         if (IS_COTS(tep)) {
2395                 qlen = bind->CONIND_number;
2396                 if (qlen > tl_maxqlen)
2397                         qlen = tl_maxqlen;
2398         }
2399 
2400         /*
2401          * Reserve hash handle. It can only be NULL if the endpoint is unbound
2402          * and bound again.
2403          */
2404         if ((tep->te_hash_hndl == NULL) &&
2405             ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2406             mod_hash_reserve_nosleep(tep->te_addrhash,
2407             &tep->te_hash_hndl) != 0) {
2408                 tli_err = TSYSERR;
2409                 unix_err = ENOSR;
2410                 goto error;
2411         }
2412 
2413         /*
2414          * Verify address correctness.
2415          */
2416         if (IS_SOCKET(tep)) {
2417                 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2418 
2419                 if ((alen != TL_SOUX_ADDRLEN) ||
2420                     (aoff < 0) ||
2421                     (aoff + alen > msz)) {
2422                         (void) (STRLOG(TL_ID, tep->te_minor,
2423                             1, SL_TRACE | SL_ERROR,
2424                             "tl_bind: invalid socket addr"));
2425                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2426                         tli_err = TSYSERR;
2427                         unix_err = EINVAL;
2428                         goto error;
2429                 }
2430                 /* Copy address from message to local buffer. */
2431                 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2432                 /*
2433                  * Check that we got correct address from sockets
2434                  */
2435                 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2436                     (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2437                         (void) (STRLOG(TL_ID, tep->te_minor,
2438                             1, SL_TRACE | SL_ERROR,
2439                             "tl_bind: invalid socket magic"));
2440                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2441                         tli_err = TSYSERR;
2442                         unix_err = EINVAL;
2443                         goto error;
2444                 }
2445                 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2446                     (ux_addr.soua_vp != NULL)) {
2447                         (void) (STRLOG(TL_ID, tep->te_minor,
2448                             1, SL_TRACE | SL_ERROR,
2449                             "tl_bind: implicit addr non-empty"));
2450                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2451                         tli_err = TSYSERR;
2452                         unix_err = EINVAL;
2453                         goto error;
2454                 }
2455                 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2456                     (ux_addr.soua_vp == NULL)) {
2457                         (void) (STRLOG(TL_ID, tep->te_minor,
2458                             1, SL_TRACE | SL_ERROR,
2459                             "tl_bind: explicit addr empty"));
2460                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2461                         tli_err = TSYSERR;
2462                         unix_err = EINVAL;
2463                         goto error;
2464                 }
2465         } else {
2466                 if ((alen > 0) && ((aoff < 0) ||
2467                     ((ssize_t)(aoff + alen) > msz) ||
2468                     ((aoff + alen) < 0))) {
2469                         (void) (STRLOG(TL_ID, tep->te_minor,
2470                             1, SL_TRACE | SL_ERROR,
2471                             "tl_bind: invalid message"));
2472                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2473                         tli_err = TSYSERR;
2474                         unix_err = EINVAL;
2475                         goto error;
2476                 }
2477                 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2478                         (void) (STRLOG(TL_ID, tep->te_minor,
2479                             1, SL_TRACE | SL_ERROR,
2480                             "tl_bind: bad addr in  message"));
2481                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2482                         tli_err = TBADADDR;
2483                         goto error;
2484                 }
2485 #ifdef DEBUG
2486                 /*
2487                  * Mild form of ASSERT()ion to detect broken TPI apps.
2488                  * if (!assertion)
2489                  *      log warning;
2490                  */
2491                 if (!((alen == 0 && aoff == 0) ||
2492                         (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2493                         (void) (STRLOG(TL_ID, tep->te_minor,
2494                                     3, SL_TRACE | SL_ERROR,
2495                                     "tl_bind: addr overlaps TPI message"));
2496                 }
2497 #endif
2498         }
2499 
2500         /*
2501          * Bind the address provided or allocate one if requested.
2502          * Allow rebinds with a new qlen value.
2503          */
2504         if (IS_SOCKET(tep)) {
2505                 /*
2506                  * For anonymous requests the te_ap is already set up properly
2507                  * so use minor number as an address.
2508                  * For explicit requests need to check whether the address is
2509                  * already in use.
2510                  */
2511                 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2512                         int rc;
2513 
2514                         if (tep->te_flag & TL_ADDRHASHED) {
2515                                 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2516                                 if (tep->te_vp == ux_addr.soua_vp)
2517                                         goto skip_addr_bind;
2518                                 else /* Rebind to a new address. */
2519                                         tl_addr_unbind(tep);
2520                         }
2521                         /*
2522                          * Insert address in the hash if it is not already
2523                          * there.  Since we use preallocated handle, the insert
2524                          * can fail only if the key is already present.
2525                          */
2526                         rc = mod_hash_insert_reserve(tep->te_addrhash,
2527                             (mod_hash_key_t)ux_addr.soua_vp,
2528                             (mod_hash_val_t)tep, tep->te_hash_hndl);
2529 
2530                         if (rc != 0) {
2531                                 ASSERT(rc == MH_ERR_DUPLICATE);
2532                                 /*
2533                                  * Violate O_T_BIND_REQ semantics and fail with
2534                                  * TADDRBUSY - sockets will not use any address
2535                                  * other than supplied one for explicit binds.
2536                                  */
2537                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2538                                     SL_TRACE | SL_ERROR,
2539                                     "tl_bind:requested addr %p is busy",
2540                                     ux_addr.soua_vp));
2541                                 tli_err = TADDRBUSY;
2542                                 unix_err = 0;
2543                                 goto error;
2544                         }
2545                         tep->te_uxaddr = ux_addr;
2546                         tep->te_flag |= TL_ADDRHASHED;
2547                         tep->te_hash_hndl = NULL;
2548                 }
2549         } else if (alen == 0) {
2550                 /*
2551                  * assign any free address
2552                  */
2553                 if (!tl_get_any_addr(tep, NULL)) {
2554                         (void) (STRLOG(TL_ID, tep->te_minor,
2555                             1, SL_TRACE | SL_ERROR,
2556                             "tl_bind:failed to get buffer for any "
2557                             "address"));
2558                         tli_err = TSYSERR;
2559                         unix_err = ENOSR;
2560                         goto error;
2561                 }
2562         } else {
2563                 addr_req.ta_alen = alen;
2564                 addr_req.ta_abuf = (mp->b_rptr + aoff);
2565                 addr_req.ta_zoneid = tep->te_zoneid;
2566 
2567                 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2568                 if (tep->te_abuf == NULL) {
2569                         tli_err = TSYSERR;
2570                         unix_err = ENOSR;
2571                         goto error;
2572                 }
2573                 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2574                 tep->te_alen = alen;
2575 
2576                 if (mod_hash_insert_reserve(tep->te_addrhash,
2577                     (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2578                     tep->te_hash_hndl) != 0) {
2579                         if (save_prim_type == T_BIND_REQ) {
2580                                 /*
2581                                  * The bind semantics for this primitive
2582                                  * require a failure if the exact address
2583                                  * requested is busy
2584                                  */
2585                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2586                                     SL_TRACE | SL_ERROR,
2587                                     "tl_bind:requested addr is busy"));
2588                                 tli_err = TADDRBUSY;
2589                                 unix_err = 0;
2590                                 goto error;
2591                         }
2592 
2593                         /*
2594                          * O_T_BIND_REQ semantics say if address if requested
2595                          * address is busy, bind to any available free address
2596                          */
2597                         if (!tl_get_any_addr(tep, &addr_req)) {
2598                                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2599                                     SL_TRACE | SL_ERROR,
2600                                     "tl_bind:unable to get any addr buf"));
2601                                 tli_err = TSYSERR;
2602                                 unix_err = ENOMEM;
2603                                 goto error;
2604                         }
2605                 } else {
2606                         tep->te_flag |= TL_ADDRHASHED;
2607                         tep->te_hash_hndl = NULL;
2608                 }
2609         }
2610 
2611         ASSERT(tep->te_alen >= 0);
2612 
2613 skip_addr_bind:
2614         /*
2615          * prepare T_BIND_ACK TPI message
2616          */
2617         basize = sizeof (struct T_bind_ack) + tep->te_alen;
2618         bamp = reallocb(mp, basize, 0);
2619         if (bamp == NULL) {
2620                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2621                     "tl_wput:tl_bind: allocb failed"));
2622                 /*
2623                  * roll back state changes
2624                  */
2625                 tl_addr_unbind(tep);
2626                 tep->te_state = TS_UNBND;
2627                 tl_memrecover(wq, mp, basize);
2628                 return;
2629         }
2630 
2631         DB_TYPE(bamp) = M_PCPROTO;
2632         bamp->b_wptr = bamp->b_rptr + basize;
2633         b_ack = (struct T_bind_ack *)bamp->b_rptr;
2634         b_ack->PRIM_type = T_BIND_ACK;
2635         b_ack->CONIND_number = qlen;
2636         b_ack->ADDR_length = tep->te_alen;
2637         b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2638         addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2639         bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2640 
2641         if (IS_COTS(tep)) {
2642                 tep->te_qlen = qlen;
2643                 if (qlen > 0)
2644                         tep->te_flag |= TL_LISTENER;
2645         }
2646 
2647         tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2648         /*
2649          * send T_BIND_ACK message
2650          */
2651         (void) qreply(wq, bamp);
2652         return;
2653 
2654 error:
2655         ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2656         if (ackmp == NULL) {
2657                 /*
2658                  * roll back state changes
2659                  */
2660                 tep->te_state = save_state;
2661                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2662                 return;
2663         }
2664         tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2665         tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2666 }
2667 
2668 /*
2669  * Process T_UNBIND_REQ.
2670  * Called from serializer.
2671  */
2672 static void
2673 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2674 {
2675         queue_t *wq;
2676         mblk_t *ackmp;
2677 
2678         if (tep->te_closing) {
2679                 freemsg(mp);
2680                 return;
2681         }
2682 
2683         wq = tep->te_wq;
2684 
2685         /*
2686          * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2687          * ==> allocate for T_ERROR_ACK (known max)
2688          */
2689         if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2690                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2691                 return;
2692         }
2693         /*
2694          * memory resources committed
2695          * Note: no message validation. T_UNBIND_REQ message is
2696          * same size as PRIM_type field so already verified earlier.
2697          */
2698 
2699         /*
2700          * validate state
2701          */
2702         if (tep->te_state != TS_IDLE) {
2703                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2704                     SL_TRACE | SL_ERROR,
2705                     "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2706                     tep->te_state));
2707                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2708                 return;
2709         }
2710         tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2711 
2712         /*
2713          * TPI says on T_UNBIND_REQ:
2714          *    send up a M_FLUSH to flush both
2715          *    read and write queues
2716          */
2717         (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2718 
2719         if (!IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2720             tep->te_magic != SOU_MAGIC_EXPLICIT) {
2721 
2722                 /*
2723                  * Sockets use bind with qlen==0 followed by bind() to
2724                  * the same address with qlen > 0 for listeners.
2725                  * We allow rebind with a new qlen value.
2726                  */
2727                 tl_addr_unbind(tep);
2728         }
2729 
2730         tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2731         /*
2732          * send  T_OK_ACK
2733          */
2734         tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2735 }
2736 
2737 
2738 /*
2739  * Option management code from drv/ip is used here
2740  * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2741  *      database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2742  *      However, that is what we want as that option is 'unorthodox'
2743  *      and only valid in T_CONN_IND, T_CONN_CON  and T_UNITDATA_IND
2744  *      and not in T_SVR4_OPTMGMT_REQ/ACK
2745  * Note2: use of optcom_req means this routine is an exception to
2746  *       recovery from allocb() failures.
2747  */
2748 
2749 static void
2750 tl_optmgmt(queue_t *wq, mblk_t *mp)
2751 {
2752         tl_endpt_t *tep;
2753         mblk_t *ackmp;
2754         union T_primitives *prim;
2755         cred_t *cr;
2756 
2757         tep = (tl_endpt_t *)wq->q_ptr;
2758         prim = (union T_primitives *)mp->b_rptr;
2759 
2760         /*
2761          * All Solaris components should pass a db_credp
2762          * for this TPI message, hence we ASSERT.
2763          * But in case there is some other M_PROTO that looks
2764          * like a TPI message sent by some other kernel
2765          * component, we check and return an error.
2766          */
2767         cr = msg_getcred(mp, NULL);
2768         ASSERT(cr != NULL);
2769         if (cr == NULL) {
2770                 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2771                 return;
2772         }
2773 
2774         /*  all states OK for AF_UNIX options ? */
2775         if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2776             prim->type == T_SVR4_OPTMGMT_REQ) {
2777                 /*
2778                  * Broken TLI semantics that options can only be managed
2779                  * in TS_IDLE state. Needed for Sparc ABI test suite that
2780                  * tests this TLI (mis)feature using this device driver.
2781                  */
2782                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2783                     SL_TRACE | SL_ERROR,
2784                     "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2785                     tep->te_state));
2786                 /*
2787                  * preallocate memory for T_ERROR_ACK
2788                  */
2789                 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2790                 if (ackmp == NULL) {
2791                         tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2792                         return;
2793                 }
2794 
2795                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2796                 freemsg(mp);
2797                 return;
2798         }
2799 
2800         /*
2801          * call common option management routine from drv/ip
2802          */
2803         if (prim->type == T_SVR4_OPTMGMT_REQ) {
2804                 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2805         } else {
2806                 ASSERT(prim->type == T_OPTMGMT_REQ);
2807                 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2808         }
2809 }
2810 
2811 /*
2812  * Handle T_conn_req - the driver part of accept().
2813  * If TL_SET[U]CRED generate the credentials options.
2814  * If this is a socket pass through options unmodified.
2815  * For sockets generate the T_CONN_CON here instead of
2816  * waiting for the T_CONN_RES.
2817  */
2818 static void
2819 tl_conn_req(queue_t *wq, mblk_t *mp)
2820 {
2821         tl_endpt_t              *tep = (tl_endpt_t *)wq->q_ptr;
2822         struct T_conn_req       *creq = (struct T_conn_req *)mp->b_rptr;
2823         ssize_t                 msz = MBLKL(mp);
2824         t_scalar_t              alen, aoff, olen, ooff, err = 0;
2825         tl_endpt_t              *peer_tep = NULL;
2826         mblk_t                  *ackmp;
2827         mblk_t                  *dimp;
2828         struct T_discon_ind     *di;
2829         soux_addr_t             ux_addr;
2830         tl_addr_t               dst;
2831 
2832         ASSERT(IS_COTS(tep));
2833 
2834         if (tep->te_closing) {
2835                 freemsg(mp);
2836                 return;
2837         }
2838 
2839         /*
2840          * preallocate memory for:
2841          * 1. max of T_ERROR_ACK and T_OK_ACK
2842          *      ==> known max T_ERROR_ACK
2843          * 2. max of T_DISCON_IND and T_CONN_IND
2844          */
2845         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2846         if (ackmp == NULL) {
2847                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2848                 return;
2849         }
2850         /*
2851          * memory committed for T_OK_ACK/T_ERROR_ACK now
2852          * will be committed for T_DISCON_IND/T_CONN_IND later
2853          */
2854 
2855         if (tep->te_state != TS_IDLE) {
2856                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2857                     SL_TRACE | SL_ERROR,
2858                     "tl_wput:T_CONN_REQ:out of state, state=%d",
2859                     tep->te_state));
2860                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2861                 freemsg(mp);
2862                 return;
2863         }
2864 
2865         /*
2866          * validate the message
2867          * Note: dereference fields in struct inside message only
2868          * after validating the message length.
2869          */
2870         if (msz < sizeof (struct T_conn_req)) {
2871                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2872                     "tl_conn_req:invalid message length"));
2873                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2874                 freemsg(mp);
2875                 return;
2876         }
2877         alen = creq->DEST_length;
2878         aoff = creq->DEST_offset;
2879         olen = creq->OPT_length;
2880         ooff = creq->OPT_offset;
2881         if (olen == 0)
2882                 ooff = 0;
2883 
2884         if (IS_SOCKET(tep)) {
2885                 if ((alen != TL_SOUX_ADDRLEN) ||
2886                     (aoff < 0) ||
2887                     (aoff + alen > msz) ||
2888                     (alen > msz - sizeof (struct T_conn_req))) {
2889                         (void) (STRLOG(TL_ID, tep->te_minor,
2890                                     1, SL_TRACE | SL_ERROR,
2891                                     "tl_conn_req: invalid socket addr"));
2892                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2893                         freemsg(mp);
2894                         return;
2895                 }
2896                 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2897                 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2898                     (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2899                         (void) (STRLOG(TL_ID, tep->te_minor,
2900                             1, SL_TRACE | SL_ERROR,
2901                             "tl_conn_req: invalid socket magic"));
2902                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2903                         freemsg(mp);
2904                         return;
2905                 }
2906         } else {
2907                 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2908                     (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2909                     ooff + olen < 0)) ||
2910                     olen < 0 || ooff < 0) {
2911                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2912                             SL_TRACE | SL_ERROR,
2913                             "tl_conn_req:invalid message"));
2914                         tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2915                         freemsg(mp);
2916                         return;
2917                 }
2918 
2919                 if (alen <= 0 || aoff < 0 ||
2920                     (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2921                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2922                                     SL_TRACE | SL_ERROR,
2923                                     "tl_conn_req:bad addr in message, "
2924                                     "alen=%d, msz=%ld",
2925                                     alen, msz));
2926                         tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2927                         freemsg(mp);
2928                         return;
2929                 }
2930 #ifdef DEBUG
2931                 /*
2932                  * Mild form of ASSERT()ion to detect broken TPI apps.
2933                  * if (!assertion)
2934                  *      log warning;
2935                  */
2936                 if (!(aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2937                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
2938                             SL_TRACE | SL_ERROR,
2939                             "tl_conn_req: addr overlaps TPI message"));
2940                 }
2941 #endif
2942                 if (olen) {
2943                         /*
2944                          * no opts in connect req
2945                          * supported in this provider except for sockets.
2946                          */
2947                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
2948                             SL_TRACE | SL_ERROR,
2949                             "tl_conn_req:options not supported "
2950                             "in message"));
2951                         tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2952                         freemsg(mp);
2953                         return;
2954                 }
2955         }
2956 
2957         /*
2958          * Prevent tep from closing on us.
2959          */
2960         if (!tl_noclose(tep)) {
2961                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2962                     "tl_conn_req:endpoint is closing"));
2963                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2964                 freemsg(mp);
2965                 return;
2966         }
2967 
2968         tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2969         /*
2970          * get endpoint to connect to
2971          * check that peer with DEST addr is bound to addr
2972          * and has CONIND_number > 0
2973          */
2974         dst.ta_alen = alen;
2975         dst.ta_abuf = mp->b_rptr + aoff;
2976         dst.ta_zoneid = tep->te_zoneid;
2977 
2978         /*
2979          * Verify if remote addr is in use
2980          */
2981         peer_tep = (IS_SOCKET(tep) ?
2982             tl_sock_find_peer(tep, &ux_addr) :
2983             tl_find_peer(tep, &dst));
2984 
2985         if (peer_tep == NULL) {
2986                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2987                     "tl_conn_req:no one at connect address"));
2988                 err = ECONNREFUSED;
2989         } else if (peer_tep->te_nicon >= peer_tep->te_qlen)  {
2990                 /*
2991                  * validate that number of incoming connection is
2992                  * not to capacity on destination endpoint
2993                  */
2994                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2995                     "tl_conn_req: qlen overflow connection refused"));
2996                 err = ECONNREFUSED;
2997         }
2998 
2999         /*
3000          * Send T_DISCON_IND in case of error
3001          */
3002         if (err != 0) {
3003                 if (peer_tep != NULL)
3004                         tl_refrele(peer_tep);
3005                 /* We are still expected to send T_OK_ACK */
3006                 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3007                 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
3008                 tl_closeok(tep);
3009                 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
3010                     M_PROTO, T_DISCON_IND);
3011                 if (dimp == NULL) {
3012                         tl_merror(wq, NULL, ENOSR);
3013                         return;
3014                 }
3015                 di = (struct T_discon_ind *)dimp->b_rptr;
3016                 di->DISCON_reason = err;
3017                 di->SEQ_number = BADSEQNUM;
3018 
3019                 tep->te_state = TS_IDLE;
3020                 /*
3021                  * send T_DISCON_IND message
3022                  */
3023                 putnext(tep->te_rq, dimp);
3024                 return;
3025         }
3026 
3027         ASSERT(IS_COTS(peer_tep));
3028 
3029         /*
3030          * Found the listener. At this point processing will continue on
3031          * listener serializer. Close of the endpoint should be blocked while we
3032          * switch serializers.
3033          */
3034         tl_serializer_refhold(peer_tep->te_ser);
3035         tl_serializer_refrele(tep->te_ser);
3036         tep->te_ser = peer_tep->te_ser;
3037         ASSERT(tep->te_oconp == NULL);
3038         tep->te_oconp = peer_tep;
3039 
3040         /*
3041          * It is safe to close now. Close may continue on listener serializer.
3042          */
3043         tl_closeok(tep);
3044 
3045         /*
3046          * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3047          * data, so we link mp to ackmp.
3048          */
3049         ackmp->b_cont = mp;
3050         mp = ackmp;
3051 
3052         tl_refhold(tep);
3053         tl_serializer_enter(tep, tl_conn_req_ser, mp);
3054 }
3055 
3056 /*
3057  * Finish T_CONN_REQ processing on listener serializer.
3058  */
3059 static void
3060 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3061 {
3062         queue_t         *wq;
3063         tl_endpt_t      *peer_tep = tep->te_oconp;
3064         mblk_t          *confmp, *cimp, *indmp;
3065         void            *opts = NULL;
3066         mblk_t          *ackmp = mp;
3067         struct T_conn_req       *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3068         struct T_conn_ind       *ci;
3069         tl_icon_t       *tip;
3070         void            *addr_startp;
3071         t_scalar_t      olen = creq->OPT_length;
3072         t_scalar_t      ooff = creq->OPT_offset;
3073         size_t          ci_msz;
3074         size_t          size;
3075         cred_t          *cr = NULL;
3076         pid_t           cpid;
3077 
3078         if (tep->te_closing) {
3079                 TL_UNCONNECT(tep->te_oconp);
3080                 tl_serializer_exit(tep);
3081                 tl_refrele(tep);
3082                 freemsg(mp);
3083                 return;
3084         }
3085 
3086         wq = tep->te_wq;
3087         tep->te_flag |= TL_EAGER;
3088 
3089         /*
3090          * Extract preallocated ackmp from mp.
3091          */
3092         mp = mp->b_cont;
3093         ackmp->b_cont = NULL;
3094 
3095         if (olen == 0)
3096                 ooff = 0;
3097 
3098         if (peer_tep->te_closing ||
3099             !((peer_tep->te_state == TS_IDLE) ||
3100             (peer_tep->te_state == TS_WRES_CIND))) {
3101                 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3102                     "tl_conn_req:peer in bad state (%d)",
3103                     peer_tep->te_state));
3104                 TL_UNCONNECT(tep->te_oconp);
3105                 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3106                 freemsg(ackmp);
3107                 tl_serializer_exit(tep);
3108                 tl_refrele(tep);
3109                 return;
3110         }
3111 
3112         /*
3113          * preallocate now for T_DISCON_IND or T_CONN_IND
3114          */
3115         /*
3116          * calculate length of T_CONN_IND message
3117          */
3118         if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
3119                 cr = msg_getcred(mp, &cpid);
3120                 ASSERT(cr != NULL);
3121                 if (peer_tep->te_flag & TL_SETCRED) {
3122                         ooff = 0;
3123                         olen = (t_scalar_t) sizeof (struct opthdr) +
3124                             OPTLEN(sizeof (tl_credopt_t));
3125                         /* 1 option only */
3126                 } else {
3127                         ooff = 0;
3128                         olen = (t_scalar_t)sizeof (struct opthdr) +
3129                             OPTLEN(ucredminsize(cr));
3130                         /* 1 option only */
3131                 }
3132         }
3133         ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3134         ci_msz = T_ALIGN(ci_msz) + olen;
3135         size = max(ci_msz, sizeof (struct T_discon_ind));
3136 
3137         /*
3138          * Save options from mp - we'll need them for T_CONN_IND.
3139          */
3140         if (ooff != 0) {
3141                 opts = kmem_alloc(olen, KM_NOSLEEP);
3142                 if (opts == NULL) {
3143                         /*
3144                          * roll back state changes
3145                          */
3146                         tep->te_state = TS_IDLE;
3147                         tl_memrecover(wq, mp, size);
3148                         freemsg(ackmp);
3149                         TL_UNCONNECT(tep->te_oconp);
3150                         tl_serializer_exit(tep);
3151                         tl_refrele(tep);
3152                         return;
3153                 }
3154                 /* Copy options to a temp buffer */
3155                 bcopy(mp->b_rptr + ooff, opts, olen);
3156         }
3157 
3158         if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3159                 /*
3160                  * Generate a T_CONN_CON that has the identical address
3161                  * (and options) as the T_CONN_REQ.
3162                  * NOTE: assumes that the T_conn_req and T_conn_con structures
3163                  * are isomorphic.
3164                  */
3165                 confmp = copyb(mp);
3166                 if (confmp == NULL) {
3167                         /*
3168                          * roll back state changes
3169                          */
3170                         tep->te_state = TS_IDLE;
3171                         tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3172                         freemsg(ackmp);
3173                         if (opts != NULL)
3174                                 kmem_free(opts, olen);
3175                         TL_UNCONNECT(tep->te_oconp);
3176                         tl_serializer_exit(tep);
3177                         tl_refrele(tep);
3178                         return;
3179                 }
3180                 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3181                     T_CONN_CON;
3182         } else {
3183                 confmp = NULL;
3184         }
3185         if ((indmp = reallocb(mp, size, 0)) == NULL) {
3186                 /*
3187                  * roll back state changes
3188                  */
3189                 tep->te_state = TS_IDLE;
3190                 tl_memrecover(wq, mp, size);
3191                 freemsg(ackmp);
3192                 if (opts != NULL)
3193                         kmem_free(opts, olen);
3194                 freemsg(confmp);
3195                 TL_UNCONNECT(tep->te_oconp);
3196                 tl_serializer_exit(tep);
3197                 tl_refrele(tep);
3198                 return;
3199         }
3200 
3201         tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3202         if (tip == NULL) {
3203                 /*
3204                  * roll back state changes
3205                  */
3206                 tep->te_state = TS_IDLE;
3207                 tl_memrecover(wq, indmp, sizeof (*tip));
3208                 freemsg(ackmp);
3209                 if (opts != NULL)
3210                         kmem_free(opts, olen);
3211                 freemsg(confmp);
3212                 TL_UNCONNECT(tep->te_oconp);
3213                 tl_serializer_exit(tep);
3214                 tl_refrele(tep);
3215                 return;
3216         }
3217         tip->ti_mp = NULL;
3218 
3219         /*
3220          * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3221          * and tl_icon_t cell.
3222          */
3223 
3224         /*
3225          * ack validity of request and send the peer credential in the ACK.
3226          */
3227         tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3228 
3229         if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3230             confmp != NULL) {
3231                 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3232         }
3233 
3234         tl_ok_ack(wq, ackmp, T_CONN_REQ);
3235 
3236         /*
3237          * prepare message to send T_CONN_IND
3238          */
3239         /*
3240          * allocate the message - original data blocks retained
3241          * in the returned mblk
3242          */
3243         cimp = tl_resizemp(indmp, size);
3244         if (cimp == NULL) {
3245                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3246                     "tl_conn_req:con_ind:allocb failure"));
3247                 tl_merror(wq, indmp, ENOMEM);
3248                 TL_UNCONNECT(tep->te_oconp);
3249                 tl_serializer_exit(tep);
3250                 tl_refrele(tep);
3251                 if (opts != NULL)
3252                         kmem_free(opts, olen);
3253                 freemsg(confmp);
3254                 ASSERT(tip->ti_mp == NULL);
3255                 kmem_free(tip, sizeof (*tip));
3256                 return;
3257         }
3258 
3259         DB_TYPE(cimp) = M_PROTO;
3260         ci = (struct T_conn_ind *)cimp->b_rptr;
3261         ci->PRIM_type  = T_CONN_IND;
3262         ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3263         ci->SRC_length = tep->te_alen;
3264         ci->SEQ_number = tep->te_seqno;
3265 
3266         addr_startp = cimp->b_rptr + ci->SRC_offset;
3267         bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3268         if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3269 
3270                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3271                     ci->SRC_length);
3272                 ci->OPT_length = olen; /* because only 1 option */
3273                 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3274                     cr, cpid,
3275                     peer_tep->te_flag, peer_tep->te_credp);
3276         } else if (ooff != 0) {
3277                 /* Copy option from T_CONN_REQ */
3278                 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3279                     ci->SRC_length);
3280                 ci->OPT_length = olen;
3281                 ASSERT(opts != NULL);
3282                 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3283         } else {
3284                 ci->OPT_offset = 0;
3285                 ci->OPT_length = 0;
3286         }
3287         if (opts != NULL)
3288                 kmem_free(opts, olen);
3289 
3290         /*
3291          * register connection request with server peer
3292          * append to list of incoming connections
3293          * increment references for both peer_tep and tep: peer_tep is placed on
3294          * te_oconp and tep is placed on listeners queue.
3295          */
3296         tip->ti_tep = tep;
3297         tip->ti_seqno = tep->te_seqno;
3298         list_insert_tail(&peer_tep->te_iconp, tip);
3299         peer_tep->te_nicon++;
3300 
3301         peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3302         /*
3303          * send the T_CONN_IND message
3304          */
3305         putnext(peer_tep->te_rq, cimp);
3306 
3307         /*
3308          * Send a T_CONN_CON message for sockets.
3309          * Disable the queues until we have reached the correct state!
3310          */
3311         if (confmp != NULL) {
3312                 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3313                 noenable(wq);
3314                 putnext(tep->te_rq, confmp);
3315         }
3316         /*
3317          * Now we need to increment tep reference because tep is referenced by
3318          * server list of pending connections. We also need to decrement
3319          * reference before exiting serializer. Two operations void each other
3320          * so we don't modify reference at all.
3321          */
3322         ASSERT(tep->te_refcnt >= 2);
3323         ASSERT(peer_tep->te_refcnt >= 2);
3324         tl_serializer_exit(tep);
3325 }
3326 
3327 
3328 
3329 /*
3330  * Handle T_conn_res on listener stream. Called on listener serializer.
3331  * tl_conn_req has already generated the T_CONN_CON.
3332  * tl_conn_res is called on listener serializer.
3333  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3334  * Switch eager serializer to acceptor's.
3335  *
3336  * If TL_SET[U]CRED generate the credentials options.
3337  * For sockets tl_conn_req has already generated the T_CONN_CON.
3338  */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq;
	struct T_conn_res	*cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	t_scalar_t		olen, ooff, err = 0;
	t_scalar_t		prim = cres->PRIM_type;
	uchar_t			*addr_startp;
	tl_endpt_t		*acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t		*tip;
	size_t			size;
	mblk_t			*ackmp, *respmp;
	mblk_t			*dimp, *ccmp = NULL;
	struct T_discon_ind	*di;
	struct T_conn_con	*cc;
	boolean_t		client_noclose_set = B_FALSE;
	boolean_t		switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (ackmp == NULL) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */


	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
	ASSERT(tep->te_state == TS_WACK_CRES);

	/*
	 * NOTE(review): this rejects sequence numbers in the range
	 * [BADSEQNUM, TL_MINOR_START); assumes valid seqnos are assigned
	 * starting at TL_MINOR_START — confirm against te_seqno assignment.
	 */
	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing.
	 */
	if (!tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Locate the entry corresponding to the client on the listener's
	 * list of pending connections (it is marked for deletion further
	 * below). A missing entry means the sequence number does not match
	 * any recorded incoming connection.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, the condition also accepts any
		 * non-TS_WCON_CREQ state on non-socket endpoints (the
		 * IS_SOCKET() test looks inverted relative to this comment)
		 * — confirm intended semantics before changing.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err) {
			size = sizeof (struct T_discon_ind);
		} else {
			/*
			 * calculate length of T_CONN_CON message
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredminsize(acc_ep->te_credp));
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		mp = NULL;
	}

	/*
	 * Now ack validity of request
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else {
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
	}

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (dimp == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE | SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type  = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 */
	if (!IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE | SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type  = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor: it should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (!IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In cases any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}
3868 
3869 
3870 
3871 
3872 static void
3873 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3874 {
3875         queue_t                 *wq;
3876         struct T_discon_req     *dr;
3877         ssize_t                 msz;
3878         tl_endpt_t              *peer_tep = tep->te_conp;
3879         tl_endpt_t              *srv_tep = tep->te_oconp;
3880         tl_icon_t               *tip;
3881         size_t                  size;
3882         mblk_t                  *ackmp, *dimp, *respmp;
3883         struct T_discon_ind     *di;
3884         t_scalar_t              save_state, new_state;
3885 
3886         if (tep->te_closing) {
3887                 freemsg(mp);
3888                 return;
3889         }
3890 
3891         if ((peer_tep != NULL) && peer_tep->te_closing) {
3892                 TL_UNCONNECT(tep->te_conp);
3893                 peer_tep = NULL;
3894         }
3895         if ((srv_tep != NULL) && srv_tep->te_closing) {
3896                 TL_UNCONNECT(tep->te_oconp);
3897                 srv_tep = NULL;
3898         }
3899 
3900         wq = tep->te_wq;
3901 
3902         /*
3903          * preallocate memory for:
3904          * 1. max of T_ERROR_ACK and T_OK_ACK
3905          *      ==> known max T_ERROR_ACK
3906          * 2. for  T_DISCON_IND
3907          */
3908         ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3909         if (ackmp == NULL) {
3910                 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3911                 return;
3912         }
3913         /*
3914          * memory committed for T_OK_ACK/T_ERROR_ACK now
3915          * will be committed for T_DISCON_IND  later
3916          */
3917 
3918         dr = (struct T_discon_req *)mp->b_rptr;
3919         msz = MBLKL(mp);
3920 
3921         /*
3922          * validate the state
3923          */
3924         save_state = new_state = tep->te_state;
3925         if (!(save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3926             !(save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3927                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3928                     SL_TRACE | SL_ERROR,
3929                     "tl_wput:T_DISCON_REQ:out of state, state=%d",
3930                     tep->te_state));
3931                 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3932                 freemsg(mp);
3933                 return;
3934         }
3935         /*
3936          * Defer committing the state change until it is determined if
3937          * the message will be queued with the tl_icon or not.
3938          */
3939         new_state  = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3940 
3941         /* validate the message */
3942         if (msz < sizeof (struct T_discon_req)) {
3943                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3944                     "tl_discon_req:invalid message"));
3945                 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3946                 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3947                 freemsg(mp);
3948                 return;
3949         }
3950 
3951         /*
3952          * if server, then validate that client exists
3953          * by connection sequence number etc.
3954          */
3955         if (tep->te_nicon > 0) { /* server */
3956 
3957                 /*
3958                  * search server list for disconnect client
3959                  */
3960                 tip = tl_icon_find(tep, dr->SEQ_number);
3961                 if (tip == NULL) {
3962                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
3963                             SL_TRACE | SL_ERROR,
3964                             "tl_discon_req:no disconnect endpoint"));
3965                         tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3966                         tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3967                         freemsg(mp);
3968                         return;
3969                 }
3970                 /*
3971                  * If ti_tep is NULL the client has already closed. In this case
3972                  * the code below will avoid any action on the client side.
3973                  */
3974 
3975                 IMPLY(tip->ti_tep != NULL,
3976                     tip->ti_tep->te_seqno == dr->SEQ_number);
3977                 peer_tep = tip->ti_tep;
3978         }
3979 
3980         /*
3981          * preallocate now for T_DISCON_IND
3982          * ack validity of request (T_OK_ACK) after memory committed
3983          */
3984         size = sizeof (struct T_discon_ind);
3985         if ((respmp = reallocb(mp, size, 0)) == NULL) {
3986                 tl_memrecover(wq, mp, size);
3987                 freemsg(ackmp);
3988                 return;
3989         }
3990 
3991         /*
3992          * prepare message to ack validity of request
3993          */
3994         if (tep->te_nicon == 0) {
3995                 new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3996         } else {
3997                 if (tep->te_nicon == 1)
3998                         new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3999                 else
4000                         new_state = NEXTSTATE(TE_OK_ACK4, new_state);
4001         }
4002 
4003         /*
4004          * Flushing queues according to TPI. Using the old state.
4005          */
4006         if ((tep->te_nicon <= 1) &&
4007             ((save_state == TS_DATA_XFER) ||
4008             (save_state == TS_WIND_ORDREL) ||
4009             (save_state == TS_WREQ_ORDREL)))
4010                 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
4011 
4012         /* send T_OK_ACK up  */
4013         tl_ok_ack(wq, ackmp, T_DISCON_REQ);
4014 
4015         /*
4016          * now do disconnect business
4017          */
4018         if (tep->te_nicon > 0) { /* listener */
4019                 if (peer_tep != NULL && !peer_tep->te_closing) {
4020                         /*
4021                          * disconnect incoming connect request pending to tep
4022                          */
4023                         if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4024                                 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4025                                     SL_TRACE | SL_ERROR,
4026                                     "tl_discon_req: reallocb failed"));
4027                                 tep->te_state = new_state;
4028                                 tl_merror(wq, respmp, ENOMEM);
4029                                 return;
4030                         }
4031                         di = (struct T_discon_ind *)dimp->b_rptr;
4032                         di->SEQ_number = BADSEQNUM;
4033                         save_state = peer_tep->te_state;
4034                         peer_tep->te_state = TS_IDLE;
4035 
4036                         TL_REMOVE_PEER(peer_tep->te_oconp);
4037                         enableok(peer_tep->te_wq);
4038                         TL_QENABLE(peer_tep);
4039                 } else {
4040                         freemsg(respmp);
4041                         dimp = NULL;
4042                 }
4043 
4044                 /*
4045                  * remove endpoint from incoming connection list
4046                  * - remove disconnect client from list on server
4047                  */
4048                 tl_freetip(tep, tip);
4049         } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4050                 /*
4051                  * disconnect an outgoing request pending from tep
4052                  */
4053 
4054                 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4055                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
4056                             SL_TRACE | SL_ERROR,
4057                             "tl_discon_req: reallocb failed"));
4058                         tep->te_state = new_state;
4059                         tl_merror(wq, respmp, ENOMEM);
4060                         return;
4061                 }
4062                 di = (struct T_discon_ind *)dimp->b_rptr;
4063                 DB_TYPE(dimp) = M_PROTO;
4064                 di->PRIM_type  = T_DISCON_IND;
4065                 di->DISCON_reason = ECONNRESET;
4066                 di->SEQ_number = tep->te_seqno;
4067 
4068                 /*
4069                  * If this is a socket the T_DISCON_IND is queued with
4070                  * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4071                  * from the list of pending connections.
4072                  * Note that when te_oconp is set the peer better have
4073                  * a t_connind_t for the client.
4074                  */
4075                 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4076                         /*
4077                          * No need to check that
4078                          * ti_tep == NULL since the T_DISCON_IND
4079                          * takes precedence over other queued
4080                          * messages.
4081                          */
4082                         tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4083                         peer_tep = NULL;
4084                         dimp = NULL;
4085                         /*
4086                          * Can't clear te_oconp since tl_co_unconnect needs
4087                          * it as a hint not to free the tep.
4088                          * Keep the state unchanged since tl_conn_res inspects
4089                          * it.
4090                          */
4091                         new_state = tep->te_state;
4092                 } else {
4093                         /* Found - delete it */
4094                         tip = tl_icon_find(peer_tep, tep->te_seqno);
4095                         if (tip != NULL) {
4096                                 ASSERT(tep == tip->ti_tep);
4097                                 save_state = peer_tep->te_state;
4098                                 if (peer_tep->te_nicon == 1)
4099                                         peer_tep->te_state =
4100                                             NEXTSTATE(TE_DISCON_IND2,
4101                                             peer_tep->te_state);
4102                                 else
4103                                         peer_tep->te_state =
4104                                             NEXTSTATE(TE_DISCON_IND3,
4105                                             peer_tep->te_state);
4106                                 tl_freetip(peer_tep, tip);
4107                         }
4108                         ASSERT(tep->te_oconp != NULL);
4109                         TL_UNCONNECT(tep->te_oconp);
4110                 }
4111         } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4112                 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4113                         (void) (STRLOG(TL_ID, tep->te_minor, 2,
4114                             SL_TRACE | SL_ERROR,
4115                             "tl_discon_req: reallocb failed"));
4116                         tep->te_state = new_state;
4117                         tl_merror(wq, respmp, ENOMEM);
4118                         return;
4119                 }
4120                 di = (struct T_discon_ind *)dimp->b_rptr;
4121                 di->SEQ_number = BADSEQNUM;
4122 
4123                 save_state = peer_tep->te_state;
4124                 peer_tep->te_state = TS_IDLE;
4125         } else {
4126                 /* Not connected */
4127                 tep->te_state = new_state;
4128                 freemsg(respmp);
4129                 return;
4130         }
4131 
4132         /* Commit state changes */
4133         tep->te_state = new_state;
4134 
4135         if (peer_tep == NULL) {
4136                 ASSERT(dimp == NULL);
4137                 goto done;
4138         }
4139         /*
4140          * Flush queues on peer before sending up
4141          * T_DISCON_IND according to TPI
4142          */
4143 
4144         if ((save_state == TS_DATA_XFER) ||
4145             (save_state == TS_WIND_ORDREL) ||
4146             (save_state == TS_WREQ_ORDREL))
4147                 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4148 
4149         DB_TYPE(dimp) = M_PROTO;
4150         di->PRIM_type  = T_DISCON_IND;
4151         di->DISCON_reason = ECONNRESET;
4152 
4153         /*
4154          * data blocks already linked into dimp by reallocb()
4155          */
4156         /*
4157          * send indication message to peer user module
4158          */
4159         ASSERT(dimp != NULL);
4160         putnext(peer_tep->te_rq, dimp);
4161 done:
4162         if (tep->te_conp) {  /* disconnect pointers if connected */
4163                 ASSERT(!peer_tep->te_closing);
4164 
4165                 /*
4166                  * Messages may be queued on peer's write queue
4167                  * waiting to be processed by its write service
4168                  * procedure. Before the pointer to the peer transport
4169                  * structure is set to NULL, qenable the peer's write
4170                  * queue so that the queued up messages are processed.
4171                  */
4172                 if ((save_state == TS_DATA_XFER) ||
4173                     (save_state == TS_WIND_ORDREL) ||
4174                     (save_state == TS_WREQ_ORDREL))
4175                         TL_QENABLE(peer_tep);
4176                 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4177                 TL_UNCONNECT(peer_tep->te_conp);
4178                 if (!IS_SOCKET(tep)) {
4179                         /*
4180                          * unlink the streams
4181                          */
4182                         tep->te_wq->q_next = NULL;
4183                         peer_tep->te_wq->q_next = NULL;
4184                 }
4185                 TL_UNCONNECT(tep->te_conp);
4186         }
4187 }
4188 
4189 static void
4190 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4191 {
4192         if (!tep->te_closing)
4193                 tl_addr_req(mp, tep);
4194         else
4195                 freemsg(mp);
4196 
4197         tl_serializer_exit(tep);
4198         tl_refrele(tep);
4199 }
4200 
4201 static void
4202 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4203 {
4204         queue_t                 *wq;
4205         size_t                  ack_sz;
4206         mblk_t                  *ackmp;
4207         struct T_addr_ack       *taa;
4208 
4209         if (tep->te_closing) {
4210                 freemsg(mp);
4211                 return;
4212         }
4213 
4214         wq = tep->te_wq;
4215 
4216         /*
4217          * Note: T_ADDR_REQ message has only PRIM_type field
4218          * so it is already validated earlier.
4219          */
4220 
4221         if (IS_CLTS(tep) ||
4222             (tep->te_state > TS_WREQ_ORDREL) ||
4223             (tep->te_state < TS_DATA_XFER)) {
4224                 /*
4225                  * Either connectionless or connection oriented but not
4226                  * in connected data transfer state or half-closed states.
4227                  */
4228                 ack_sz = sizeof (struct T_addr_ack);
4229                 if (tep->te_state >= TS_IDLE)
4230                         /* is bound */
4231                         ack_sz += tep->te_alen;
4232                 ackmp = reallocb(mp, ack_sz, 0);
4233                 if (ackmp == NULL) {
4234                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4235                             SL_TRACE | SL_ERROR,
4236                             "tl_addr_req: reallocb failed"));
4237                         tl_memrecover(wq, mp, ack_sz);
4238                         return;
4239                 }
4240 
4241                 taa = (struct T_addr_ack *)ackmp->b_rptr;
4242 
4243                 bzero(taa, sizeof (struct T_addr_ack));
4244 
4245                 taa->PRIM_type = T_ADDR_ACK;
4246                 ackmp->b_datap->db_type = M_PCPROTO;
4247                 ackmp->b_wptr = (uchar_t *)&taa[1];
4248 
4249                 if (tep->te_state >= TS_IDLE) {
4250                         /* endpoint is bound */
4251                         taa->LOCADDR_length = tep->te_alen;
4252                         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4253 
4254                         bcopy(tep->te_abuf, ackmp->b_wptr,
4255                             tep->te_alen);
4256                         ackmp->b_wptr += tep->te_alen;
4257                         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4258                 }
4259 
4260                 (void) qreply(wq, ackmp);
4261         } else {
4262                 ASSERT(tep->te_state == TS_DATA_XFER ||
4263                     tep->te_state == TS_WIND_ORDREL ||
4264                     tep->te_state == TS_WREQ_ORDREL);
4265                 /* connection oriented in data transfer */
4266                 tl_connected_cots_addr_req(mp, tep);
4267         }
4268 }
4269 
4270 
4271 static void
4272 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4273 {
4274         tl_endpt_t              *peer_tep = tep->te_conp;
4275         size_t                  ack_sz;
4276         mblk_t                  *ackmp;
4277         struct T_addr_ack       *taa;
4278         uchar_t                 *addr_startp;
4279 
4280         if (tep->te_closing) {
4281                 freemsg(mp);
4282                 return;
4283         }
4284 
4285         if (peer_tep == NULL || peer_tep->te_closing) {
4286                 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4287                 return;
4288         }
4289 
4290         ASSERT(tep->te_state >= TS_IDLE);
4291 
4292         ack_sz = sizeof (struct T_addr_ack);
4293         ack_sz += T_ALIGN(tep->te_alen);
4294         ack_sz += peer_tep->te_alen;
4295 
4296         ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4297         if (ackmp == NULL) {
4298                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4299                     "tl_connected_cots_addr_req: reallocb failed"));
4300                 tl_memrecover(tep->te_wq, mp, ack_sz);
4301                 return;
4302         }
4303 
4304         taa = (struct T_addr_ack *)ackmp->b_rptr;
4305 
4306         /* endpoint is bound */
4307         taa->LOCADDR_length = tep->te_alen;
4308         taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4309 
4310         addr_startp = (uchar_t *)&taa[1];
4311 
4312         bcopy(tep->te_abuf, addr_startp,
4313             tep->te_alen);
4314 
4315         taa->REMADDR_length = peer_tep->te_alen;
4316         taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4317             taa->LOCADDR_length);
4318         addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4319         bcopy(peer_tep->te_abuf, addr_startp,
4320             peer_tep->te_alen);
4321         ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4322             taa->REMADDR_offset + peer_tep->te_alen;
4323         ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4324 
4325         putnext(tep->te_rq, ackmp);
4326 }
4327 
4328 static void
4329 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4330 {
4331         if (IS_CLTS(tep)) {
4332                 *ia = tl_clts_info_ack;
4333                 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4334         } else {
4335                 *ia = tl_cots_info_ack;
4336                 if (IS_COTSORD(tep))
4337                         ia->SERV_type = T_COTS_ORD;
4338         }
4339         ia->TIDU_size = tl_tidusz;
4340         ia->CURRENT_state = tep->te_state;
4341 }
4342 
4343 /*
4344  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
4345  * tl_wput.
4346  */
4347 static void
4348 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4349 {
4350         mblk_t                  *ackmp;
4351         t_uscalar_t             cap_bits1;
4352         struct T_capability_ack *tcap;
4353 
4354         if (tep->te_closing) {
4355                 freemsg(mp);
4356                 return;
4357         }
4358 
4359         cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4360 
4361         ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4362             M_PCPROTO, T_CAPABILITY_ACK);
4363         if (ackmp == NULL) {
4364                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4365                     "tl_capability_req: reallocb failed"));
4366                 tl_memrecover(tep->te_wq, mp,
4367                     sizeof (struct T_capability_ack));
4368                 return;
4369         }
4370 
4371         tcap = (struct T_capability_ack *)ackmp->b_rptr;
4372         tcap->CAP_bits1 = 0;
4373 
4374         if (cap_bits1 & TC1_INFO) {
4375                 tl_copy_info(&tcap->INFO_ack, tep);
4376                 tcap->CAP_bits1 |= TC1_INFO;
4377         }
4378 
4379         if (cap_bits1 & TC1_ACCEPTOR_ID) {
4380                 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4381                 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4382         }
4383 
4384         putnext(tep->te_rq, ackmp);
4385 }
4386 
4387 static void
4388 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4389 {
4390         if (!tep->te_closing)
4391                 tl_info_req(mp, tep);
4392         else
4393                 freemsg(mp);
4394 
4395         tl_serializer_exit(tep);
4396         tl_refrele(tep);
4397 }
4398 
4399 static void
4400 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4401 {
4402         mblk_t *ackmp;
4403 
4404         ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4405             M_PCPROTO, T_INFO_ACK);
4406         if (ackmp == NULL) {
4407                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4408                     "tl_info_req: reallocb failed"));
4409                 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4410                 return;
4411         }
4412 
4413         /*
4414          * fill in T_INFO_ACK contents
4415          */
4416         tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4417 
4418         /*
4419          * send ack message
4420          */
4421         putnext(tep->te_rq, ackmp);
4422 }
4423 
4424 /*
4425  * Handle M_DATA, T_data_req and T_optdata_req.
4426  * If this is a socket pass through T_optdata_req options unmodified.
4427  */
static void
tl_data(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	boolean_t		closing = tep->te_closing;

	/* Data primitives are only valid on connection-oriented endpoints. */
	if (IS_CLTS(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:clts:unattached M_DATA"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * Validate the primitive header: T_DATA_REQ must be at least
	 * sizeof (struct T_data_req); T_OPTDATA_REQ must be full-sized
	 * and is only accepted on sockets.  (Plain M_DATA has no header
	 * to validate.)
	 */
	if (DB_TYPE(mp) == M_PROTO) {
		if (prim->type == T_DATA_REQ &&
		    msz < sizeof (struct T_data_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
				SL_TRACE | SL_ERROR,
				"tl_data:T_DATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		} else if (prim->type == T_OPTDATA_REQ &&
		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE | SL_ERROR,
			    "tl_data:T_OPTDATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
	}

	/*
	 * connection oriented provider
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
		    "tl_data:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/*
		 * No connected peer and no pending outgoing connection:
		 * protocol error (unless we are closing, in which case
		 * the message is silently dropped).
		 */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE | SL_ERROR,
			    "tl_data: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		if (DB_TYPE(mp) == M_PROTO) {
			/* too short to carry even a PRIM_type - drop it */
			if (msz < sizeof (t_scalar_t)) {
				freemsg(mp);
				return;
			}
			/* reuse message block - just change REQ to IND */
			if (prim->type == T_DATA_REQ)
				prim->type = T_DATA_IND;
			else
				prim->type = T_OPTDATA_IND;
		}
		/* closing: queue the data with the pending T_CONN_IND */
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE | SL_ERROR,
			    "tl_data: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/* invalid state for event TE_DATA_REQ */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_data:cots:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
	 * (State stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_data: peer gone"));
		return;
	}

	ASSERT(tep->te_serializer == peer_tep->te_serializer);
	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate peer state
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_data:rx side:invalid state"));
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	if (DB_TYPE(mp) == M_PROTO) {
		/* reuse message block - just change REQ to IND */
		if (prim->type == T_DATA_REQ)
			prim->type = T_DATA_IND;
		else
			prim->type = T_OPTDATA_IND;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}
4626 
4627 
4628 
4629 static void
4630 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4631 {
4632         queue_t                 *wq = tep->te_wq;
4633         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4634         ssize_t                 msz = MBLKL(mp);
4635         tl_endpt_t              *peer_tep;
4636         queue_t                 *peer_rq;
4637         boolean_t               closing = tep->te_closing;
4638 
4639         if (msz < sizeof (struct T_exdata_req)) {
4640                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4641                     "tl_exdata:invalid message"));
4642                 if (!closing) {
4643                         tl_merror(wq, mp, EPROTO);
4644                 } else {
4645                         freemsg(mp);
4646                 }
4647                 return;
4648         }
4649 
4650         /*
4651          * If the endpoint is closing it should still forward any data to the
4652          * peer (if it has one). If it is not allowed to forward it can just
4653          * free the message.
4654          */
4655         if (closing &&
4656             (tep->te_state != TS_DATA_XFER) &&
4657             (tep->te_state != TS_WREQ_ORDREL)) {
4658                 freemsg(mp);
4659                 return;
4660         }
4661 
4662         /*
4663          * validate state
4664          */
4665         switch (tep->te_state) {
4666         case TS_IDLE:
4667                 /*
4668                  * Other end not here - do nothing.
4669                  */
4670                 freemsg(mp);
4671                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4672                     "tl_exdata:cots with endpoint idle"));
4673                 return;
4674 
4675         case TS_DATA_XFER:
4676                 /* valid states */
4677                 if (tep->te_conp != NULL)
4678                         break;
4679 
4680                 if (tep->te_oconp == NULL) {
4681                         if (!closing) {
4682                                 tl_merror(wq, mp, EPROTO);
4683                         } else {
4684                                 freemsg(mp);
4685                         }
4686                         return;
4687                 }
4688                 /*
4689                  * For a socket the T_CONN_CON is sent early thus
4690                  * the peer might not yet have accepted the connection.
4691                  * If we are closing queue the packet with the T_CONN_IND.
4692                  * Otherwise defer processing the packet until the peer
4693                  * accepts the connection.
4694                  * Note that the queue is noenabled when we go into this
4695                  * state.
4696                  */
4697                 if (!closing) {
4698                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4699                             SL_TRACE | SL_ERROR,
4700                             "tl_exdata: ocon"));
4701                         TL_PUTBQ(tep, mp);
4702                         return;
4703                 }
4704                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4705                     "tl_exdata: closing socket ocon"));
4706                 prim->type = T_EXDATA_IND;
4707                 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4708                 return;
4709 
4710         case TS_WREQ_ORDREL:
4711                 if (tep->te_conp == NULL) {
4712                         /*
4713                          * Other end closed - generate discon_ind
4714                          * with reason 0 to cause an EPIPE but no
4715                          * read side error on AF_UNIX sockets.
4716                          */
4717                         freemsg(mp);
4718                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
4719                             SL_TRACE | SL_ERROR,
4720                             "tl_exdata: WREQ_ORDREL and no peer"));
4721                         tl_discon_ind(tep, 0);
4722                         return;
4723                 }
4724                 break;
4725 
4726         default:
4727                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4728                     SL_TRACE | SL_ERROR,
4729                     "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4730                     tep->te_state));
4731                 tl_merror(wq, mp, EPROTO);
4732                 return;
4733         }
4734         /*
4735          * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4736          * (state stays same on this event)
4737          */
4738 
4739         /*
4740          * get connected endpoint
4741          */
4742         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4743                 freemsg(mp);
4744                 /* Peer closed */
4745                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4746                     "tl_exdata: peer gone"));
4747                 return;
4748         }
4749 
4750         peer_rq = peer_tep->te_rq;
4751 
4752         /*
4753          * Put it back if flow controlled
4754          * Note: Messages already on queue when we are closing is bounded
4755          * so we can ignore flow control.
4756          */
4757         if (!canputnext(peer_rq) && !closing) {
4758                 TL_PUTBQ(tep, mp);
4759                 return;
4760         }
4761 
4762         /*
4763          * validate state on peer
4764          */
4765         switch (peer_tep->te_state) {
4766         case TS_DATA_XFER:
4767         case TS_WIND_ORDREL:
4768                 /* valid states */
4769                 break;
4770         default:
4771                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4772                     "tl_exdata:rx side:invalid state"));
4773                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4774                 return;
4775         }
4776         /*
4777          * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4778          * (peer state stays same on this event)
4779          */
4780         /*
4781          * reuse message block
4782          */
4783         prim->type = T_EXDATA_IND;
4784 
4785         /*
4786          * send data to connected peer
4787          */
4788         putnext(peer_rq, mp);
4789 }
4790 
4791 
4792 
4793 static void
4794 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4795 {
4796         queue_t                 *wq = tep->te_wq;
4797         union T_primitives      *prim = (union T_primitives *)mp->b_rptr;
4798         ssize_t                 msz = MBLKL(mp);
4799         tl_endpt_t              *peer_tep;
4800         queue_t                 *peer_rq;
4801         boolean_t               closing = tep->te_closing;
4802 
4803         if (msz < sizeof (struct T_ordrel_req)) {
4804                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4805                     "tl_ordrel:invalid message"));
4806                 if (!closing) {
4807                         tl_merror(wq, mp, EPROTO);
4808                 } else {
4809                         freemsg(mp);
4810                 }
4811                 return;
4812         }
4813 
4814         /*
4815          * validate state
4816          */
4817         switch (tep->te_state) {
4818         case TS_DATA_XFER:
4819         case TS_WREQ_ORDREL:
4820                 /* valid states */
4821                 if (tep->te_conp != NULL)
4822                         break;
4823 
4824                 if (tep->te_oconp == NULL)
4825                         break;
4826 
4827                 /*
4828                  * For a socket the T_CONN_CON is sent early thus
4829                  * the peer might not yet have accepted the connection.
4830                  * If we are closing queue the packet with the T_CONN_IND.
4831                  * Otherwise defer processing the packet until the peer
4832                  * accepts the connection.
4833                  * Note that the queue is noenabled when we go into this
4834                  * state.
4835                  */
4836                 if (!closing) {
4837                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
4838                             SL_TRACE | SL_ERROR,
4839                             "tl_ordlrel: ocon"));
4840                         TL_PUTBQ(tep, mp);
4841                         return;
4842                 }
4843                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4844                     "tl_ordlrel: closing socket ocon"));
4845                 prim->type = T_ORDREL_IND;
4846                 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4847                 return;
4848 
4849         default:
4850                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4851                     SL_TRACE | SL_ERROR,
4852                     "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4853                     tep->te_state));
4854                 if (!closing) {
4855                         tl_merror(wq, mp, EPROTO);
4856                 } else {
4857                         freemsg(mp);
4858                 }
4859                 return;
4860         }
4861         tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4862 
4863         /*
4864          * get connected endpoint
4865          */
4866         if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4867                 /* Peer closed */
4868                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4869                     "tl_ordrel: peer gone"));
4870                 freemsg(mp);
4871                 return;
4872         }
4873 
4874         peer_rq = peer_tep->te_rq;
4875 
4876         /*
4877          * Put it back if flow controlled except when we are closing.
4878          * Note: Messages already on queue when we are closing is bounded
4879          * so we can ignore flow control.
4880          */
4881         if (!canputnext(peer_rq) && !closing) {
4882                 TL_PUTBQ(tep, mp);
4883                 return;
4884         }
4885 
4886         /*
4887          * validate state on peer
4888          */
4889         switch (peer_tep->te_state) {
4890         case TS_DATA_XFER:
4891         case TS_WIND_ORDREL:
4892                 /* valid states */
4893                 break;
4894         default:
4895                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4896                     "tl_ordrel:rx side:invalid state"));
4897                 tl_merror(peer_tep->te_wq, mp, EPROTO);
4898                 return;
4899         }
4900         peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4901 
4902         /*
4903          * reuse message block
4904          */
4905         prim->type = T_ORDREL_IND;
4906         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4907             "tl_ordrel: send ordrel_ind"));
4908 
4909         /*
4910          * send data to connected peer
4911          */
4912         putnext(peer_rq, mp);
4913 }
4914 
4915 
4916 /*
4917  * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4918  */
4919 static void
4920 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4921 {
4922         size_t                  err_sz;
4923         tl_endpt_t              *tep;
4924         struct T_unitdata_req   *udreq;
4925         mblk_t                  *err_mp;
4926         t_scalar_t              alen;
4927         t_scalar_t              olen;
4928         struct T_uderror_ind    *uderr;
4929         uchar_t                 *addr_startp;
4930 
4931         err_sz = sizeof (struct T_uderror_ind);
4932         tep = (tl_endpt_t *)wq->q_ptr;
4933         udreq = (struct T_unitdata_req *)mp->b_rptr;
4934         alen = udreq->DEST_length;
4935         olen = udreq->OPT_length;
4936 
4937         if (alen > 0)
4938                 err_sz = T_ALIGN(err_sz + alen);
4939         if (olen > 0)
4940                 err_sz += olen;
4941 
4942         err_mp = allocb(err_sz, BPRI_MED);
4943         if (err_mp == NULL) {
4944                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4945                     "tl_uderr:allocb failure"));
4946                 /*
4947                  * Note: no rollback of state needed as it does
4948                  * not change in connectionless transport
4949                  */
4950                 tl_memrecover(wq, mp, err_sz);
4951                 return;
4952         }
4953 
4954         DB_TYPE(err_mp) = M_PROTO;
4955         err_mp->b_wptr = err_mp->b_rptr + err_sz;
4956         uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4957         uderr->PRIM_type = T_UDERROR_IND;
4958         uderr->ERROR_type = err;
4959         uderr->DEST_length = alen;
4960         uderr->OPT_length = olen;
4961         if (alen <= 0) {
4962                 uderr->DEST_offset = 0;
4963         } else {
4964                 uderr->DEST_offset =
4965                     (t_scalar_t)sizeof (struct T_uderror_ind);
4966                 addr_startp = mp->b_rptr + udreq->DEST_offset;
4967                 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4968                     (size_t)alen);
4969         }
4970         if (olen <= 0) {
4971                 uderr->OPT_offset = 0;
4972         } else {
4973                 uderr->OPT_offset =
4974                     (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4975                     uderr->DEST_length);
4976                 addr_startp = mp->b_rptr + udreq->OPT_offset;
4977                 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4978                     (size_t)olen);
4979         }
4980         freemsg(mp);
4981 
4982         /*
4983          * send indication message
4984          */
4985         tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4986 
4987         qreply(wq, err_mp);
4988 }
4989 
4990 static void
4991 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4992 {
4993         queue_t *wq = tep->te_wq;
4994 
4995         if (!tep->te_closing && (wq->q_first != NULL)) {
4996                 TL_PUTQ(tep, mp);
4997         } else {
4998                 if (tep->te_rq != NULL)
4999                         tl_unitdata(mp, tep);
5000                 else
5001                         freemsg(mp);
5002         }
5003 
5004         tl_serializer_exit(tep);
5005         tl_refrele(tep);
5006 }
5007 
5008 /*
5009  * Handle T_unitdata_req.
5010  * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
5011  * If this is a socket pass through options unmodified.
5012  */
5013 static void
5014 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
5015 {
5016         queue_t                 *wq = tep->te_wq;
5017         soux_addr_t             ux_addr;
5018         tl_addr_t               destaddr;
5019         uchar_t                 *addr_startp;
5020         tl_endpt_t              *peer_tep;
5021         struct T_unitdata_ind   *udind;
5022         struct T_unitdata_req   *udreq;
5023         ssize_t                 msz, ui_sz, reuse_mb_sz;
5024         t_scalar_t              alen, aoff, olen, ooff;
5025         t_scalar_t              oldolen = 0;
5026         cred_t                  *cr = NULL;
5027         pid_t                   cpid;
5028 
5029         udreq = (struct T_unitdata_req *)mp->b_rptr;
5030         msz = MBLKL(mp);
5031 
5032         /*
5033          * validate the state
5034          */
5035         if (tep->te_state != TS_IDLE) {
5036                 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5037                     SL_TRACE | SL_ERROR,
5038                     "tl_wput:T_CONN_REQ:out of state"));
5039                 tl_merror(wq, mp, EPROTO);
5040                 return;
5041         }
5042         /*
5043          * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5044          * (state does not change on this event)
5045          */
5046 
5047         /*
5048          * validate the message
5049          * Note: dereference fields in struct inside message only
5050          * after validating the message length.
5051          */
5052         if (msz < sizeof (struct T_unitdata_req)) {
5053                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5054                     "tl_unitdata:invalid message length"));
5055                 tl_merror(wq, mp, EINVAL);
5056                 return;
5057         }
5058         alen = udreq->DEST_length;
5059         aoff = udreq->DEST_offset;
5060         oldolen = olen = udreq->OPT_length;
5061         ooff = udreq->OPT_offset;
5062         if (olen == 0)
5063                 ooff = 0;
5064 
5065         if (IS_SOCKET(tep)) {
5066                 if ((alen != TL_SOUX_ADDRLEN) ||
5067                     (aoff < 0) ||
5068                     (aoff + alen > msz) ||
5069                     (olen < 0) || (ooff < 0) ||
5070                     ((olen > 0) && ((ooff + olen) > msz))) {
5071                         (void) (STRLOG(TL_ID, tep->te_minor,
5072                             1, SL_TRACE | SL_ERROR,
5073                             "tl_unitdata_req: invalid socket addr "
5074                             "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5075                             (int)msz, alen, aoff, olen, ooff));
5076                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5077                         return;
5078                 }
5079                 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5080 
5081                 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5082                     (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5083                         (void) (STRLOG(TL_ID, tep->te_minor,
5084                             1, SL_TRACE | SL_ERROR,
5085                             "tl_conn_req: invalid socket magic"));
5086                         tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5087                         return;
5088                 }
5089         } else {
5090                 if ((alen < 0) ||
5091                     (aoff < 0) ||
5092                     ((alen > 0) && ((aoff + alen) > msz)) ||
5093                     ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5094                     ((aoff + alen) < 0) ||
5095                     ((olen > 0) && ((ooff + olen) > msz)) ||
5096                     (olen < 0) ||
5097                     (ooff < 0) ||
5098                     ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5099                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
5100                                     SL_TRACE | SL_ERROR,
5101                                     "tl_unitdata:invalid unit data message"));
5102                         tl_merror(wq, mp, EINVAL);
5103                         return;
5104                 }
5105         }
5106 
5107         /* Options not supported unless it's a socket */
5108         if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5109                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5110                     "tl_unitdata:option use(unsupported) or zero len addr"));
5111                 tl_uderr(wq, mp, EPROTO);
5112                 return;
5113         }
5114 #ifdef DEBUG
5115         /*
5116          * Mild form of ASSERT()ion to detect broken TPI apps.
5117          * if (!assertion)
5118          *      log warning;
5119          */
5120         if (!(aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5121                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5122                     "tl_unitdata:addr overlaps TPI message"));
5123         }
5124 #endif
5125         /*
5126          * get destination endpoint
5127          */
5128         destaddr.ta_alen = alen;
5129         destaddr.ta_abuf = mp->b_rptr + aoff;
5130         destaddr.ta_zoneid = tep->te_zoneid;
5131 
5132         /*
5133          * Check whether the destination is the same that was used previously
5134          * and the destination endpoint is in the right state. If something is
5135          * wrong, find destination again and cache it.
5136          */
5137         peer_tep = tep->te_lastep;
5138 
5139         if ((peer_tep == NULL) || peer_tep->te_closing ||
5140             (peer_tep->te_state != TS_IDLE) ||
5141             !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5142                 /*
5143                  * Not the same as cached destination , need to find the right
5144                  * destination.
5145                  */
5146                 peer_tep = (IS_SOCKET(tep) ?
5147                     tl_sock_find_peer(tep, &ux_addr) :
5148                     tl_find_peer(tep, &destaddr));
5149 
5150                 if (peer_tep == NULL) {
5151                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5152                             SL_TRACE | SL_ERROR,
5153                             "tl_unitdata:no one at destination address"));
5154                         tl_uderr(wq, mp, ECONNRESET);
5155                         return;
5156                 }
5157 
5158                 /*
5159                  * Cache the new peer.
5160                  */
5161                 if (tep->te_lastep != NULL)
5162                         tl_refrele(tep->te_lastep);
5163 
5164                 tep->te_lastep = peer_tep;
5165         }
5166 
5167         if (peer_tep->te_state != TS_IDLE) {
5168                 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5169                     "tl_unitdata:provider in invalid state"));
5170                 tl_uderr(wq, mp, EPROTO);
5171                 return;
5172         }
5173 
5174         ASSERT(peer_tep->te_rq != NULL);
5175 
5176         /*
5177          * Put it back if flow controlled except when we are closing.
5178          * Note: Messages already on queue when we are closing is bounded
5179          * so we can ignore flow control.
5180          */
5181         if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5182                 /* record what we are flow controlled on */
5183                 if (tep->te_flowq != NULL) {
5184                         list_remove(&tep->te_flowq->te_flowlist, tep);
5185                 }
5186                 list_insert_head(&peer_tep->te_flowlist, tep);
5187                 tep->te_flowq = peer_tep;
5188                 TL_PUTBQ(tep, mp);
5189                 return;
5190         }
5191         /*
5192          * prepare indication message
5193          */
5194 
5195         /*
5196          * calculate length of message
5197          */
5198         if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5199                 cr = msg_getcred(mp, &cpid);
5200                 ASSERT(cr != NULL);
5201 
5202                 if (peer_tep->te_flag & TL_SETCRED) {
5203                         ASSERT(olen == 0);
5204                         olen = (t_scalar_t)sizeof (struct opthdr) +
5205                             OPTLEN(sizeof (tl_credopt_t));
5206                                                 /* 1 option only */
5207                 } else if (peer_tep->te_flag & TL_SETUCRED) {
5208                         ASSERT(olen == 0);
5209                         olen = (t_scalar_t)sizeof (struct opthdr) +
5210                             OPTLEN(ucredminsize(cr));
5211                                                 /* 1 option only */
5212                 } else {
5213                         /* Possibly more than one option */
5214                         olen += (t_scalar_t)sizeof (struct T_opthdr) +
5215                             OPTLEN(ucredminsize(cr));
5216                 }
5217         }
5218 
5219         ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5220         reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5221 
5222         /*
5223          * If the unitdata_ind fits and we are not adding options
5224          * reuse the udreq mblk.
5225          *
5226          * Otherwise, it is possible we need to append an option if one of the
5227          * te_flag bits is set. This requires extra space in the data block for
5228          * the additional option but the traditional technique used below to
5229          * allocate a new block and copy into it will not work when there is a
5230          * message block with a free pointer (since we don't know anything
5231          * about the layout of the data, pointers referencing or within the
5232          * data, etc.). To handle this possibility the upper layers may have
5233          * preallocated some space to use for appending an option. We check the
5234          * overall mblock size against the size we need ('reuse_mb_sz' with the
5235          * original address length [alen] to ensure we won't overrun the
5236          * current mblk data size) to see if there is free space and thus
5237          * avoid allocating a new message block.
5238          */
5239         if (msz >= ui_sz && alen >= tep->te_alen &&
5240             !(peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED))) {
5241                 /*
5242                  * Reuse the original mblk. Leave options in place.
5243                  */
5244                 udind = (struct T_unitdata_ind *)mp->b_rptr;
5245                 udind->PRIM_type = T_UNITDATA_IND;
5246                 udind->SRC_length = tep->te_alen;
5247                 addr_startp = mp->b_rptr + udind->SRC_offset;
5248                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5249 
5250         } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5251             mp->b_datap->db_frtnp != NULL) {
5252                 /*
5253                  * We have a message block with a free pointer, but extra space
5254                  * has been pre-allocated for us in case we need to append an
5255                  * option. Reuse the original mblk, leaving existing options in
5256                  * place.
5257                  */
5258                 udind = (struct T_unitdata_ind *)mp->b_rptr;
5259                 udind->PRIM_type = T_UNITDATA_IND;
5260                 udind->SRC_length = tep->te_alen;
5261                 addr_startp = mp->b_rptr + udind->SRC_offset;
5262                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5263 
5264                 if (peer_tep->te_flag &
5265                     (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5266                         ASSERT(cr != NULL);
5267                         /*
5268                          * We're appending one new option here after the
5269                          * original ones.
5270                          */
5271                         tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5272                             cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5273                 }
5274 
5275         } else if (mp->b_datap->db_frtnp != NULL) {
5276                 /*
5277                  * The next block creates a new mp and tries to copy the data
5278                  * block into it, but that cannot handle a message with a free
5279                  * pointer (for more details see the comment in kstrputmsg()
5280                  * where dupmsg() is called). Since we can never properly
5281                  * duplicate the mp while also extending the data, just error
5282                  * out now.
5283                  */
5284                 tl_uderr(wq, mp, EPROTO);
5285                 return;
5286         } else {
5287                 /* Allocate a new T_unitdata_ind message */
5288                 mblk_t *ui_mp;
5289 
5290                 ui_mp = allocb(ui_sz, BPRI_MED);
5291                 if (ui_mp == NULL) {
5292                         (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5293                             "tl_unitdata:allocb failure:message queued"));
5294                         tl_memrecover(wq, mp, ui_sz);
5295                         return;
5296                 }
5297 
5298                 /*
5299                  * fill in T_UNITDATA_IND contents
5300                  */
5301                 DB_TYPE(ui_mp) = M_PROTO;
5302                 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5303                 udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5304                 udind->PRIM_type = T_UNITDATA_IND;
5305                 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5306                 udind->SRC_length = tep->te_alen;
5307                 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5308                 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5309                 udind->OPT_offset =
5310                     (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5311                 udind->OPT_length = olen;
5312                 if (peer_tep->te_flag &
5313                     (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5314 
5315                         if (oldolen != 0) {
5316                                 bcopy((void *)((uintptr_t)udreq + ooff),
5317                                     (void *)((uintptr_t)udind +
5318                                     udind->OPT_offset),
5319                                     oldolen);
5320                         }
5321                         ASSERT(cr != NULL);
5322 
5323                         tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5324                             oldolen, cr, cpid,
5325                             peer_tep->te_flag, peer_tep->te_credp);
5326                 } else {
5327                         bcopy((void *)((uintptr_t)udreq + ooff),
5328                             (void *)((uintptr_t)udind + udind->OPT_offset),
5329                             olen);
5330                 }
5331 
5332                 /*
5333                  * relink data blocks from mp to ui_mp
5334                  */
5335                 ui_mp->b_cont = mp->b_cont;
5336                 freeb(mp);
5337                 mp = ui_mp;
5338         }
5339         /*
5340          * send indication message
5341          */
5342         peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5343         putnext(peer_tep->te_rq, mp);
5344 }
5345 
5346 
5347 
5348 /*
5349  * Check if a given addr is in use.
5350  * Endpoint ptr returned or NULL if not found.
5351  * The name space is separate for each mode. This implies that
5352  * sockets get their own name space.
5353  */
5354 static tl_endpt_t *
5355 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5356 {
5357         tl_endpt_t *peer_tep = NULL;
5358         int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5359             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5360 
5361         ASSERT(!IS_SOCKET(tep));
5362 
5363         ASSERT(ap != NULL && ap->ta_alen > 0);
5364         ASSERT(ap->ta_zoneid == tep->te_zoneid);
5365         ASSERT(ap->ta_abuf != NULL);
5366         EQUIV(rc == 0, peer_tep != NULL);
5367         IMPLY(rc == 0,
5368             (tep->te_zoneid == peer_tep->te_zoneid) &&
5369             (tep->te_transport == peer_tep->te_transport));
5370 
5371         if ((rc == 0) && (peer_tep->te_closing)) {
5372                 tl_refrele(peer_tep);
5373                 peer_tep = NULL;
5374         }
5375 
5376         return (peer_tep);
5377 }
5378 
5379 /*
5380  * Find peer for a socket based on unix domain address.
5381  * For implicit addresses our peer can be found by minor number in ai hash. For
5382  * explicit binds we look vnode address at addr_hash.
5383  */
5384 static tl_endpt_t *
5385 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5386 {
5387         tl_endpt_t *peer_tep = NULL;
5388         mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5389             tep->te_aihash : tep->te_addrhash;
5390         int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5391             (mod_hash_val_t *)&peer_tep, tl_find_callback);
5392 
5393         ASSERT(IS_SOCKET(tep));
5394         EQUIV(rc == 0, peer_tep != NULL);
5395         IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5396 
5397         if (peer_tep != NULL) {
5398                 /* Don't attempt to use closing peer. */
5399                 if (peer_tep->te_closing)
5400                         goto errout;
5401 
5402                 /*
5403                  * Cross-zone unix sockets are permitted, but for Trusted
5404                  * Extensions only, the "server" for these must be in the
5405                  * global zone.
5406                  */
5407                 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5408                     is_system_labeled() &&
5409                     (peer_tep->te_zoneid != GLOBAL_ZONEID))
5410                         goto errout;
5411         }
5412 
5413         return (peer_tep);
5414 
5415 errout:
5416         tl_refrele(peer_tep);
5417         return (NULL);
5418 }
5419 
5420 /*
5421  * Generate a free addr and return it in struct pointed by ap
5422  * but allocating space for address buffer.
5423  * The generated address will be at least 4 bytes long and, if req->ta_alen
5424  * exceeds 4 bytes, be req->ta_alen bytes long.
5425  *
5426  * If address is found it will be inserted in the hash.
5427  *
5428  * If req->ta_alen is larger than the default alen (4 bytes) the last
5429  * alen-4 bytes will always be the same as in req.
5430  *
5431  * Return 0 for failure.
5432  * Return non-zero for success.
5433  */
5434 static boolean_t
5435 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5436 {
5437         t_scalar_t      alen;
5438         uint32_t        loopcnt;        /* Limit loop to 2^32 */
5439 
5440         ASSERT(tep->te_hash_hndl != NULL);
5441         ASSERT(!IS_SOCKET(tep));
5442 
5443         if (tep->te_hash_hndl == NULL)
5444                 return (B_FALSE);
5445 
5446         /*
5447          * check if default addr is in use
5448          * if it is - bump it and try again
5449          */
5450         if (req == NULL) {
5451                 alen = sizeof (uint32_t);
5452         } else {
5453                 alen = max(req->ta_alen, sizeof (uint32_t));
5454                 ASSERT(tep->te_zoneid == req->ta_zoneid);
5455         }
5456 
5457         if (tep->te_alen < alen) {
5458                 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5459 
5460                 /*
5461                  * Not enough space in tep->ta_ap to hold the address,
5462                  * allocate a bigger space.
5463                  */
5464                 if (abuf == NULL)
5465                         return (B_FALSE);
5466 
5467                 if (tep->te_alen > 0)
5468                         kmem_free(tep->te_abuf, tep->te_alen);
5469 
5470                 tep->te_alen = alen;
5471                 tep->te_abuf = abuf;
5472         }
5473 
5474         /* Copy in the address in req */
5475         if (req != NULL) {
5476                 ASSERT(alen >= req->ta_alen);
5477                 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5478         }
5479 
5480         /*
5481          * First try minor number then try default addresses.
5482          */
5483         bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5484 
5485         for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5486                 if (mod_hash_insert_reserve(tep->te_addrhash,
5487                     (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5488                     tep->te_hash_hndl) == 0) {
5489                         /*
5490                          * found free address
5491                          */
5492                         tep->te_flag |= TL_ADDRHASHED;
5493                         tep->te_hash_hndl = NULL;
5494 
5495                         return (B_TRUE); /* successful return */
5496                 }
5497                 /*
5498                  * Use default address.
5499                  */
5500                 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5501                 atomic_inc_32(&tep->te_defaddr);
5502         }
5503 
5504         /*
5505          * Failed to find anything.
5506          */
5507         (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5508             "tl_get_any_addr:looped 2^32 times"));
5509         return (B_FALSE);
5510 }
5511 
5512 /*
5513  * reallocb + set r/w ptrs to reflect size.
5514  */
5515 static mblk_t *
5516 tl_resizemp(mblk_t *mp, ssize_t new_size)
5517 {
5518         if ((mp = reallocb(mp, new_size, 0)) == NULL)
5519                 return (NULL);
5520 
5521         mp->b_rptr = DB_BASE(mp);
5522         mp->b_wptr = mp->b_rptr + new_size;
5523         return (mp);
5524 }
5525 
5526 static void
5527 tl_cl_backenable(tl_endpt_t *tep)
5528 {
5529         list_t *l = &tep->te_flowlist;
5530         tl_endpt_t *elp;
5531 
5532         ASSERT(IS_CLTS(tep));
5533 
5534         for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5535                 ASSERT(tep->te_ser == elp->te_ser);
5536                 ASSERT(elp->te_flowq == tep);
5537                 if (!elp->te_closing)
5538                         TL_QENABLE(elp);
5539                 elp->te_flowq = NULL;
5540                 list_remove(l, elp);
5541         }
5542 }
5543 
5544 /*
5545  * Unconnect endpoints.
5546  */
5547 static void
5548 tl_co_unconnect(tl_endpt_t *tep)
5549 {
5550         tl_endpt_t      *peer_tep = tep->te_conp;
5551         tl_endpt_t      *srv_tep = tep->te_oconp;
5552         list_t          *l;
5553         tl_icon_t       *tip;
5554         tl_endpt_t      *cl_tep;
5555         mblk_t          *d_mp;
5556 
5557         ASSERT(IS_COTS(tep));
5558         /*
5559          * If our peer is closing, don't use it.
5560          */
5561         if ((peer_tep != NULL) && peer_tep->te_closing) {
5562                 TL_UNCONNECT(tep->te_conp);
5563                 peer_tep = NULL;
5564         }
5565         if ((srv_tep != NULL) && srv_tep->te_closing) {
5566                 TL_UNCONNECT(tep->te_oconp);
5567                 srv_tep = NULL;
5568         }
5569 
5570         if (tep->te_nicon > 0) {
5571                 l = &tep->te_iconp;
5572                 /*
5573                  * If incoming requests pending, change state
5574                  * of clients on disconnect ind event and send
5575                  * discon_ind pdu to modules above them
5576                  * for server: all clients get disconnect
5577                  */
5578 
5579                 while (tep->te_nicon > 0) {
5580                         tip    = list_head(l);
5581                         cl_tep = tip->ti_tep;
5582 
5583                         if (cl_tep == NULL) {
5584                                 tl_freetip(tep, tip);
5585                                 continue;
5586                         }
5587 
5588                         if (cl_tep->te_oconp != NULL) {
5589                                 ASSERT(cl_tep != cl_tep->te_oconp);
5590                                 TL_UNCONNECT(cl_tep->te_oconp);
5591                         }
5592 
5593                         if (cl_tep->te_closing) {
5594                                 tl_freetip(tep, tip);
5595                                 continue;
5596                         }
5597 
5598                         enableok(cl_tep->te_wq);
5599                         TL_QENABLE(cl_tep);
5600                         d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5601                         if (d_mp != NULL) {
5602                                 cl_tep->te_state = TS_IDLE;
5603                                 putnext(cl_tep->te_rq, d_mp);
5604                         } else {
5605                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5606                                     SL_TRACE | SL_ERROR,
5607                                     "tl_co_unconnect:icmng: "
5608                                     "allocb failure"));
5609                         }
5610                         tl_freetip(tep, tip);
5611                 }
5612         } else if (srv_tep != NULL) {
5613                 /*
5614                  * If outgoing request pending, change state
5615                  * of server on discon ind event
5616                  */
5617 
5618                 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5619                     IS_COTSORD(srv_tep) &&
5620                     !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5621                         /*
5622                          * Queue ordrel_ind for server to be picked up
5623                          * when the connection is accepted.
5624                          */
5625                         d_mp = tl_ordrel_ind_alloc();
5626                 } else {
5627                         /*
5628                          * send discon_ind to server
5629                          */
5630                         d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5631                 }
5632                 if (d_mp == NULL) {
5633                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5634                             SL_TRACE | SL_ERROR,
5635                             "tl_co_unconnect:outgoing:allocb failure"));
5636                         TL_UNCONNECT(tep->te_oconp);
5637                         goto discon_peer;
5638                 }
5639 
5640                 /*
5641                  * If this is a socket the T_DISCON_IND is queued with
5642                  * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5643                  * from the list of pending connections.
5644                  * Note that when te_oconp is set the peer better have
5645                  * a t_connind_t for the client.
5646                  */
5647                 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5648                         /*
5649                          * Queue the disconnection message.
5650                          */
5651                         tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5652                 } else {
5653                         tip = tl_icon_find(srv_tep, tep->te_seqno);
5654                         if (tip == NULL) {
5655                                 freemsg(d_mp);
5656                         } else {
5657                                 ASSERT(tep == tip->ti_tep);
5658                                 ASSERT(tep->te_ser == srv_tep->te_ser);
5659                                 /*
5660                                  * Delete tip from the server list.
5661                                  */
5662                                 if (srv_tep->te_nicon == 1) {
5663                                         srv_tep->te_state =
5664                                             NEXTSTATE(TE_DISCON_IND2,
5665                                             srv_tep->te_state);
5666                                 } else {
5667                                         srv_tep->te_state =
5668                                             NEXTSTATE(TE_DISCON_IND3,
5669                                             srv_tep->te_state);
5670                                 }
5671                                 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5672                                     T_DISCON_IND);
5673                                 putnext(srv_tep->te_rq, d_mp);
5674                                 tl_freetip(srv_tep, tip);
5675                         }
5676                         TL_UNCONNECT(tep->te_oconp);
5677                         srv_tep = NULL;
5678                 }
5679         } else if (peer_tep != NULL) {
5680                 /*
5681                  * unconnect existing connection
5682                  * If connected, change state of peer on
5683                  * discon ind event and send discon ind pdu
5684                  * to module above it
5685                  */
5686 
5687                 ASSERT(tep->te_ser == peer_tep->te_ser);
5688                 if (IS_COTSORD(peer_tep) &&
5689                     (peer_tep->te_state == TS_WIND_ORDREL ||
5690                     peer_tep->te_state == TS_DATA_XFER)) {
5691                         /*
5692                          * send ordrel ind
5693                          */
5694                         (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5695                         "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5696                             peer_tep->te_state,
5697                             NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5698                         d_mp = tl_ordrel_ind_alloc();
5699                         if (d_mp == NULL) {
5700                                 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5701                                     SL_TRACE | SL_ERROR,
5702                                     "tl_co_unconnect:connected:"
5703                                     "allocb failure"));
5704                                 /*
5705                                  * Continue with cleaning up peer as
5706                                  * this side may go away with the close
5707                                  */
5708                                 TL_QENABLE(peer_tep);
5709                                 goto discon_peer;
5710                         }
5711                         peer_tep->te_state =
5712                             NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5713 
5714                         putnext(peer_tep->te_rq, d_mp);
5715                         /*
5716                          * Handle flow control case.  This will generate
5717                          * a t_discon_ind message with reason 0 if there
5718                          * is data queued on the write side.
5719                          */
5720                         TL_QENABLE(peer_tep);
5721                 } else if (IS_COTSORD(peer_tep) &&
5722                     peer_tep->te_state == TS_WREQ_ORDREL) {
5723                         /*
5724                          * Sent an ordrel_ind. We send a discon with
5725                          * with error 0 to inform that the peer is gone.
5726                          */
5727                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5728                             SL_TRACE | SL_ERROR,
5729                             "tl_co_unconnect: discon in state %d",
5730                             tep->te_state));
5731                         tl_discon_ind(peer_tep, 0);
5732                 } else {
5733                         (void) (STRLOG(TL_ID, tep->te_minor, 3,
5734                             SL_TRACE | SL_ERROR,
5735                             "tl_co_unconnect: state %d", tep->te_state));
5736                         tl_discon_ind(peer_tep, ECONNRESET);
5737                 }
5738 
5739 discon_peer:
5740                 /*
5741                  * Disconnect cross-pointers only for close
5742                  */
5743                 if (tep->te_closing) {
5744                         peer_tep = tep->te_conp;
5745                         TL_REMOVE_PEER(peer_tep->te_conp);
5746                         TL_REMOVE_PEER(tep->te_conp);
5747                 }
5748         }
5749 }
5750 
5751 /*
5752  * Note: The following routine does not recover from allocb()
5753  * failures
5754  * The reason should be from the <sys/errno.h> space.
5755  */
5756 static void
5757 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5758 {
5759         mblk_t *d_mp;
5760 
5761         if (tep->te_closing)
5762                 return;
5763 
5764         /*
5765          * flush the queues.
5766          */
5767         flushq(tep->te_rq, FLUSHDATA);
5768         (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5769 
5770         /*
5771          * send discon ind
5772          */
5773         d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5774         if (d_mp == NULL) {
5775                 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5776                     "tl_discon_ind:allocb failure"));
5777                 return;
5778         }
5779         tep->te_state = TS_IDLE;
5780         putnext(tep->te_rq, d_mp);
5781 }
5782 
5783 /*
5784  * Note: The following routine does not recover from allocb()
5785  * failures
5786  * The reason should be from the <sys/errno.h> space.
5787  */
5788 static mblk_t *
5789 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5790 {
5791         mblk_t *mp;
5792         struct T_discon_ind *tdi;
5793 
5794         if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5795                 DB_TYPE(mp) = M_PROTO;
5796                 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5797                 tdi = (struct T_discon_ind *)mp->b_rptr;
5798                 tdi->PRIM_type = T_DISCON_IND;
5799                 tdi->DISCON_reason = reason;
5800                 tdi->SEQ_number = seqnum;
5801         }
5802         return (mp);
5803 }
5804 
5805 
5806 /*
5807  * Note: The following routine does not recover from allocb()
5808  * failures
5809  */
5810 static mblk_t *
5811 tl_ordrel_ind_alloc(void)
5812 {
5813         mblk_t *mp;
5814         struct T_ordrel_ind *toi;
5815 
5816         if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5817                 DB_TYPE(mp) = M_PROTO;
5818                 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5819                 toi = (struct T_ordrel_ind *)mp->b_rptr;
5820                 toi->PRIM_type = T_ORDREL_IND;
5821         }
5822         return (mp);
5823 }
5824 
5825 
5826 /*
5827  * Lookup the seqno in the list of queued connections.
5828  */
5829 static tl_icon_t *
5830 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5831 {
5832         list_t *l = &tep->te_iconp;
5833         tl_icon_t *tip = list_head(l);
5834 
5835         ASSERT(seqno != 0);
5836 
5837         for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5838                 ;
5839 
5840         return (tip);
5841 }
5842 
5843 /*
5844  * Queue data for a given T_CONN_IND while verifying that redundant
5845  * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5846  * Used when the originator of the connection closes.
5847  */
5848 static void
5849 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5850 {
5851         tl_icon_t               *tip;
5852         mblk_t                  **mpp, *mp;
5853         int                     prim, nprim;
5854 
5855         if (nmp->b_datap->db_type == M_PROTO)
5856                 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5857         else
5858                 nprim = -1;     /* M_DATA */
5859 
5860         tip = tl_icon_find(tep, seqno);
5861         if (tip == NULL) {
5862                 freemsg(nmp);
5863                 return;
5864         }
5865 
5866         ASSERT(tip->ti_seqno != 0);
5867         mpp = &tip->ti_mp;
5868         while (*mpp != NULL) {
5869                 mp = *mpp;
5870 
5871                 if (mp->b_datap->db_type == M_PROTO)
5872                         prim = ((union T_primitives *)mp->b_rptr)->type;
5873                 else
5874                         prim = -1;      /* M_DATA */
5875 
5876                 /*
5877                  * Allow nothing after a T_DISCON_IND
5878                  */
5879                 if (prim == T_DISCON_IND) {
5880                         freemsg(nmp);
5881                         return;
5882                 }
5883                 /*
5884                  * Only allow a T_DISCON_IND after an T_ORDREL_IND
5885                  */
5886                 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5887                         freemsg(nmp);
5888                         return;
5889                 }
5890                 mpp = &(mp->b_next);
5891         }
5892         *mpp = nmp;
5893 }
5894 
5895 /*
5896  * Verify if a certain TPI primitive exists on the connind queue.
5897  * Use prim -1 for M_DATA.
5898  * Return non-zero if found.
5899  */
5900 static boolean_t
5901 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5902 {
5903         tl_icon_t *tip = tl_icon_find(tep, seqno);
5904         boolean_t found = B_FALSE;
5905 
5906         if (tip != NULL) {
5907                 mblk_t *mp;
5908                 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5909                         found = (DB_TYPE(mp) == M_PROTO &&
5910                             ((union T_primitives *)mp->b_rptr)->type == prim);
5911                 }
5912         }
5913         return (found);
5914 }
5915 
5916 /*
5917  * Send the b_next mblk chain that has accumulated before the connection
5918  * was accepted. Perform the necessary state transitions.
5919  */
5920 static void
5921 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5922 {
5923         mblk_t                  *mp;
5924         union T_primitives      *primp;
5925 
5926         if (tep->te_closing) {
5927                 tl_icon_freemsgs(mpp);
5928                 return;
5929         }
5930 
5931         ASSERT(tep->te_state == TS_DATA_XFER);
5932         ASSERT(tep->te_rq->q_first == NULL);
5933 
5934         while ((mp = *mpp) != NULL) {
5935                 *mpp = mp->b_next;
5936                 mp->b_next = NULL;
5937 
5938                 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5939                 switch (DB_TYPE(mp)) {
5940                 default:
5941                         freemsg(mp);
5942                         break;
5943                 case M_DATA:
5944                         putnext(tep->te_rq, mp);
5945                         break;
5946                 case M_PROTO:
5947                         primp = (union T_primitives *)mp->b_rptr;
5948                         switch (primp->type) {
5949                         case T_UNITDATA_IND:
5950                         case T_DATA_IND:
5951                         case T_OPTDATA_IND:
5952                         case T_EXDATA_IND:
5953                                 putnext(tep->te_rq, mp);
5954                                 break;
5955                         case T_ORDREL_IND:
5956                                 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5957                                     tep->te_state);
5958                                 putnext(tep->te_rq, mp);
5959                                 break;
5960                         case T_DISCON_IND:
5961                                 tep->te_state = TS_IDLE;
5962                                 putnext(tep->te_rq, mp);
5963                                 break;
5964                         default:
5965 #ifdef DEBUG
5966                                 cmn_err(CE_PANIC,
5967                                     "tl_icon_sendmsgs: unknown primitive");
5968 #endif /* DEBUG */
5969                                 freemsg(mp);
5970                                 break;
5971                         }
5972                         break;
5973                 }
5974         }
5975 }
5976 
5977 /*
5978  * Free the b_next mblk chain that has accumulated before the connection
5979  * was accepted.
5980  */
5981 static void
5982 tl_icon_freemsgs(mblk_t **mpp)
5983 {
5984         mblk_t *mp;
5985 
5986         while ((mp = *mpp) != NULL) {
5987                 *mpp = mp->b_next;
5988                 mp->b_next = NULL;
5989                 freemsg(mp);
5990         }
5991 }
5992 
5993 /*
5994  * Send M_ERROR
5995  * Note: assumes caller ensured enough space in mp or enough
5996  *      memory available. Does not attempt recovery from allocb()
5997  *      failures
5998  */
5999 
6000 static void
6001 tl_merror(queue_t *wq, mblk_t *mp, int error)
6002 {
6003         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6004 
6005         if (tep->te_closing) {
6006                 freemsg(mp);
6007                 return;
6008         }
6009 
6010         (void) (STRLOG(TL_ID, tep->te_minor, 1,
6011             SL_TRACE | SL_ERROR,
6012             "tl_merror: tep=%p, err=%d", (void *)tep, error));
6013 
6014         /*
6015          * flush all messages on queue. we are shutting
6016          * the stream down on fatal error
6017          */
6018         flushq(wq, FLUSHALL);
6019         if (IS_COTS(tep)) {
6020                 /* connection oriented - unconnect endpoints */
6021                 tl_co_unconnect(tep);
6022         }
6023         if (mp->b_cont) {
6024                 freemsg(mp->b_cont);
6025                 mp->b_cont = NULL;
6026         }
6027 
6028         if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6029                 freemsg(mp);
6030                 mp = allocb(1, BPRI_HI);
6031                 if (mp == NULL) {
6032                         (void) (STRLOG(TL_ID, tep->te_minor, 1,
6033                             SL_TRACE | SL_ERROR,
6034                             "tl_merror:M_PROTO: out of memory"));
6035                         return;
6036                 }
6037         }
6038         if (mp) {
6039                 DB_TYPE(mp) = M_ERROR;
6040                 mp->b_rptr = DB_BASE(mp);
6041                 *mp->b_rptr = (char)error;
6042                 mp->b_wptr = mp->b_rptr + sizeof (char);
6043                 qreply(wq, mp);
6044         } else {
6045                 (void) putnextctl1(tep->te_rq, M_ERROR, error);
6046         }
6047 }
6048 
/*
 * Fill in a peer-credentials option at `buf'.  Exactly one of three
 * formats is produced, selected by `flag':
 *   TL_SETCRED   - struct opthdr + tl_credopt_t (fixed-layout TLI cred)
 *   TL_SETUCRED  - struct opthdr + opaque ucred via cred2ucred()
 *   TL_SOCKUCRED - struct T_opthdr (SOL_SOCKET/SCM_UCRED) + ucred
 * `cr'/`cpid' identify the sender; `pcr' is passed through to
 * cred2ucred().  The caller must have sized `buf' for the chosen
 * format (see the OPTLEN()-based length computations below).
 */
static void
tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
{
	ASSERT(cr != NULL);

	if (flag & TL_SETCRED) {
		/* Fixed-layout TLI credentials option. */
		struct opthdr *opt = (struct opthdr *)buf;
		tl_credopt_t *tlcred;

		opt->level = TL_PROT_LEVEL;
		opt->name = TL_OPT_PEER_CRED;
		opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));

		/* Option payload follows the opthdr directly. */
		tlcred = (tl_credopt_t *)(opt + 1);
		tlcred->tc_uid = crgetuid(cr);
		tlcred->tc_gid = crgetgid(cr);
		tlcred->tc_ruid = crgetruid(cr);
		tlcred->tc_rgid = crgetrgid(cr);
		tlcred->tc_suid = crgetsuid(cr);
		tlcred->tc_sgid = crgetsgid(cr);
		tlcred->tc_ngroups = crgetngroups(cr);
	} else if (flag & TL_SETUCRED) {
		/* Opaque ucred, sized to the minimum for this cred. */
		struct opthdr *opt = (struct opthdr *)buf;

		opt->level = TL_PROT_LEVEL;
		opt->name = TL_OPT_PEER_UCRED;
		opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));

		(void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
	} else {
		/* Socket-style SCM_UCRED option (T_opthdr format). */
		struct T_opthdr *topt = (struct T_opthdr *)buf;
		ASSERT(flag & TL_SOCKUCRED);

		topt->level = SOL_SOCKET;
		topt->name = SCM_UCRED;
		topt->len = ucredminsize(cr) + sizeof (*topt);
		topt->status = 0;
		(void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
	}
}
6089 
/* ARGSUSED */
static int
tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
{
	/*
	 * No default option values are processed in protocol-specific
	 * code currently; returning -1 lets the option-management
	 * framework fall back to its own handling.
	 */
	return (-1);
}
6097 
6098 /* ARGSUSED */
6099 static int
6100 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6101 {
6102         int len;
6103         tl_endpt_t *tep;
6104         int *valp;
6105 
6106         tep = (tl_endpt_t *)wq->q_ptr;
6107 
6108         len = 0;
6109 
6110         /*
6111          * Assumes: option level and name sanity check done elsewhere
6112          */
6113 
6114         switch (level) {
6115         case SOL_SOCKET:
6116                 if (!IS_SOCKET(tep))
6117                         break;
6118                 switch (name) {
6119                 case SO_RECVUCRED:
6120                         len = sizeof (int);
6121                         valp = (int *)ptr;
6122                         *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6123                         break;
6124                 default:
6125                         break;
6126                 }
6127                 break;
6128         case TL_PROT_LEVEL:
6129                 switch (name) {
6130                 case TL_OPT_PEER_CRED:
6131                 case TL_OPT_PEER_UCRED:
6132                         /*
6133                          * option not supposed to retrieved directly
6134                          * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6135                          * when some internal flags set by other options
6136                          * Direct retrieval always designed to fail(ignored)
6137                          * for this option.
6138                          */
6139                         break;
6140                 }
6141         }
6142         return (len);
6143 }
6144 
/*
 * Set a single option on this endpoint.  Called from the option
 * management framework; level/name sanity checks are assumed to have
 * been done by the caller.  Returns 0 on success or an errno value.
 */
/* ARGSUSED */
static int
tl_set_opt(queue_t *wq, uint_t mgmt_flags, int level, int name, uint_t inlen,
    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs,
    cred_t *cr)
{
	int error;
	tl_endpt_t *tep;

	tep = (tl_endpt_t *)wq->q_ptr;

	error = 0;		/* NOERROR */

	/*
	 * Assumes: option level and name sanity checks done elsewhere
	 */

	switch (level) {
	case SOL_SOCKET:
		/* Socket-level options only make sense for sockets. */
		if (!IS_SOCKET(tep)) {
			error = EINVAL;
			break;
		}
		/*
		 * TBD: fill in other AF_UNIX socket options and then stop
		 * returning error.
		 */
		switch (name) {
		case SO_RECVUCRED:
			/*
			 * We only support this for datagram sockets;
			 * getpeerucred handles the connection oriented
			 * transports.
			 */
			if (!IS_CLTS(tep)) {
				error = EINVAL;
				break;
			}
			/* Toggle delivery of sender creds with data. */
			if (*(int *)invalp == 0)
				tep->te_flag &= ~TL_SOCKUCRED;
			else
				tep->te_flag |= TL_SOCKUCRED;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	case TL_PROT_LEVEL:
		switch (name) {
		case TL_OPT_PEER_CRED:
		case TL_OPT_PEER_UCRED:
			/*
			 * option not supposed to be set directly
			 * Its value in initialized for each endpoint at
			 * driver open time.
			 * Direct setting always designed to fail for this
			 * option.
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE | SL_ERROR,
			    "tl_set_opt: option is not supported"));
			error = EPROTO;
			break;
		}
	}
	return (error);
}
6213 
6214 
6215 static void
6216 tl_timer(void *arg)
6217 {
6218         queue_t *wq = arg;
6219         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6220 
6221         ASSERT(tep);
6222 
6223         tep->te_timoutid = 0;
6224 
6225         enableok(wq);
6226         /*
6227          * Note: can call wsrv directly here and save context switch
6228          * Consider change when qtimeout (not timeout) is active
6229          */
6230         qenable(wq);
6231 }
6232 
6233 static void
6234 tl_buffer(void *arg)
6235 {
6236         queue_t *wq = arg;
6237         tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6238 
6239         ASSERT(tep);
6240 
6241         tep->te_bufcid = 0;
6242         tep->te_nowsrv = B_FALSE;
6243 
6244         enableok(wq);
6245         /*
6246          *  Note: can call wsrv directly here and save context switch
6247          * Consider change when qbufcall (not bufcall) is active
6248          */
6249         qenable(wq);
6250 }
6251 
/*
 * Handle an allocation failure for a message of `size' bytes on write
 * queue `wq': put `mp' back at the head of the queue, disable the
 * queue, and arrange for it to be re-enabled when memory may be
 * available again (qbufcall, falling back to a qtimeout when even the
 * bufcall request cannot be registered).
 */
static void
tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
{
	tl_endpt_t *tep;

	tep = (tl_endpt_t *)wq->q_ptr;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}
	/* Stop servicing this queue until the recovery callback fires. */
	noenable(wq);

	/* Requeue mp at the front so it is retried first. */
	(void) insq(wq, wq->q_first, mp);

	if (tep->te_bufcid || tep->te_timoutid) {
		/* A recovery callback is already outstanding. */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_memrecover:recover %p pending", (void *)wq));
		return;
	}

	tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq);
	if (tep->te_bufcid == NULL) {
		/* Could not even register a bufcall; retry after a delay. */
		tep->te_timoutid = qtimeout(wq, tl_timer, wq,
		    drv_usectohz(TL_BUFWAIT));
	}
}
6279 
6280 static void
6281 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6282 {
6283         ASSERT(tip->ti_seqno != 0);
6284 
6285         if (tip->ti_mp != NULL) {
6286                 tl_icon_freemsgs(&tip->ti_mp);
6287                 tip->ti_mp = NULL;
6288         }
6289         if (tip->ti_tep != NULL) {
6290                 tl_refrele(tip->ti_tep);
6291                 tip->ti_tep = NULL;
6292         }
6293         list_remove(&tep->te_iconp, tip);
6294         kmem_free(tip, sizeof (tl_icon_t));
6295         tep->te_nicon--;
6296 }
6297 
6298 /*
6299  * Remove address from address hash.
6300  */
6301 static void
6302 tl_addr_unbind(tl_endpt_t *tep)
6303 {
6304         tl_endpt_t *elp;
6305 
6306         if (tep->te_flag & TL_ADDRHASHED) {
6307                 if (IS_SOCKET(tep)) {
6308                         (void) mod_hash_remove(tep->te_addrhash,
6309                             (mod_hash_key_t)tep->te_vp,
6310                             (mod_hash_val_t *)&elp);
6311                         tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6312                         tep->te_magic = SOU_MAGIC_IMPLICIT;
6313                 } else {
6314                         (void) mod_hash_remove(tep->te_addrhash,
6315                             (mod_hash_key_t)&tep->te_ap,
6316                             (mod_hash_val_t *)&elp);
6317                         (void) kmem_free(tep->te_abuf, tep->te_alen);
6318                         tep->te_alen = -1;
6319                         tep->te_abuf = NULL;
6320                 }
6321                 tep->te_flag &= ~TL_ADDRHASHED;
6322         }
6323 }