1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 27 * Copyright (c) 2012 by Delphix. All rights reserved. 28 */ 29 30 /* 31 * Multithreaded STREAMS Local Transport Provider. 32 * 33 * OVERVIEW 34 * ======== 35 * 36 * This driver provides TLI as well as socket semantics. It provides 37 * connectionless, connection oriented, and connection oriented with orderly 38 * release transports for TLI and sockets. Each transport type has separate name 39 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) - 40 * this removes any name space conflicts when binding to socket style transport 41 * addresses. 42 * 43 * NOTE: There is one exception: Socket ticots and ticotsord transports share 44 * the same namespace. In fact, sockets always use ticotsord type transport. 45 * 46 * The driver mode is specified during open() by the minor number used for 47 * open. 
48 * 49 * The sockets in addition have the following semantic differences: 50 * No support for passing up credentials (TL_SET[U]CRED). 51 * 52 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND, 53 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to 54 * T_OPTDATA_IND. 55 * 56 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before 57 * a T_CONN_RES is received from the acceptor. This means that a socket 58 * connect will complete before the peer has called accept. 59 * 60 * 61 * MULTITHREADING 62 * ============== 63 * 64 * The driver does not use STREAMS protection mechanisms. Instead it uses a 65 * generic "serializer" abstraction. Most of the operations are executed behind 66 * the serializer and are, essentially, single-threaded. All functions executed 67 * behind the same serializer are strictly serialized. So if one thread calls 68 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls 69 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one 70 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1) or 71 * bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at the 72 * same time. 73 * 74 * Connectionless transports use a single serializer per transport type (one for 75 * TLI and one for sockets). Connection-oriented transports use finer-grained 76 * serializers. 77 * 78 * All COTS-type endpoints start their life with private serializers. During 79 * connection request processing the endpoint serializer is switched to the 80 * listener's serializer and the rest of T_CONN_REQ processing is done on the 81 * listener serializer. During T_CONN_RES processing the eager serializer is 82 * switched from listener to acceptor serializer and after that point all 83 * processing for eager and acceptor happens on this serializer. 
To avoid races 84 * with endpoint closes while its serializer may be changing, closes are blocked 85 * while serializers are manipulated. 86 * 87 * References accounting 88 * --------------------- 89 * 90 * Endpoints are reference counted and freed when the last reference is 91 * dropped. Functions within the serializer may access an endpoint state even 92 * after an endpoint closed. The te_closing being set on the endpoint indicates 93 * that the endpoint entered its close routine. 94 * 95 * One reference is held for each opened endpoint instance. The reference 96 * counter is incremented when the endpoint is linked to another endpoint and 97 * decremented when the link disappears. It is also incremented when the 98 * endpoint is found by the hash table lookup. This increment is atomic with the 99 * lookup itself and happens while the hash table read lock is held. 100 * 101 * Close synchronization 102 * --------------------- 103 * 104 * During close the endpoint is marked as closing using te_closing flag. It is 105 * usually enough to check for te_closing flag since all other state changes 106 * happen after this flag is set and the close entered serializer. Immediately 107 * after setting te_closing flag tl_close() enters serializer and waits until 108 * the callback finishes. This allows all functions called within serializer to 109 * simply check te_closing without any locks. 110 * 111 * Serializer management. 112 * --------------------- 113 * 114 * For COTS transports serializers are created when the endpoint is constructed 115 * and destroyed when the endpoint is destructed. CLTS transports use global 116 * serializers - one for sockets and one for TLI. 117 * 118 * COTS serializers have separate reference counts to deal with several 119 * endpoints sharing the same serializer. There is a subtle problem related to 120 * the serializer destruction. The serializer should never be destroyed by any 121 * function executed inside serializer. 
This means that close has to wait till 122 * all serializer activity for this endpoint is finished before it can drop the 123 * last reference on the endpoint (which may as well free the serializer). This 124 * is only relevant for COTS transports which manage serializers 125 * dynamically. For CLTS transports close may complete without waiting for all 126 * serializer activity to finish since serializer is only destroyed at driver 127 * detach time. 128 * 129 * COTS endpoints keep track of the number of outstanding requests on the 130 * serializer for the endpoint. The code handling accept() avoids changing 131 * client serializer if it has any pending messages on the serializer and 132 * instead moves acceptor to listener's serializer. 133 * 134 * 135 * Use of hash tables 136 * ------------------ 137 * 138 * The driver uses modhash hash table implementation. Each transport uses two 139 * hash tables - one for finding endpoints by acceptor ID and another one for 140 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same 141 * pair of hash tables since sockets only use TICOTSORD. 142 * 143 * All hash table lookups increment a reference count for returned endpoints, 144 * so we may safely check the endpoint state even when the endpoint is removed 145 * from the hash by another thread immediately after it is found. 146 * 147 * 148 * CLOSE processing 149 * ================ 150 * 151 * The driver enters serializer twice on close(). The close sequence is the 152 * following: 153 * 154 * 0) Wait until closing is safe (te_closewait becomes zero) 155 * This step is needed to prevent close during serializer switches. In most 156 * cases (close happening after connection establishment) te_closewait is 157 * zero. 158 * 1) Set te_closing. 159 * 2) Call tl_close_ser() within serializer and wait for it to complete. 160 * 161 * tl_close_ser() simply marks endpoint and wakes up waiting tl_close(). 
162 * It also needs to clear write-side q_next pointers - this should be done 163 * before qprocsoff(). 164 * 165 * This synchronous serializer entry during close is needed to ensure that 166 * the queue is valid everywhere inside the serializer. 167 * 168 * Note that in many cases close will execute tl_close_ser() synchronously, 169 * so it will not wait at all. 170 * 171 * 3) Calls qprocsoff(). 172 * 4) Calls tl_close_finish_ser() within the serializer and waits for it to 173 * complete (for COTS transports). For CLTS transport there is no wait. 174 * 175 * tl_close_finish_ser() finishes the close process and wakes up waiting 176 * close if there is any. 177 * 178 * Note that in most cases close will enter tl_close_finish_ser() 179 * synchronously and will not wait at all. 180 * 181 * 182 * Flow Control 183 * ============ 184 * 185 * The driver implements both read and write side service routines. No one calls 186 * putq() on the read queue. The read side service routine tl_rsrv() is called 187 * when the read side stream is back-enabled. It enters serializer synchronously 188 * (waits till serializer processing is complete). Within serializer it 189 * back-enables all endpoints blocked by the queue for connection-less 190 * transports and enables write side service processing for the peer for 191 * connection-oriented transports. 192 * 193 * Read and write side service routines use special mblk_sized space in the 194 * endpoint structure to enter perimeter. 195 * 196 * Write-side flow control 197 * ----------------------- 198 * 199 * Write side flow control is a bit tricky. The driver needs to deal with two 200 * message queues - the explicit STREAMS message queue maintained by 201 * putq()/getq()/putbq() and the implicit queue within the serializer. These two 202 * queues should be synchronized to preserve message ordering and should 203 * maintain a single order determined by the order in which messages enter 204 * tl_wput(). 
In order to maintain the ordering between these two queues the 205 * STREAMS queue is only manipulated within the serializer, so the ordering is 206 * provided by the serializer. 207 * 208 * Functions called from the tl_wsrv() sometimes may call putbq(). To 209 * immediately stop any further processing of the STREAMS message queues the 210 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write 211 * side service processing stops when the flag is set. 212 * 213 * The tl_wsrv() function enters serializer synchronously and waits for it to 214 * complete. The serializer call-back tl_wsrv_ser() either drains all messages 215 * on the STREAMS queue or terminates when it notices the te_nowsrv flag 216 * set. Note that the maximum amount of messages processed by tl_wput_ser() is 217 * always bounded by the amount of messages on the STREAMS queue at the time 218 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS 219 * queue from another serialized entry which can't happen in parallel. This 220 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk 221 * of it draining forever while writer places new messages on the STREAMS 222 * queue). 223 * 224 * Note that a closing endpoint never sets te_nowsrv and never calls putbq(). 225 * 226 * 227 * Unix Domain Sockets 228 * =================== 229 * 230 * The driver knows the structure of Unix Domain sockets addresses and treats 231 * them differently from generic TLI addresses. For sockets implicit binds are 232 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address 233 * instead of using address length of zero. Explicit binds specify 234 * SOU_MAGIC_EXPLICIT as magic. 235 * 236 * For implicit binds we always use minor number as soua_vp part of the address 237 * and avoid any hash table lookups. This saves two hash tables lookups per 238 * anonymous bind. 
239 * 240 * For explicit address we hash the vnode pointer instead of hashing the 241 * full-scale address+zone+length. Hashing by pointer is more efficient than 242 * hashing by the full address. 243 * 244 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the 245 * tep structure, so it should never be freed. 246 * 247 * Also for sockets the driver always uses minor number as acceptor id. 248 * 249 * TPI VIOLATIONS 250 * -------------- 251 * 252 * This driver violates TPI in several respects for Unix Domain Sockets: 253 * 254 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind 255 * is requested and the endpoint is already in use. There is no point in 256 * generating an unused address since this address will be rejected by 257 * sockfs anyway. For implicit binds it always generates a new address 258 * (sets soua_vp to its minor number). 259 * 260 * 2) It always uses minor number as acceptor ID and never uses queue 261 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ 262 * message and they do not use the queue pointer. 263 * 264 * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog 265 * followed by listen(). The listen() should be issued with non-zero 266 * backlog, so sotpi_listen() issues unbind request followed by bind 267 * request to the same address but with a non-zero qlen value. Both 268 * tl_bind() and tl_unbind() require write lock on the hash table to 269 * insert/remove the address. The driver does not remove the address from 270 * the hash for endpoints that are bound to the explicit address and have 271 * backlog of zero. During T_BIND_REQ processing if the address requested 272 * is equal to the address the endpoint already has it updates the backlog 273 * without reinserting the address in the hash table. This optimization 274 * avoids two hash table updates for each listener created. 
It also 275 * avoids the problem of a "stolen" address when another listener may use 276 * the same address between the unbind and bind and suddenly listen() fails 277 * because address is in use even though the bind() succeeded. 278 * 279 * 280 * CONNECTIONLESS TRANSPORTS 281 * ========================= 282 * 283 * Connectionless transports all share the same serializer (one for TLI and one 284 * for Sockets). Functions executing behind serializer can check or modify state 285 * of any endpoint. 286 * 287 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the 288 * te_lastep field. The next time X talks to some address A it checks whether A 289 * is the same as Y's address and if it is there is no need to lookup Y. If the 290 * address is different or the state of Y is not appropriate (e.g. closed or not 291 * idle) X does a lookup using tl_find_peer() and caches the new address. 292 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold 293 * on the endpoint found. 294 * 295 * During close of endpoint Y it doesn't try to remove itself from other 296 * endpoints' caches. They will detect that Y is gone and will search the peer 297 * endpoint again. 298 * 299 * Flow Control Handling. 300 * ---------------------- 301 * 302 * Each connectionless endpoint keeps a list of endpoints which are 303 * flow-controlled by its queue. It also keeps a pointer to the queue which 304 * flow-controls itself. Whenever flow control releases for endpoint X it 305 * enables all queues from the list. During close it also back-enables everyone 306 * in the list. If X is flow-controlled when it is closing it removes itself from 307 * the peer's list. 308 * 309 * DATA STRUCTURES 310 * =============== 311 * 312 * Each endpoint is represented by the tl_endpt_t structure which keeps all the 313 * endpoint state. For connection-oriented transports it keeps a list 314 * of pending connections (tl_icon_t). 
For connectionless transports it keeps a 315 * list of endpoints flow controlled by this one. 316 * 317 * Each transport type is represented by a per-transport data structure 318 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the 319 * endpoint address hash tables for each transport. It also contains pointer to 320 * transport serializer for connectionless transports. 321 * 322 * Each endpoint keeps a link to its transport structure, so the code can find 323 * all per-transport information quickly. 324 */ 325 326 #include <sys/types.h> 327 #include <sys/inttypes.h> 328 #include <sys/stream.h> 329 #include <sys/stropts.h> 330 #define _SUN_TPI_VERSION 2 331 #include <sys/tihdr.h> 332 #include <sys/strlog.h> 333 #include <sys/debug.h> 334 #include <sys/cred.h> 335 #include <sys/errno.h> 336 #include <sys/kmem.h> 337 #include <sys/id_space.h> 338 #include <sys/modhash.h> 339 #include <sys/mkdev.h> 340 #include <sys/tl.h> 341 #include <sys/stat.h> 342 #include <sys/conf.h> 343 #include <sys/modctl.h> 344 #include <sys/strsun.h> 345 #include <sys/socket.h> 346 #include <sys/socketvar.h> 347 #include <sys/sysmacros.h> 348 #include <sys/xti_xtiopt.h> 349 #include <sys/ddi.h> 350 #include <sys/sunddi.h> 351 #include <sys/zone.h> 352 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */ 353 #include <inet/optcom.h> 354 #include <sys/strsubr.h> 355 #include <sys/ucred.h> 356 #include <sys/suntpi.h> 357 #include <sys/list.h> 358 #include <sys/serializer.h> 359 360 /* 361 * TBD List 362 * 14 Eliminate state changes through table 363 * 16. AF_UNIX socket options 364 * 17. connect() for ticlts 365 * 18. support for "netstat" to show AF_UNIX plus TLI local 366 * transport connections 367 * 21. 
sanity check to flushing on sending M_ERROR 368 */ 369 370 /* 371 * CONSTANT DECLARATIONS 372 * -------------------- 373 */ 374 375 /* 376 * Local declarations 377 */ 378 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST] 379 380 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */ 381 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */ 382 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */ 383 /* 384 * Hash tables size. 385 */ 386 #define TL_HASH_SIZE 311 387 388 /* 389 * Definitions for module_info 390 */ 391 #define TL_ID (104) /* module ID number */ 392 #define TL_NAME "tl" /* module name */ 393 #define TL_MINPSZ (0) /* min packet size */ 394 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */ 395 #define TL_HIWAT (16*1024) /* hi water mark */ 396 #define TL_LOWAT (256) /* lo water mark */ 397 /* 398 * Definition of minor numbers/modes for new transport provider modes. 399 * We view the socket use as a separate mode to get a separate name space. 
400 */ 401 #define TL_TICOTS 0 /* connection oriented transport */ 402 #define TL_TICOTSORD 1 /* COTS w/ orderly release */ 403 #define TL_TICLTS 2 /* connectionless transport */ 404 #define TL_UNUSED 3 405 #define TL_SOCKET 4 /* Socket */ 406 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) 407 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) 408 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) 409 410 #define TL_MINOR_MASK 0x7 411 #define TL_MINOR_START (TL_TICLTS + 1) 412 413 /* 414 * LOCAL MACROS 415 */ 416 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t)) 417 418 /* 419 * EXTERNAL VARIABLE DECLARATIONS 420 * ----------------------------- 421 */ 422 /* 423 * state table defined in the OS space.c 424 */ 425 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; 426 427 /* 428 * STREAMS DRIVER ENTRY POINTS PROTOTYPES 429 */ 430 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); 431 static int tl_close(queue_t *, int, cred_t *); 432 static void tl_wput(queue_t *, mblk_t *); 433 static void tl_wsrv(queue_t *); 434 static void tl_rsrv(queue_t *); 435 436 static int tl_attach(dev_info_t *, ddi_attach_cmd_t); 437 static int tl_detach(dev_info_t *, ddi_detach_cmd_t); 438 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **); 439 440 441 /* 442 * GLOBAL DATA STRUCTURES AND VARIABLES 443 * ----------------------------------- 444 */ 445 446 /* 447 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ 448 * For now, we only manage the SO_RECVUCRED option but we also have 449 * harmless dummy options to make things work with some common code we access. 450 */ 451 opdes_t tl_opt_arr[] = { 452 /* The SO_TYPE is needed for the hack below */ 453 { 454 SO_TYPE, 455 SOL_SOCKET, 456 OA_R, 457 OA_R, 458 OP_NP, 459 0, 460 sizeof (t_scalar_t), 461 0 462 }, 463 { 464 SO_RECVUCRED, 465 SOL_SOCKET, 466 OA_RW, 467 OA_RW, 468 OP_NP, 469 0, 470 sizeof (int), 471 0 472 } 473 }; 474 475 /* 476 * Table of all supported levels 477 * Note: Some levels (e.g. 
XTI_GENERIC) may be valid but may not have 478 * any supported options so we need this info separately. 479 * 480 * This is needed only for topmost tpi providers. 481 */ 482 optlevel_t tl_valid_levels_arr[] = { 483 XTI_GENERIC, 484 SOL_SOCKET, 485 TL_PROT_LEVEL 486 }; 487 488 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) 489 /* 490 * Current upper bound on the amount of space needed to return all options. 491 * Additional options with data size of sizeof(long) are handled automatically. 492 * Others need hand job. 493 */ 494 #define TL_MAX_OPT_BUF_LEN \ 495 ((A_CNT(tl_opt_arr) << 2) + \ 496 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \ 497 + 64 + sizeof (struct T_optmgmt_ack)) 498 499 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) 500 501 /* 502 * transport addr structure 503 */ 504 typedef struct tl_addr { 505 zoneid_t ta_zoneid; /* Zone scope of address */ 506 t_scalar_t ta_alen; /* length of abuf */ 507 void *ta_abuf; /* the addr itself */ 508 } tl_addr_t; 509 510 /* 511 * Refcounted version of serializer. 512 */ 513 typedef struct tl_serializer { 514 uint_t ts_refcnt; 515 serializer_t *ts_serializer; 516 } tl_serializer_t; 517 518 /* 519 * Each transport type has a separate state. 520 * Per-transport state. 
521 */ 522 typedef struct tl_transport_state { 523 char *tr_name; 524 minor_t tr_minor; 525 uint32_t tr_defaddr; 526 mod_hash_t *tr_ai_hash; 527 mod_hash_t *tr_addr_hash; 528 tl_serializer_t *tr_serializer; 529 } tl_transport_state_t; 530 531 #define TL_DFADDR 0x1000 532 533 static tl_transport_state_t tl_transports[] = { 534 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL }, 535 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL }, 536 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL }, 537 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, 538 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL }, 539 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL }, 540 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL } 541 }; 542 543 #define TL_MAXTRANSPORT A_CNT(tl_transports) 544 545 struct tl_endpt; 546 typedef struct tl_endpt tl_endpt_t; 547 548 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); 549 550 /* 551 * Data structure used to represent pending connects. 552 * Records enough information so that the connecting peer can close 553 * before the connection gets accepted. 554 */ 555 typedef struct tl_icon { 556 list_node_t ti_node; 557 struct tl_endpt *ti_tep; /* NULL if peer has already closed */ 558 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */ 559 t_scalar_t ti_seqno; /* Sequence number */ 560 } tl_icon_t; 561 562 typedef struct so_ux_addr soux_addr_t; 563 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) 564 565 /* 566 * Maximum number of unaccepted connection indications allowed per listener. 
567 */ 568 #define TL_MAXQLEN 4096 569 int tl_maxqlen = TL_MAXQLEN; 570 571 /* 572 * transport endpoint structure 573 */ 574 struct tl_endpt { 575 queue_t *te_rq; /* stream read queue */ 576 queue_t *te_wq; /* stream write queue */ 577 uint32_t te_refcnt; 578 int32_t te_state; /* TPI state of endpoint */ 579 minor_t te_minor; /* minor number */ 580 #define te_seqno te_minor 581 uint_t te_flag; /* flag field */ 582 boolean_t te_nowsrv; 583 tl_serializer_t *te_ser; /* Serializer to use */ 584 #define te_serializer te_ser->ts_serializer 585 586 soux_addr_t te_uxaddr; /* Socket address */ 587 #define te_magic te_uxaddr.soua_magic 588 #define te_vp te_uxaddr.soua_vp 589 tl_addr_t te_ap; /* addr bound to this endpt */ 590 #define te_zoneid te_ap.ta_zoneid 591 #define te_alen te_ap.ta_alen 592 #define te_abuf te_ap.ta_abuf 593 594 tl_transport_state_t *te_transport; 595 #define te_addrhash te_transport->tr_addr_hash 596 #define te_aihash te_transport->tr_ai_hash 597 #define te_defaddr te_transport->tr_defaddr 598 cred_t *te_credp; /* endpoint user credentials */ 599 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */ 600 601 /* 602 * State specific for connection-oriented and connectionless transports. 603 */ 604 union { 605 /* Connection-oriented state. */ 606 struct { 607 t_uscalar_t _te_nicon; /* count of conn requests */ 608 t_uscalar_t _te_qlen; /* max conn requests */ 609 tl_endpt_t *_te_oconp; /* conn request pending */ 610 tl_endpt_t *_te_conp; /* connected endpt */ 611 #ifndef _ILP32 612 void *_te_pad; 613 #endif 614 list_t _te_iconp; /* list of conn ind. pending */ 615 } _te_cots_state; 616 /* Connection-less state. */ 617 struct { 618 tl_endpt_t *_te_lastep; /* last dest. 
endpoint */ 619 tl_endpt_t *_te_flowq; /* flow controlled on whom */ 620 list_node_t _te_flows; /* lists of connections */ 621 list_t _te_flowlist; /* Who flowcontrols on me */ 622 } _te_clts_state; 623 } _te_transport_state; 624 #define te_nicon _te_transport_state._te_cots_state._te_nicon 625 #define te_qlen _te_transport_state._te_cots_state._te_qlen 626 #define te_oconp _te_transport_state._te_cots_state._te_oconp 627 #define te_conp _te_transport_state._te_cots_state._te_conp 628 #define te_iconp _te_transport_state._te_cots_state._te_iconp 629 #define te_lastep _te_transport_state._te_clts_state._te_lastep 630 #define te_flowq _te_transport_state._te_clts_state._te_flowq 631 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist 632 #define te_flows _te_transport_state._te_clts_state._te_flows 633 634 bufcall_id_t te_bufcid; /* outstanding bufcall id */ 635 timeout_id_t te_timoutid; /* outstanding timeout id */ 636 pid_t te_cpid; /* cached pid of endpoint */ 637 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */ 638 /* 639 * Pieces of the endpoint state needed for closing. 640 */ 641 kmutex_t te_closelock; 642 kcondvar_t te_closecv; 643 uint8_t te_closing; /* The endpoint started closing */ 644 uint8_t te_closewait; /* Wait in close until zero */ 645 mblk_t te_closemp; /* for entering serializer on close */ 646 mblk_t te_rsrvmp; /* for entering serializer on rsrv */ 647 mblk_t te_wsrvmp; /* for entering serializer on wsrv */ 648 kmutex_t te_srv_lock; 649 kcondvar_t te_srv_cv; 650 uint8_t te_rsrv_active; /* Running in tl_rsrv() */ 651 uint8_t te_wsrv_active; /* Running in tl_wsrv() */ 652 /* 653 * Pieces of the endpoint state needed for serializer transitions. 654 */ 655 kmutex_t te_ser_lock; /* Protects the count below */ 656 uint_t te_ser_count; /* Number of messages on serializer */ 657 }; 658 659 /* 660 * Flag values. Lower 4 bits specify that transport used. 
661 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only, 662 * they allow to identify the endpoint more easily. 663 */ 664 #define TL_LISTENER 0x00010 /* the listener endpoint */ 665 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */ 666 #define TL_EAGER 0x00040 /* connecting endpoint */ 667 #define TL_ACCEPTED 0x00080 /* accepted connection */ 668 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */ 669 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */ 670 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */ 671 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */ 672 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */ 673 /* 674 * Boolean checks for the endpoint type. 675 */ 676 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) 677 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) 678 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) 679 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) 680 681 /* 682 * Certain operations are always used together. These macros reduce the chance 683 * of missing a part of a combination. 684 */ 685 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } 686 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } 687 688 #define TL_PUTBQ(x, mp) { \ 689 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \ 690 (x)->te_nowsrv = B_TRUE; \ 691 (void) putbq((x)->te_wq, mp); \ 692 } 693 694 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } 695 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } 696 697 /* 698 * STREAMS driver glue data structures. 
699 */ 700 static struct module_info tl_minfo = { 701 TL_ID, /* mi_idnum */ 702 TL_NAME, /* mi_idname */ 703 TL_MINPSZ, /* mi_minpsz */ 704 TL_MAXPSZ, /* mi_maxpsz */ 705 TL_HIWAT, /* mi_hiwat */ 706 TL_LOWAT /* mi_lowat */ 707 }; 708 709 static struct qinit tl_rinit = { 710 NULL, /* qi_putp */ 711 (int (*)())tl_rsrv, /* qi_srvp */ 712 tl_open, /* qi_qopen */ 713 tl_close, /* qi_qclose */ 714 NULL, /* qi_qadmin */ 715 &tl_minfo, /* qi_minfo */ 716 NULL /* qi_mstat */ 717 }; 718 719 static struct qinit tl_winit = { 720 (int (*)())tl_wput, /* qi_putp */ 721 (int (*)())tl_wsrv, /* qi_srvp */ 722 NULL, /* qi_qopen */ 723 NULL, /* qi_qclose */ 724 NULL, /* qi_qadmin */ 725 &tl_minfo, /* qi_minfo */ 726 NULL /* qi_mstat */ 727 }; 728 729 static struct streamtab tlinfo = { 730 &tl_rinit, /* st_rdinit */ 731 &tl_winit, /* st_wrinit */ 732 NULL, /* st_muxrinit */ 733 NULL /* st_muxwrinit */ 734 }; 735 736 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach, 737 nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported); 738 739 static struct modldrv modldrv = { 740 &mod_driverops, /* Type of module -- pseudo driver here */ 741 "TPI Local Transport (tl)", 742 &tl_devops, /* driver ops */ 743 }; 744 745 /* 746 * Module linkage information for the kernel. 747 */ 748 static struct modlinkage modlinkage = { 749 MODREV_1, 750 &modldrv, 751 NULL 752 }; 753 754 /* 755 * Templates for response to info request 756 * Check sanity of unlimited connect data etc. 
757 */ 758 759 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 760 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO) 761 762 static struct T_info_ack tl_cots_info_ack = 763 { 764 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */ 765 T_INFINITE, /* TSDU size */ 766 T_INFINITE, /* ETSDU size */ 767 T_INFINITE, /* CDATA_size */ 768 T_INFINITE, /* DDATA_size */ 769 T_INFINITE, /* ADDR_size */ 770 T_INFINITE, /* OPT_size */ 771 0, /* TIDU_size - fill at run time */ 772 T_COTS, /* SERV_type */ 773 -1, /* CURRENT_state */ 774 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */ 775 }; 776 777 static struct T_info_ack tl_clts_info_ack = 778 { 779 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */ 780 0, /* TSDU_size - fill at run time */ 781 -2, /* ETSDU_size -2 => not supported */ 782 -2, /* CDATA_size -2 => not supported */ 783 -2, /* DDATA_size -2 => not supported */ 784 -1, /* ADDR_size -1 => infinite */ 785 -1, /* OPT_size */ 786 0, /* TIDU_size - fill at run time */ 787 T_CLTS, /* SERV_type */ 788 -1, /* CURRENT_state */ 789 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */ 790 }; 791 792 /* 793 * private copy of devinfo pointer used in tl_info 794 */ 795 static dev_info_t *tl_dip; 796 797 /* 798 * Endpoints cache. 799 */ 800 static kmem_cache_t *tl_cache; 801 /* 802 * Minor number space. 803 */ 804 static id_space_t *tl_minors; 805 806 /* 807 * Default Data Unit size. 808 */ 809 static t_scalar_t tl_tidusz; 810 811 /* 812 * Size of hash tables. 813 */ 814 static size_t tl_hash_size = TL_HASH_SIZE; 815 816 /* 817 * Debug and test variable ONLY. Turn off T_CONN_IND queueing 818 * for sockets. 
819 */ 820 static int tl_disable_early_connect = 0; 821 static int tl_client_closing_when_accepting; 822 823 static int tl_serializer_noswitch; 824 825 /* 826 * LOCAL FUNCTION PROTOTYPES 827 * ------------------------- 828 */ 829 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *); 830 static void tl_do_proto(mblk_t *, tl_endpt_t *); 831 static void tl_do_ioctl(mblk_t *, tl_endpt_t *); 832 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *); 833 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t, 834 t_scalar_t); 835 static void tl_bind(mblk_t *, tl_endpt_t *); 836 static void tl_bind_ser(mblk_t *, tl_endpt_t *); 837 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t); 838 static void tl_unbind(mblk_t *, tl_endpt_t *); 839 static void tl_optmgmt(queue_t *, mblk_t *); 840 static void tl_conn_req(queue_t *, mblk_t *); 841 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *); 842 static void tl_conn_res(mblk_t *, tl_endpt_t *); 843 static void tl_discon_req(mblk_t *, tl_endpt_t *); 844 static void tl_capability_req(mblk_t *, tl_endpt_t *); 845 static void tl_info_req_ser(mblk_t *, tl_endpt_t *); 846 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *); 847 static void tl_info_req(mblk_t *, tl_endpt_t *); 848 static void tl_addr_req(mblk_t *, tl_endpt_t *); 849 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *); 850 static void tl_data(mblk_t *, tl_endpt_t *); 851 static void tl_exdata(mblk_t *, tl_endpt_t *); 852 static void tl_ordrel(mblk_t *, tl_endpt_t *); 853 static void tl_unitdata(mblk_t *, tl_endpt_t *); 854 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *); 855 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t); 856 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *); 857 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *); 858 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *); 859 static void tl_cl_backenable(tl_endpt_t *); 860 static void tl_co_unconnect(tl_endpt_t *); 861 static 
mblk_t *tl_resizemp(mblk_t *, ssize_t); 862 static void tl_discon_ind(tl_endpt_t *, uint32_t); 863 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t); 864 static mblk_t *tl_ordrel_ind_alloc(void); 865 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t); 866 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *); 867 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t); 868 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **); 869 static void tl_icon_freemsgs(mblk_t **); 870 static void tl_merror(queue_t *, mblk_t *, int); 871 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *); 872 static int tl_default_opt(queue_t *, int, int, uchar_t *); 873 static int tl_get_opt(queue_t *, int, int, uchar_t *); 874 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *, 875 uchar_t *, void *, cred_t *); 876 static void tl_memrecover(queue_t *, mblk_t *, size_t); 877 static void tl_freetip(tl_endpt_t *, tl_icon_t *); 878 static void tl_free(tl_endpt_t *); 879 static int tl_constructor(void *, void *, int); 880 static void tl_destructor(void *, void *); 881 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t); 882 static tl_serializer_t *tl_serializer_alloc(int); 883 static void tl_serializer_refhold(tl_serializer_t *); 884 static void tl_serializer_refrele(tl_serializer_t *); 885 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *); 886 static void tl_serializer_exit(tl_endpt_t *); 887 static boolean_t tl_noclose(tl_endpt_t *); 888 static void tl_closeok(tl_endpt_t *); 889 static void tl_refhold(tl_endpt_t *); 890 static void tl_refrele(tl_endpt_t *); 891 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t); 892 static uint_t tl_hash_by_addr(void *, mod_hash_key_t); 893 static void tl_close_ser(mblk_t *, tl_endpt_t *); 894 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *); 895 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *); 896 static void tl_proto_ser(mblk_t 
*, tl_endpt_t *); 897 static void tl_putq_ser(mblk_t *, tl_endpt_t *); 898 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *); 899 static void tl_wput_ser(mblk_t *, tl_endpt_t *); 900 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *); 901 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *); 902 static void tl_addr_unbind(tl_endpt_t *); 903 904 /* 905 * Intialize option database object for TL 906 */ 907 908 optdb_obj_t tl_opt_obj = { 909 tl_default_opt, /* TL default value function pointer */ 910 tl_get_opt, /* TL get function pointer */ 911 tl_set_opt, /* TL set function pointer */ 912 TL_OPT_ARR_CNT, /* TL option database count of entries */ 913 tl_opt_arr, /* TL option database */ 914 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */ 915 tl_valid_levels_arr /* TL valid level array */ 916 }; 917 918 /* 919 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS 920 * --------------------------------------- 921 */ 922 923 /* 924 * Loadable module routines 925 */ 926 int 927 _init(void) 928 { 929 return (mod_install(&modlinkage)); 930 } 931 932 int 933 _fini(void) 934 { 935 return (mod_remove(&modlinkage)); 936 } 937 938 int 939 _info(struct modinfo *modinfop) 940 { 941 return (mod_info(&modlinkage, modinfop)); 942 } 943 944 /* 945 * Driver Entry Points and Other routines 946 */ 947 static int 948 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 949 { 950 int i; 951 char name[32]; 952 953 /* 954 * Resume from a checkpoint state. 955 */ 956 if (cmd == DDI_RESUME) 957 return (DDI_SUCCESS); 958 959 if (cmd != DDI_ATTACH) 960 return (DDI_FAILURE); 961 962 /* 963 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that 964 * streams message sizes can be unlimited. We use a defined constant 965 * instead. 966 */ 967 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ; 968 969 /* 970 * Create subdevices for each transport. 
971 */ 972 for (i = 0; i < TL_UNUSED; i++) { 973 if (ddi_create_minor_node(devi, 974 tl_transports[i].tr_name, 975 S_IFCHR, tl_transports[i].tr_minor, 976 DDI_PSEUDO, NULL) == DDI_FAILURE) { 977 ddi_remove_minor_node(devi, NULL); 978 return (DDI_FAILURE); 979 } 980 } 981 982 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t), 983 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0); 984 985 if (tl_cache == NULL) { 986 ddi_remove_minor_node(devi, NULL); 987 return (DDI_FAILURE); 988 } 989 990 tl_minors = id_space_create("tl_minor_space", 991 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1); 992 993 /* 994 * Create ID space for minor numbers 995 */ 996 for (i = 0; i < TL_MAXTRANSPORT; i++) { 997 tl_transport_state_t *t = &tl_transports[i]; 998 999 if (i == TL_UNUSED) 1000 continue; 1001 1002 /* Socket COTSORD shares namespace with COTS */ 1003 if (i == TL_SOCK_COTSORD) { 1004 t->tr_ai_hash = 1005 tl_transports[TL_SOCK_COTS].tr_ai_hash; 1006 ASSERT(t->tr_ai_hash != NULL); 1007 t->tr_addr_hash = 1008 tl_transports[TL_SOCK_COTS].tr_addr_hash; 1009 ASSERT(t->tr_addr_hash != NULL); 1010 continue; 1011 } 1012 1013 /* 1014 * Create hash tables. 
1015 */ 1016 (void) snprintf(name, sizeof (name), "%s_ai_hash", 1017 t->tr_name); 1018 #ifdef _ILP32 1019 if (i & TL_SOCKET) 1020 t->tr_ai_hash = 1021 mod_hash_create_idhash(name, tl_hash_size - 1, 1022 mod_hash_null_valdtor); 1023 else 1024 t->tr_ai_hash = 1025 mod_hash_create_ptrhash(name, tl_hash_size, 1026 mod_hash_null_valdtor, sizeof (queue_t)); 1027 #else 1028 t->tr_ai_hash = 1029 mod_hash_create_idhash(name, tl_hash_size - 1, 1030 mod_hash_null_valdtor); 1031 #endif /* _ILP32 */ 1032 1033 if (i & TL_SOCKET) { 1034 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash", 1035 t->tr_name); 1036 t->tr_addr_hash = mod_hash_create_ptrhash(name, 1037 tl_hash_size, mod_hash_null_valdtor, 1038 sizeof (uintptr_t)); 1039 } else { 1040 (void) snprintf(name, sizeof (name), "%s_addr_hash", 1041 t->tr_name); 1042 t->tr_addr_hash = mod_hash_create_extended(name, 1043 tl_hash_size, mod_hash_null_keydtor, 1044 mod_hash_null_valdtor, 1045 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP); 1046 } 1047 1048 /* Create serializer for connectionless transports. */ 1049 if (i & TL_TICLTS) 1050 t->tr_serializer = tl_serializer_alloc(KM_SLEEP); 1051 } 1052 1053 tl_dip = devi; 1054 1055 return (DDI_SUCCESS); 1056 } 1057 1058 static int 1059 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 1060 { 1061 int i; 1062 1063 if (cmd == DDI_SUSPEND) 1064 return (DDI_SUCCESS); 1065 1066 if (cmd != DDI_DETACH) 1067 return (DDI_FAILURE); 1068 1069 /* 1070 * Destroy arenas and hash tables. 
1071 */ 1072 for (i = 0; i < TL_MAXTRANSPORT; i++) { 1073 tl_transport_state_t *t = &tl_transports[i]; 1074 1075 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD)) 1076 continue; 1077 1078 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL); 1079 if (t->tr_serializer != NULL) { 1080 tl_serializer_refrele(t->tr_serializer); 1081 t->tr_serializer = NULL; 1082 } 1083 1084 #ifdef _ILP32 1085 if (i & TL_SOCKET) 1086 mod_hash_destroy_idhash(t->tr_ai_hash); 1087 else 1088 mod_hash_destroy_ptrhash(t->tr_ai_hash); 1089 #else 1090 mod_hash_destroy_idhash(t->tr_ai_hash); 1091 #endif /* _ILP32 */ 1092 t->tr_ai_hash = NULL; 1093 if (i & TL_SOCKET) 1094 mod_hash_destroy_ptrhash(t->tr_addr_hash); 1095 else 1096 mod_hash_destroy_hash(t->tr_addr_hash); 1097 t->tr_addr_hash = NULL; 1098 } 1099 1100 kmem_cache_destroy(tl_cache); 1101 tl_cache = NULL; 1102 id_space_destroy(tl_minors); 1103 tl_minors = NULL; 1104 ddi_remove_minor_node(devi, NULL); 1105 return (DDI_SUCCESS); 1106 } 1107 1108 /* ARGSUSED */ 1109 static int 1110 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1111 { 1112 1113 int retcode = DDI_FAILURE; 1114 1115 switch (infocmd) { 1116 1117 case DDI_INFO_DEVT2DEVINFO: 1118 if (tl_dip != NULL) { 1119 *result = (void *)tl_dip; 1120 retcode = DDI_SUCCESS; 1121 } 1122 break; 1123 1124 case DDI_INFO_DEVT2INSTANCE: 1125 *result = (void *)0; 1126 retcode = DDI_SUCCESS; 1127 break; 1128 1129 default: 1130 break; 1131 } 1132 return (retcode); 1133 } 1134 1135 /* 1136 * Endpoint reference management. 
1137 */ 1138 static void 1139 tl_refhold(tl_endpt_t *tep) 1140 { 1141 atomic_add_32(&tep->te_refcnt, 1); 1142 } 1143 1144 static void 1145 tl_refrele(tl_endpt_t *tep) 1146 { 1147 ASSERT(tep->te_refcnt != 0); 1148 1149 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0) 1150 tl_free(tep); 1151 } 1152 1153 /*ARGSUSED*/ 1154 static int 1155 tl_constructor(void *buf, void *cdrarg, int kmflags) 1156 { 1157 tl_endpt_t *tep = buf; 1158 1159 bzero(tep, sizeof (tl_endpt_t)); 1160 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL); 1161 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL); 1162 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL); 1163 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL); 1164 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL); 1165 1166 return (0); 1167 } 1168 1169 /*ARGSUSED*/ 1170 static void 1171 tl_destructor(void *buf, void *cdrarg) 1172 { 1173 tl_endpt_t *tep = buf; 1174 1175 mutex_destroy(&tep->te_closelock); 1176 cv_destroy(&tep->te_closecv); 1177 mutex_destroy(&tep->te_srv_lock); 1178 cv_destroy(&tep->te_srv_cv); 1179 mutex_destroy(&tep->te_ser_lock); 1180 } 1181 1182 static void 1183 tl_free(tl_endpt_t *tep) 1184 { 1185 ASSERT(tep->te_refcnt == 0); 1186 ASSERT(tep->te_transport != NULL); 1187 ASSERT(tep->te_rq == NULL); 1188 ASSERT(tep->te_wq == NULL); 1189 ASSERT(tep->te_ser != NULL); 1190 ASSERT(tep->te_ser_count == 0); 1191 ASSERT(! 
(tep->te_flag & TL_ADDRHASHED)); 1192 1193 if (IS_SOCKET(tep)) { 1194 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN); 1195 ASSERT(tep->te_abuf == &tep->te_uxaddr); 1196 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor); 1197 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT); 1198 } else if (tep->te_abuf != NULL) { 1199 kmem_free(tep->te_abuf, tep->te_alen); 1200 tep->te_alen = -1; /* uninitialized */ 1201 tep->te_abuf = NULL; 1202 } else { 1203 ASSERT(tep->te_alen == -1); 1204 } 1205 1206 id_free(tl_minors, tep->te_minor); 1207 ASSERT(tep->te_credp == NULL); 1208 1209 if (tep->te_hash_hndl != NULL) 1210 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl); 1211 1212 if (IS_COTS(tep)) { 1213 TL_REMOVE_PEER(tep->te_conp); 1214 TL_REMOVE_PEER(tep->te_oconp); 1215 tl_serializer_refrele(tep->te_ser); 1216 tep->te_ser = NULL; 1217 ASSERT(tep->te_nicon == 0); 1218 ASSERT(list_head(&tep->te_iconp) == NULL); 1219 } else { 1220 ASSERT(tep->te_lastep == NULL); 1221 ASSERT(list_head(&tep->te_flowlist) == NULL); 1222 ASSERT(tep->te_flowq == NULL); 1223 } 1224 1225 ASSERT(tep->te_bufcid == 0); 1226 ASSERT(tep->te_timoutid == 0); 1227 bzero(&tep->te_ap, sizeof (tep->te_ap)); 1228 tep->te_acceptor_id = 0; 1229 1230 ASSERT(tep->te_closewait == 0); 1231 ASSERT(!tep->te_rsrv_active); 1232 ASSERT(!tep->te_wsrv_active); 1233 tep->te_closing = 0; 1234 tep->te_nowsrv = B_FALSE; 1235 tep->te_flag = 0; 1236 1237 kmem_cache_free(tl_cache, tep); 1238 } 1239 1240 /* 1241 * Allocate/free reference-counted wrappers for serializers. 
1242 */ 1243 static tl_serializer_t * 1244 tl_serializer_alloc(int flags) 1245 { 1246 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags); 1247 serializer_t *ser; 1248 1249 if (s == NULL) 1250 return (NULL); 1251 1252 ser = serializer_create(flags); 1253 1254 if (ser == NULL) { 1255 kmem_free(s, sizeof (tl_serializer_t)); 1256 return (NULL); 1257 } 1258 1259 s->ts_refcnt = 1; 1260 s->ts_serializer = ser; 1261 return (s); 1262 } 1263 1264 static void 1265 tl_serializer_refhold(tl_serializer_t *s) 1266 { 1267 atomic_add_32(&s->ts_refcnt, 1); 1268 } 1269 1270 static void 1271 tl_serializer_refrele(tl_serializer_t *s) 1272 { 1273 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) { 1274 serializer_destroy(s->ts_serializer); 1275 kmem_free(s, sizeof (tl_serializer_t)); 1276 } 1277 } 1278 1279 /* 1280 * Post a request on the endpoint serializer. For COTS transports keep track of 1281 * the number of pending requests. 1282 */ 1283 static void 1284 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp) 1285 { 1286 if (IS_COTS(tep)) { 1287 mutex_enter(&tep->te_ser_lock); 1288 tep->te_ser_count++; 1289 mutex_exit(&tep->te_ser_lock); 1290 } 1291 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep); 1292 } 1293 1294 /* 1295 * Complete processing the request on the serializer. Decrement the counter for 1296 * pending requests for COTS transports. 1297 */ 1298 static void 1299 tl_serializer_exit(tl_endpt_t *tep) 1300 { 1301 if (IS_COTS(tep)) { 1302 mutex_enter(&tep->te_ser_lock); 1303 ASSERT(tep->te_ser_count != 0); 1304 tep->te_ser_count--; 1305 mutex_exit(&tep->te_ser_lock); 1306 } 1307 } 1308 1309 /* 1310 * Hash management functions. 1311 */ 1312 1313 /* 1314 * Return TRUE if two addresses are equal, false otherwise. 
1315 */ 1316 static boolean_t 1317 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2) 1318 { 1319 return ((ap1->ta_alen > 0) && 1320 (ap1->ta_alen == ap2->ta_alen) && 1321 (ap1->ta_zoneid == ap2->ta_zoneid) && 1322 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0)); 1323 } 1324 1325 /* 1326 * This function is called whenever an endpoint is found in the hash table. 1327 */ 1328 /* ARGSUSED0 */ 1329 static void 1330 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val) 1331 { 1332 tl_refhold((tl_endpt_t *)val); 1333 } 1334 1335 /* 1336 * Address hash function. 1337 */ 1338 /* ARGSUSED */ 1339 static uint_t 1340 tl_hash_by_addr(void *hash_data, mod_hash_key_t key) 1341 { 1342 tl_addr_t *ap = (tl_addr_t *)key; 1343 size_t len = ap->ta_alen; 1344 uchar_t *p = ap->ta_abuf; 1345 uint_t i, g; 1346 1347 ASSERT((len > 0) && (p != NULL)); 1348 1349 for (i = ap->ta_zoneid; len -- != 0; p++) { 1350 i = (i << 4) + (*p); 1351 if ((g = (i & 0xf0000000U)) != 0) { 1352 i ^= (g >> 24); 1353 i ^= g; 1354 } 1355 } 1356 return (i); 1357 } 1358 1359 /* 1360 * This function is used by hash lookups. It compares two generic addresses. 1361 */ 1362 static int 1363 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2) 1364 { 1365 #ifdef DEBUG 1366 tl_addr_t *ap1 = (tl_addr_t *)key1; 1367 tl_addr_t *ap2 = (tl_addr_t *)key2; 1368 1369 ASSERT(key1 != NULL); 1370 ASSERT(key2 != NULL); 1371 1372 ASSERT(ap1->ta_abuf != NULL); 1373 ASSERT(ap2->ta_abuf != NULL); 1374 ASSERT(ap1->ta_alen > 0); 1375 ASSERT(ap2->ta_alen > 0); 1376 #endif 1377 1378 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2)); 1379 } 1380 1381 /* 1382 * Prevent endpoint from closing if possible. 1383 * Return B_TRUE on success, B_FALSE on failure. 1384 */ 1385 static boolean_t 1386 tl_noclose(tl_endpt_t *tep) 1387 { 1388 boolean_t rc = B_FALSE; 1389 1390 mutex_enter(&tep->te_closelock); 1391 if (! 
tep->te_closing) { 1392 ASSERT(tep->te_closewait == 0); 1393 tep->te_closewait++; 1394 rc = B_TRUE; 1395 } 1396 mutex_exit(&tep->te_closelock); 1397 return (rc); 1398 } 1399 1400 /* 1401 * Allow endpoint to close if needed. 1402 */ 1403 static void 1404 tl_closeok(tl_endpt_t *tep) 1405 { 1406 ASSERT(tep->te_closewait > 0); 1407 mutex_enter(&tep->te_closelock); 1408 ASSERT(tep->te_closewait == 1); 1409 tep->te_closewait--; 1410 cv_signal(&tep->te_closecv); 1411 mutex_exit(&tep->te_closelock); 1412 } 1413 1414 /* 1415 * STREAMS open entry point. 1416 */ 1417 /* ARGSUSED */ 1418 static int 1419 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1420 { 1421 tl_endpt_t *tep; 1422 minor_t minor = getminor(*devp); 1423 1424 /* 1425 * Driver is called directly. Both CLONEOPEN and MODOPEN 1426 * are illegal 1427 */ 1428 if ((sflag == CLONEOPEN) || (sflag == MODOPEN)) 1429 return (ENXIO); 1430 1431 if (rq->q_ptr != NULL) 1432 return (0); 1433 1434 /* Minor number should specify the mode used for the driver. */ 1435 if ((minor >= TL_UNUSED)) 1436 return (ENXIO); 1437 1438 if (oflag & SO_SOCKSTR) { 1439 minor |= TL_SOCKET; 1440 } 1441 1442 tep = kmem_cache_alloc(tl_cache, KM_SLEEP); 1443 tep->te_refcnt = 1; 1444 tep->te_cpid = curproc->p_pid; 1445 rq->q_ptr = WR(rq)->q_ptr = tep; 1446 tep->te_state = TS_UNBND; 1447 tep->te_credp = credp; 1448 crhold(credp); 1449 tep->te_zoneid = getzoneid(); 1450 1451 tep->te_flag = minor & TL_MINOR_MASK; 1452 tep->te_transport = &tl_transports[minor]; 1453 1454 /* Allocate a unique minor number for this instance. */ 1455 tep->te_minor = (minor_t)id_alloc(tl_minors); 1456 1457 /* Reserve hash handle for bind(). 
*/ 1458 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl); 1459 1460 /* Transport-specific initialization */ 1461 if (IS_COTS(tep)) { 1462 /* Use private serializer */ 1463 tep->te_ser = tl_serializer_alloc(KM_SLEEP); 1464 1465 /* Create list for pending connections */ 1466 list_create(&tep->te_iconp, sizeof (tl_icon_t), 1467 offsetof(tl_icon_t, ti_node)); 1468 tep->te_qlen = 0; 1469 tep->te_nicon = 0; 1470 tep->te_oconp = NULL; 1471 tep->te_conp = NULL; 1472 } else { 1473 /* Use shared serializer */ 1474 tep->te_ser = tep->te_transport->tr_serializer; 1475 bzero(&tep->te_flows, sizeof (list_node_t)); 1476 /* Create list for flow control */ 1477 list_create(&tep->te_flowlist, sizeof (tl_endpt_t), 1478 offsetof(tl_endpt_t, te_flows)); 1479 tep->te_flowq = NULL; 1480 tep->te_lastep = NULL; 1481 1482 } 1483 1484 /* Initialize endpoint address */ 1485 if (IS_SOCKET(tep)) { 1486 /* Socket-specific address handling. */ 1487 tep->te_alen = TL_SOUX_ADDRLEN; 1488 tep->te_abuf = &tep->te_uxaddr; 1489 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 1490 tep->te_magic = SOU_MAGIC_IMPLICIT; 1491 } else { 1492 tep->te_alen = -1; 1493 tep->te_abuf = NULL; 1494 } 1495 1496 /* clone the driver */ 1497 *devp = makedevice(getmajor(*devp), tep->te_minor); 1498 1499 tep->te_rq = rq; 1500 tep->te_wq = WR(rq); 1501 1502 #ifdef _ILP32 1503 if (IS_SOCKET(tep)) 1504 tep->te_acceptor_id = tep->te_minor; 1505 else 1506 tep->te_acceptor_id = (t_uscalar_t)rq; 1507 #else 1508 tep->te_acceptor_id = tep->te_minor; 1509 #endif /* _ILP32 */ 1510 1511 1512 qprocson(rq); 1513 1514 /* 1515 * Insert acceptor ID in the hash. The AI hash always sleeps on 1516 * insertion so insertion can't fail. 
1517 */ 1518 (void) mod_hash_insert(tep->te_transport->tr_ai_hash, 1519 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1520 (mod_hash_val_t)tep); 1521 1522 return (0); 1523 } 1524 1525 /* ARGSUSED1 */ 1526 static int 1527 tl_close(queue_t *rq, int flag, cred_t *credp) 1528 { 1529 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 1530 tl_endpt_t *elp = NULL; 1531 queue_t *wq = tep->te_wq; 1532 int rc; 1533 1534 ASSERT(wq == WR(rq)); 1535 1536 /* 1537 * Remove the endpoint from acceptor hash. 1538 */ 1539 rc = mod_hash_remove(tep->te_transport->tr_ai_hash, 1540 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id, 1541 (mod_hash_val_t *)&elp); 1542 ASSERT(rc == 0 && tep == elp); 1543 if ((rc != 0) || (tep != elp)) { 1544 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1545 SL_TRACE|SL_ERROR, 1546 "tl_close:inconsistency in AI hash")); 1547 } 1548 1549 /* 1550 * Wait till close is safe, then mark endpoint as closing. 1551 */ 1552 mutex_enter(&tep->te_closelock); 1553 while (tep->te_closewait) 1554 cv_wait(&tep->te_closecv, &tep->te_closelock); 1555 tep->te_closing = B_TRUE; 1556 /* 1557 * Will wait for the serializer part of the close to finish, so set 1558 * te_closewait now. 1559 */ 1560 tep->te_closewait = 1; 1561 tep->te_nowsrv = B_FALSE; 1562 mutex_exit(&tep->te_closelock); 1563 1564 /* 1565 * tl_close_ser doesn't drop reference, so no need to tl_refhold. 1566 * It is safe because close will wait for tl_close_ser to finish. 1567 */ 1568 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp); 1569 1570 /* 1571 * Wait for the first phase of close to complete before qprocsoff(). 
1572 */ 1573 mutex_enter(&tep->te_closelock); 1574 while (tep->te_closewait) 1575 cv_wait(&tep->te_closecv, &tep->te_closelock); 1576 mutex_exit(&tep->te_closelock); 1577 1578 qprocsoff(rq); 1579 1580 if (tep->te_bufcid) { 1581 qunbufcall(rq, tep->te_bufcid); 1582 tep->te_bufcid = 0; 1583 } 1584 if (tep->te_timoutid) { 1585 (void) quntimeout(rq, tep->te_timoutid); 1586 tep->te_timoutid = 0; 1587 } 1588 1589 /* 1590 * Finish close behind serializer. 1591 * 1592 * For a CLTS endpoint increase a refcount and continue close processing 1593 * with serializer protection. This processing may happen asynchronously 1594 * with the completion of tl_close(). 1595 * 1596 * Fot a COTS endpoint wait before destroying tep since the serializer 1597 * may go away together with tep and we need to destroy serializer 1598 * outside of serializer context. 1599 */ 1600 ASSERT(tep->te_closewait == 0); 1601 if (IS_COTS(tep)) 1602 tep->te_closewait = 1; 1603 else 1604 tl_refhold(tep); 1605 1606 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp); 1607 1608 /* 1609 * For connection-oriented transports wait for all serializer activity 1610 * to settle down. 1611 */ 1612 if (IS_COTS(tep)) { 1613 mutex_enter(&tep->te_closelock); 1614 while (tep->te_closewait) 1615 cv_wait(&tep->te_closecv, &tep->te_closelock); 1616 mutex_exit(&tep->te_closelock); 1617 } 1618 1619 crfree(tep->te_credp); 1620 tep->te_credp = NULL; 1621 tep->te_wq = NULL; 1622 tl_refrele(tep); 1623 /* 1624 * tep is likely to be destroyed now, so can't reference it any more. 1625 */ 1626 1627 rq->q_ptr = wq->q_ptr = NULL; 1628 return (0); 1629 } 1630 1631 /* 1632 * First phase of close processing done behind the serializer. 1633 * 1634 * Do not drop the reference in the end - tl_close() wants this reference to 1635 * stay. 
1636 */ 1637 /* ARGSUSED0 */ 1638 static void 1639 tl_close_ser(mblk_t *mp, tl_endpt_t *tep) 1640 { 1641 ASSERT(tep->te_closing); 1642 ASSERT(tep->te_closewait == 1); 1643 ASSERT(!(tep->te_flag & TL_CLOSE_SER)); 1644 1645 tep->te_flag |= TL_CLOSE_SER; 1646 1647 /* 1648 * Drain out all messages on queue except for TL_TICOTS where the 1649 * abortive release semantics permit discarding of data on close 1650 */ 1651 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) { 1652 tl_wsrv_ser(NULL, tep); 1653 } 1654 1655 /* Remove address from hash table. */ 1656 tl_addr_unbind(tep); 1657 /* 1658 * qprocsoff() gets confused when q->q_next is not NULL on the write 1659 * queue of the driver, so clear these before qprocsoff() is called. 1660 * Also clear q_next for the peer since this queue is going away. 1661 */ 1662 if (IS_COTS(tep) && !IS_SOCKET(tep)) { 1663 tl_endpt_t *peer_tep = tep->te_conp; 1664 1665 tep->te_wq->q_next = NULL; 1666 if ((peer_tep != NULL) && !peer_tep->te_closing) 1667 peer_tep->te_wq->q_next = NULL; 1668 } 1669 1670 tep->te_rq = NULL; 1671 1672 /* wake up tl_close() */ 1673 tl_closeok(tep); 1674 tl_serializer_exit(tep); 1675 } 1676 1677 /* 1678 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop 1679 * the reference for CLTS. 1680 * 1681 * Called from serializer. Should drop reference count for CLTS only. 1682 */ 1683 /* ARGSUSED0 */ 1684 static void 1685 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep) 1686 { 1687 ASSERT(tep->te_closing); 1688 IMPLY(IS_CLTS(tep), tep->te_closewait == 0); 1689 IMPLY(IS_COTS(tep), tep->te_closewait == 1); 1690 1691 tep->te_state = -1; /* Uninitialized */ 1692 if (IS_COTS(tep)) { 1693 tl_co_unconnect(tep); 1694 } else { 1695 /* Connectionless specific cleanup */ 1696 TL_REMOVE_PEER(tep->te_lastep); 1697 /* 1698 * Backenable anybody that is flow controlled waiting for 1699 * this endpoint. 
1700 */ 1701 tl_cl_backenable(tep); 1702 if (tep->te_flowq != NULL) { 1703 list_remove(&(tep->te_flowq->te_flowlist), tep); 1704 tep->te_flowq = NULL; 1705 } 1706 } 1707 1708 tl_serializer_exit(tep); 1709 if (IS_COTS(tep)) 1710 tl_closeok(tep); 1711 else 1712 tl_refrele(tep); 1713 } 1714 1715 /* 1716 * STREAMS write-side put procedure. 1717 * Enter serializer for most of the processing. 1718 * 1719 * The T_CONN_REQ is processed outside of serializer. 1720 */ 1721 static void 1722 tl_wput(queue_t *wq, mblk_t *mp) 1723 { 1724 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1725 ssize_t msz = MBLKL(mp); 1726 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 1727 tlproc_t *tl_proc = NULL; 1728 1729 switch (DB_TYPE(mp)) { 1730 case M_DATA: 1731 /* Only valid for connection-oriented transports */ 1732 if (IS_CLTS(tep)) { 1733 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1734 SL_TRACE|SL_ERROR, 1735 "tl_wput:M_DATA invalid for ticlts driver")); 1736 tl_merror(wq, mp, EPROTO); 1737 return; 1738 } 1739 tl_proc = tl_wput_data_ser; 1740 break; 1741 1742 case M_IOCTL: 1743 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 1744 case TL_IOC_CREDOPT: 1745 /* FALLTHROUGH */ 1746 case TL_IOC_UCREDOPT: 1747 /* 1748 * Serialize endpoint state change. 
1749 */ 1750 tl_proc = tl_do_ioctl_ser; 1751 break; 1752 1753 default: 1754 miocnak(wq, mp, 0, EINVAL); 1755 return; 1756 } 1757 break; 1758 1759 case M_FLUSH: 1760 /* 1761 * do canonical M_FLUSH processing 1762 */ 1763 if (*mp->b_rptr & FLUSHW) { 1764 flushq(wq, FLUSHALL); 1765 *mp->b_rptr &= ~FLUSHW; 1766 } 1767 if (*mp->b_rptr & FLUSHR) { 1768 flushq(RD(wq), FLUSHALL); 1769 qreply(wq, mp); 1770 } else { 1771 freemsg(mp); 1772 } 1773 return; 1774 1775 case M_PROTO: 1776 if (msz < sizeof (prim->type)) { 1777 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1778 SL_TRACE|SL_ERROR, 1779 "tl_wput:M_PROTO data too short")); 1780 tl_merror(wq, mp, EPROTO); 1781 return; 1782 } 1783 switch (prim->type) { 1784 case T_OPTMGMT_REQ: 1785 case T_SVR4_OPTMGMT_REQ: 1786 /* 1787 * Process TPI option management requests immediately 1788 * in put procedure regardless of in-order processing 1789 * of already queued messages. 1790 * (Note: This driver supports AF_UNIX socket 1791 * implementation. Unless we implement this processing, 1792 * setsockopt() on socket endpoint will block on flow 1793 * controlled endpoints which it should not. That is 1794 * required for successful execution of VSU socket tests 1795 * and is consistent with BSD socket behavior). 
1796 */ 1797 tl_optmgmt(wq, mp); 1798 return; 1799 case O_T_BIND_REQ: 1800 case T_BIND_REQ: 1801 tl_proc = tl_bind_ser; 1802 break; 1803 case T_CONN_REQ: 1804 if (IS_CLTS(tep)) { 1805 tl_merror(wq, mp, EPROTO); 1806 return; 1807 } 1808 tl_conn_req(wq, mp); 1809 return; 1810 case T_DATA_REQ: 1811 case T_OPTDATA_REQ: 1812 case T_EXDATA_REQ: 1813 case T_ORDREL_REQ: 1814 tl_proc = tl_putq_ser; 1815 break; 1816 case T_UNITDATA_REQ: 1817 if (IS_COTS(tep) || 1818 (msz < sizeof (struct T_unitdata_req))) { 1819 tl_merror(wq, mp, EPROTO); 1820 return; 1821 } 1822 if ((tep->te_state == TS_IDLE) && !wq->q_first) { 1823 tl_proc = tl_unitdata_ser; 1824 } else { 1825 tl_proc = tl_putq_ser; 1826 } 1827 break; 1828 default: 1829 /* 1830 * process in service procedure if message already 1831 * queued (maintain in-order processing) 1832 */ 1833 if (wq->q_first != NULL) { 1834 tl_proc = tl_putq_ser; 1835 } else { 1836 tl_proc = tl_wput_ser; 1837 } 1838 break; 1839 } 1840 break; 1841 1842 case M_PCPROTO: 1843 /* 1844 * Check that the message has enough data to figure out TPI 1845 * primitive. 1846 */ 1847 if (msz < sizeof (prim->type)) { 1848 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1849 SL_TRACE|SL_ERROR, 1850 "tl_wput:M_PCROTO data too short")); 1851 tl_merror(wq, mp, EPROTO); 1852 return; 1853 } 1854 switch (prim->type) { 1855 case T_CAPABILITY_REQ: 1856 tl_capability_req(mp, tep); 1857 return; 1858 case T_INFO_REQ: 1859 tl_proc = tl_info_req_ser; 1860 break; 1861 case T_ADDR_REQ: 1862 tl_proc = tl_addr_req_ser; 1863 break; 1864 1865 default: 1866 (void) (STRLOG(TL_ID, tep->te_minor, 1, 1867 SL_TRACE|SL_ERROR, 1868 "tl_wput:unknown TPI msg primitive")); 1869 tl_merror(wq, mp, EPROTO); 1870 return; 1871 } 1872 break; 1873 default: 1874 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 1875 "tl_wput:default:unexpected Streams message")); 1876 freemsg(mp); 1877 return; 1878 } 1879 1880 /* 1881 * Continue processing via serializer. 
1882 */ 1883 ASSERT(tl_proc != NULL); 1884 tl_refhold(tep); 1885 tl_serializer_enter(tep, tl_proc, mp); 1886 } 1887 1888 /* 1889 * Place message on the queue while preserving order. 1890 */ 1891 static void 1892 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep) 1893 { 1894 if (tep->te_closing) { 1895 tl_wput_ser(mp, tep); 1896 } else { 1897 TL_PUTQ(tep, mp); 1898 tl_serializer_exit(tep); 1899 tl_refrele(tep); 1900 } 1901 1902 } 1903 1904 static void 1905 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep) 1906 { 1907 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 1908 1909 switch (DB_TYPE(mp)) { 1910 case M_DATA: 1911 tl_data(mp, tep); 1912 break; 1913 case M_PROTO: 1914 tl_do_proto(mp, tep); 1915 break; 1916 default: 1917 freemsg(mp); 1918 break; 1919 } 1920 } 1921 1922 /* 1923 * Write side put procedure called from serializer. 1924 */ 1925 static void 1926 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep) 1927 { 1928 tl_wput_common_ser(mp, tep); 1929 tl_serializer_exit(tep); 1930 tl_refrele(tep); 1931 } 1932 1933 /* 1934 * M_DATA processing. Called from serializer. 1935 */ 1936 static void 1937 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep) 1938 { 1939 tl_endpt_t *peer_tep = tep->te_conp; 1940 queue_t *peer_rq; 1941 1942 ASSERT(DB_TYPE(mp) == M_DATA); 1943 ASSERT(IS_COTS(tep)); 1944 1945 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer); 1946 1947 /* 1948 * fastpath for data. Ignore flow control if tep is closing. 
1949 */ 1950 if ((peer_tep != NULL) && 1951 !peer_tep->te_closing && 1952 ((tep->te_state == TS_DATA_XFER) || 1953 (tep->te_state == TS_WREQ_ORDREL)) && 1954 (tep->te_wq != NULL) && 1955 (tep->te_wq->q_first == NULL) && 1956 ((peer_tep->te_state == TS_DATA_XFER) || 1957 (peer_tep->te_state == TS_WREQ_ORDREL)) && 1958 ((peer_rq = peer_tep->te_rq) != NULL) && 1959 (canputnext(peer_rq) || tep->te_closing)) { 1960 putnext(peer_rq, mp); 1961 } else if (tep->te_closing) { 1962 /* 1963 * It is possible that by the time we got here tep started to 1964 * close. If the write queue is not empty, and the state is 1965 * TS_DATA_XFER the data should be delivered in order, so we 1966 * call putq() instead of freeing the data. 1967 */ 1968 if ((tep->te_wq != NULL) && 1969 ((tep->te_state == TS_DATA_XFER) || 1970 (tep->te_state == TS_WREQ_ORDREL))) { 1971 TL_PUTQ(tep, mp); 1972 } else { 1973 freemsg(mp); 1974 } 1975 } else { 1976 TL_PUTQ(tep, mp); 1977 } 1978 1979 tl_serializer_exit(tep); 1980 tl_refrele(tep); 1981 } 1982 1983 /* 1984 * Write side service routine. 1985 * 1986 * All actual processing happens within serializer which is entered 1987 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new 1988 * messages that need processing may have arrived, so tl_wsrv repeats until 1989 * queue is empty or te_nowsrv is set. 1990 */ 1991 static void 1992 tl_wsrv(queue_t *wq) 1993 { 1994 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 1995 1996 while ((wq->q_first != NULL) && !tep->te_nowsrv) { 1997 mutex_enter(&tep->te_srv_lock); 1998 ASSERT(tep->te_wsrv_active == B_FALSE); 1999 tep->te_wsrv_active = B_TRUE; 2000 mutex_exit(&tep->te_srv_lock); 2001 2002 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp); 2003 2004 /* 2005 * Wait for serializer job to complete. 
2006 */ 2007 mutex_enter(&tep->te_srv_lock); 2008 while (tep->te_wsrv_active) { 2009 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2010 } 2011 cv_signal(&tep->te_srv_cv); 2012 mutex_exit(&tep->te_srv_lock); 2013 } 2014 } 2015 2016 /* 2017 * Serialized write side processing of the STREAMS queue. 2018 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp 2019 * is NULL. 2020 */ 2021 static void 2022 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep) 2023 { 2024 mblk_t *mp; 2025 queue_t *wq = tep->te_wq; 2026 2027 ASSERT(wq != NULL); 2028 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) { 2029 tl_wput_common_ser(mp, tep); 2030 } 2031 2032 /* 2033 * Wakeup service routine unless called from close. 2034 * If ser_mp is specified, the caller is tl_wsrv(). 2035 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't 2036 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should 2037 * be no matching tl_serializer_exit() in this case. 2038 * Also, there is no need to wakeup anyone since tl_close_ser() is not 2039 * waiting on te_srv_cv. 2040 */ 2041 if (ser_mp != NULL) { 2042 /* 2043 * We are called from tl_wsrv. 2044 */ 2045 mutex_enter(&tep->te_srv_lock); 2046 ASSERT(tep->te_wsrv_active); 2047 tep->te_wsrv_active = B_FALSE; 2048 cv_signal(&tep->te_srv_cv); 2049 mutex_exit(&tep->te_srv_lock); 2050 tl_serializer_exit(tep); 2051 } 2052 } 2053 2054 /* 2055 * Called when the stream is backenabled. Enter serializer and qenable everyone 2056 * flow controlled by tep. 2057 * 2058 * NOTE: The service routine should enter serializer synchronously. Otherwise it 2059 * is possible that two instances of tl_rsrv will be running reusing the same 2060 * rsrv mblk. 
2061 */ 2062 static void 2063 tl_rsrv(queue_t *rq) 2064 { 2065 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr; 2066 2067 ASSERT(rq->q_first == NULL); 2068 ASSERT(tep->te_rsrv_active == 0); 2069 2070 tep->te_rsrv_active = B_TRUE; 2071 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp); 2072 /* 2073 * Wait for serializer job to complete. 2074 */ 2075 mutex_enter(&tep->te_srv_lock); 2076 while (tep->te_rsrv_active) { 2077 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock); 2078 } 2079 cv_signal(&tep->te_srv_cv); 2080 mutex_exit(&tep->te_srv_lock); 2081 } 2082 2083 /* ARGSUSED */ 2084 static void 2085 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep) 2086 { 2087 tl_endpt_t *peer_tep; 2088 2089 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) { 2090 tl_cl_backenable(tep); 2091 } else if ( 2092 IS_COTS(tep) && 2093 ((peer_tep = tep->te_conp) != NULL) && 2094 !peer_tep->te_closing && 2095 ((tep->te_state == TS_DATA_XFER) || 2096 (tep->te_state == TS_WIND_ORDREL)|| 2097 (tep->te_state == TS_WREQ_ORDREL))) { 2098 TL_QENABLE(peer_tep); 2099 } 2100 2101 /* 2102 * Wakeup read side service routine. 2103 */ 2104 mutex_enter(&tep->te_srv_lock); 2105 ASSERT(tep->te_rsrv_active); 2106 tep->te_rsrv_active = B_FALSE; 2107 cv_signal(&tep->te_srv_cv); 2108 mutex_exit(&tep->te_srv_lock); 2109 tl_serializer_exit(tep); 2110 } 2111 2112 /* 2113 * process M_PROTO messages. Always called from serializer. 2114 */ 2115 static void 2116 tl_do_proto(mblk_t *mp, tl_endpt_t *tep) 2117 { 2118 ssize_t msz = MBLKL(mp); 2119 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 2120 2121 /* Message size was validated by tl_wput(). 
*/ 2122 ASSERT(msz >= sizeof (prim->type)); 2123 2124 switch (prim->type) { 2125 case T_UNBIND_REQ: 2126 tl_unbind(mp, tep); 2127 break; 2128 2129 case T_ADDR_REQ: 2130 tl_addr_req(mp, tep); 2131 break; 2132 2133 case O_T_CONN_RES: 2134 case T_CONN_RES: 2135 if (IS_CLTS(tep)) { 2136 tl_merror(tep->te_wq, mp, EPROTO); 2137 break; 2138 } 2139 tl_conn_res(mp, tep); 2140 break; 2141 2142 case T_DISCON_REQ: 2143 if (IS_CLTS(tep)) { 2144 tl_merror(tep->te_wq, mp, EPROTO); 2145 break; 2146 } 2147 tl_discon_req(mp, tep); 2148 break; 2149 2150 case T_DATA_REQ: 2151 if (IS_CLTS(tep)) { 2152 tl_merror(tep->te_wq, mp, EPROTO); 2153 break; 2154 } 2155 tl_data(mp, tep); 2156 break; 2157 2158 case T_OPTDATA_REQ: 2159 if (IS_CLTS(tep)) { 2160 tl_merror(tep->te_wq, mp, EPROTO); 2161 break; 2162 } 2163 tl_data(mp, tep); 2164 break; 2165 2166 case T_EXDATA_REQ: 2167 if (IS_CLTS(tep)) { 2168 tl_merror(tep->te_wq, mp, EPROTO); 2169 break; 2170 } 2171 tl_exdata(mp, tep); 2172 break; 2173 2174 case T_ORDREL_REQ: 2175 if (! IS_COTSORD(tep)) { 2176 tl_merror(tep->te_wq, mp, EPROTO); 2177 break; 2178 } 2179 tl_ordrel(mp, tep); 2180 break; 2181 2182 case T_UNITDATA_REQ: 2183 if (IS_COTS(tep)) { 2184 tl_merror(tep->te_wq, mp, EPROTO); 2185 break; 2186 } 2187 tl_unitdata(mp, tep); 2188 break; 2189 2190 default: 2191 tl_merror(tep->te_wq, mp, EPROTO); 2192 break; 2193 } 2194 } 2195 2196 /* 2197 * Process ioctl from serializer. 2198 * This is a wrapper around tl_do_ioctl(). 2199 */ 2200 static void 2201 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep) 2202 { 2203 if (! 
tep->te_closing) 2204 tl_do_ioctl(mp, tep); 2205 else 2206 freemsg(mp); 2207 2208 tl_serializer_exit(tep); 2209 tl_refrele(tep); 2210 } 2211 2212 static void 2213 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep) 2214 { 2215 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr; 2216 int cmd = iocbp->ioc_cmd; 2217 queue_t *wq = tep->te_wq; 2218 int error; 2219 int thisopt, otheropt; 2220 2221 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT)); 2222 2223 switch (cmd) { 2224 case TL_IOC_CREDOPT: 2225 if (cmd == TL_IOC_CREDOPT) { 2226 thisopt = TL_SETCRED; 2227 otheropt = TL_SETUCRED; 2228 } else { 2229 /* FALLTHROUGH */ 2230 case TL_IOC_UCREDOPT: 2231 thisopt = TL_SETUCRED; 2232 otheropt = TL_SETCRED; 2233 } 2234 /* 2235 * The credentials passing does not apply to sockets. 2236 * Only one of the cred options can be set at a given time. 2237 */ 2238 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) { 2239 miocnak(wq, mp, 0, EINVAL); 2240 return; 2241 } 2242 2243 /* 2244 * Turn on generation of credential options for 2245 * T_conn_req, T_conn_con, T_unidata_ind. 
2246 */ 2247 error = miocpullup(mp, sizeof (uint32_t)); 2248 if (error != 0) { 2249 miocnak(wq, mp, 0, error); 2250 return; 2251 } 2252 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) { 2253 miocnak(wq, mp, 0, EINVAL); 2254 return; 2255 } 2256 2257 if (*(uint32_t *)mp->b_cont->b_rptr) 2258 tep->te_flag |= thisopt; 2259 else 2260 tep->te_flag &= ~thisopt; 2261 2262 miocack(wq, mp, 0, 0); 2263 break; 2264 2265 default: 2266 /* Should not be here */ 2267 miocnak(wq, mp, 0, EINVAL); 2268 break; 2269 } 2270 } 2271 2272 2273 /* 2274 * send T_ERROR_ACK 2275 * Note: assumes enough memory or caller passed big enough mp 2276 * - no recovery from allocb failures 2277 */ 2278 2279 static void 2280 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err, 2281 t_scalar_t unix_err, t_scalar_t type) 2282 { 2283 struct T_error_ack *err_ack; 2284 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2285 M_PCPROTO, T_ERROR_ACK); 2286 2287 if (ackmp == NULL) { 2288 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR, 2289 "tl_error_ack:out of mblk memory")); 2290 tl_merror(wq, NULL, ENOSR); 2291 return; 2292 } 2293 err_ack = (struct T_error_ack *)ackmp->b_rptr; 2294 err_ack->ERROR_prim = type; 2295 err_ack->TLI_error = tli_err; 2296 err_ack->UNIX_error = unix_err; 2297 2298 /* 2299 * send error ack message 2300 */ 2301 qreply(wq, ackmp); 2302 } 2303 2304 2305 2306 /* 2307 * send T_OK_ACK 2308 * Note: assumes enough memory or caller passed big enough mp 2309 * - no recovery from allocb failures 2310 */ 2311 static void 2312 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type) 2313 { 2314 struct T_ok_ack *ok_ack; 2315 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack), 2316 M_PCPROTO, T_OK_ACK); 2317 2318 if (ackmp == NULL) { 2319 tl_merror(wq, NULL, ENOMEM); 2320 return; 2321 } 2322 2323 ok_ack = (struct T_ok_ack *)ackmp->b_rptr; 2324 ok_ack->CORRECT_prim = type; 2325 2326 (void) qreply(wq, ackmp); 2327 } 2328 2329 /* 2330 * Process T_BIND_REQ and O_T_BIND_REQ 
from serializer. 2331 * This is a wrapper around tl_bind(). 2332 */ 2333 static void 2334 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep) 2335 { 2336 if (! tep->te_closing) 2337 tl_bind(mp, tep); 2338 else 2339 freemsg(mp); 2340 2341 tl_serializer_exit(tep); 2342 tl_refrele(tep); 2343 } 2344 2345 /* 2346 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests. 2347 * Assumes that the endpoint is in the unbound. 2348 */ 2349 static void 2350 tl_bind(mblk_t *mp, tl_endpt_t *tep) 2351 { 2352 queue_t *wq = tep->te_wq; 2353 struct T_bind_ack *b_ack; 2354 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr; 2355 mblk_t *ackmp, *bamp; 2356 soux_addr_t ux_addr; 2357 t_uscalar_t qlen = 0; 2358 t_scalar_t alen, aoff; 2359 tl_addr_t addr_req; 2360 void *addr_startp; 2361 ssize_t msz = MBLKL(mp), basize; 2362 t_scalar_t tli_err = 0, unix_err = 0; 2363 t_scalar_t save_prim_type = bind->PRIM_type; 2364 t_scalar_t save_state = tep->te_state; 2365 2366 if (tep->te_state != TS_UNBND) { 2367 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2368 SL_TRACE|SL_ERROR, 2369 "tl_wput:bind_request:out of state, state=%d", 2370 tep->te_state)); 2371 tli_err = TOUTSTATE; 2372 goto error; 2373 } 2374 2375 if (msz < sizeof (struct T_bind_req)) { 2376 tli_err = TSYSERR; unix_err = EINVAL; 2377 goto error; 2378 } 2379 2380 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state); 2381 2382 ASSERT((bind->PRIM_type == O_T_BIND_REQ) || 2383 (bind->PRIM_type == T_BIND_REQ)); 2384 2385 alen = bind->ADDR_length; 2386 aoff = bind->ADDR_offset; 2387 2388 /* negotiate max conn req pending */ 2389 if (IS_COTS(tep)) { 2390 qlen = bind->CONIND_number; 2391 if (qlen > tl_maxqlen) 2392 qlen = tl_maxqlen; 2393 } 2394 2395 /* 2396 * Reserve hash handle. It can only be NULL if the endpoint is unbound 2397 * and bound again. 
2398 */ 2399 if ((tep->te_hash_hndl == NULL) && 2400 ((tep->te_flag & TL_ADDRHASHED) == 0) && 2401 mod_hash_reserve_nosleep(tep->te_addrhash, 2402 &tep->te_hash_hndl) != 0) { 2403 tli_err = TSYSERR; unix_err = ENOSR; 2404 goto error; 2405 } 2406 2407 /* 2408 * Verify address correctness. 2409 */ 2410 if (IS_SOCKET(tep)) { 2411 ASSERT(bind->PRIM_type == O_T_BIND_REQ); 2412 2413 if ((alen != TL_SOUX_ADDRLEN) || 2414 (aoff < 0) || 2415 (aoff + alen > msz)) { 2416 (void) (STRLOG(TL_ID, tep->te_minor, 2417 1, SL_TRACE|SL_ERROR, 2418 "tl_bind: invalid socket addr")); 2419 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2420 tli_err = TSYSERR; unix_err = EINVAL; 2421 goto error; 2422 } 2423 /* Copy address from message to local buffer. */ 2424 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr)); 2425 /* 2426 * Check that we got correct address from sockets 2427 */ 2428 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) && 2429 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) { 2430 (void) (STRLOG(TL_ID, tep->te_minor, 2431 1, SL_TRACE|SL_ERROR, 2432 "tl_bind: invalid socket magic")); 2433 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2434 tli_err = TSYSERR; unix_err = EINVAL; 2435 goto error; 2436 } 2437 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) && 2438 (ux_addr.soua_vp != NULL)) { 2439 (void) (STRLOG(TL_ID, tep->te_minor, 2440 1, SL_TRACE|SL_ERROR, 2441 "tl_bind: implicit addr non-empty")); 2442 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2443 tli_err = TSYSERR; unix_err = EINVAL; 2444 goto error; 2445 } 2446 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) && 2447 (ux_addr.soua_vp == NULL)) { 2448 (void) (STRLOG(TL_ID, tep->te_minor, 2449 1, SL_TRACE|SL_ERROR, 2450 "tl_bind: explicit addr empty")); 2451 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2452 tli_err = TSYSERR; unix_err = EINVAL; 2453 goto error; 2454 } 2455 } else { 2456 if ((alen > 0) && ((aoff < 0) || 2457 ((ssize_t)(aoff + alen) > msz) || 2458 ((aoff + alen) < 0))) { 
2459 (void) (STRLOG(TL_ID, tep->te_minor, 2460 1, SL_TRACE|SL_ERROR, 2461 "tl_bind: invalid message")); 2462 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2463 tli_err = TSYSERR; unix_err = EINVAL; 2464 goto error; 2465 } 2466 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) { 2467 (void) (STRLOG(TL_ID, tep->te_minor, 2468 1, SL_TRACE|SL_ERROR, 2469 "tl_bind: bad addr in message")); 2470 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2471 tli_err = TBADADDR; 2472 goto error; 2473 } 2474 #ifdef DEBUG 2475 /* 2476 * Mild form of ASSERT()ion to detect broken TPI apps. 2477 * if (! assertion) 2478 * log warning; 2479 */ 2480 if (! ((alen == 0 && aoff == 0) || 2481 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) { 2482 (void) (STRLOG(TL_ID, tep->te_minor, 2483 3, SL_TRACE|SL_ERROR, 2484 "tl_bind: addr overlaps TPI message")); 2485 } 2486 #endif 2487 } 2488 2489 /* 2490 * Bind the address provided or allocate one if requested. 2491 * Allow rebinds with a new qlen value. 2492 */ 2493 if (IS_SOCKET(tep)) { 2494 /* 2495 * For anonymous requests the te_ap is already set up properly 2496 * so use minor number as an address. 2497 * For explicit requests need to check whether the address is 2498 * already in use. 2499 */ 2500 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) { 2501 int rc; 2502 2503 if (tep->te_flag & TL_ADDRHASHED) { 2504 ASSERT(IS_COTS(tep) && tep->te_qlen == 0); 2505 if (tep->te_vp == ux_addr.soua_vp) 2506 goto skip_addr_bind; 2507 else /* Rebind to a new address. */ 2508 tl_addr_unbind(tep); 2509 } 2510 /* 2511 * Insert address in the hash if it is not already 2512 * there. Since we use preallocated handle, the insert 2513 * can fail only if the key is already present. 
2514 */ 2515 rc = mod_hash_insert_reserve(tep->te_addrhash, 2516 (mod_hash_key_t)ux_addr.soua_vp, 2517 (mod_hash_val_t)tep, tep->te_hash_hndl); 2518 2519 if (rc != 0) { 2520 ASSERT(rc == MH_ERR_DUPLICATE); 2521 /* 2522 * Violate O_T_BIND_REQ semantics and fail with 2523 * TADDRBUSY - sockets will not use any address 2524 * other than supplied one for explicit binds. 2525 */ 2526 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2527 SL_TRACE|SL_ERROR, 2528 "tl_bind:requested addr %p is busy", 2529 ux_addr.soua_vp)); 2530 tli_err = TADDRBUSY; unix_err = 0; 2531 goto error; 2532 } 2533 tep->te_uxaddr = ux_addr; 2534 tep->te_flag |= TL_ADDRHASHED; 2535 tep->te_hash_hndl = NULL; 2536 } 2537 } else if (alen == 0) { 2538 /* 2539 * assign any free address 2540 */ 2541 if (! tl_get_any_addr(tep, NULL)) { 2542 (void) (STRLOG(TL_ID, tep->te_minor, 2543 1, SL_TRACE|SL_ERROR, 2544 "tl_bind:failed to get buffer for any " 2545 "address")); 2546 tli_err = TSYSERR; unix_err = ENOSR; 2547 goto error; 2548 } 2549 } else { 2550 addr_req.ta_alen = alen; 2551 addr_req.ta_abuf = (mp->b_rptr + aoff); 2552 addr_req.ta_zoneid = tep->te_zoneid; 2553 2554 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 2555 if (tep->te_abuf == NULL) { 2556 tli_err = TSYSERR; unix_err = ENOSR; 2557 goto error; 2558 } 2559 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen); 2560 tep->te_alen = alen; 2561 2562 if (mod_hash_insert_reserve(tep->te_addrhash, 2563 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 2564 tep->te_hash_hndl) != 0) { 2565 if (save_prim_type == T_BIND_REQ) { 2566 /* 2567 * The bind semantics for this primitive 2568 * require a failure if the exact address 2569 * requested is busy 2570 */ 2571 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2572 SL_TRACE|SL_ERROR, 2573 "tl_bind:requested addr is busy")); 2574 tli_err = TADDRBUSY; unix_err = 0; 2575 goto error; 2576 } 2577 2578 /* 2579 * O_T_BIND_REQ semantics say if address if requested 2580 * address is busy, bind to any available free address 
2581 */ 2582 if (! tl_get_any_addr(tep, &addr_req)) { 2583 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2584 SL_TRACE|SL_ERROR, 2585 "tl_bind:unable to get any addr buf")); 2586 tli_err = TSYSERR; unix_err = ENOMEM; 2587 goto error; 2588 } 2589 } else { 2590 tep->te_flag |= TL_ADDRHASHED; 2591 tep->te_hash_hndl = NULL; 2592 } 2593 } 2594 2595 ASSERT(tep->te_alen >= 0); 2596 2597 skip_addr_bind: 2598 /* 2599 * prepare T_BIND_ACK TPI message 2600 */ 2601 basize = sizeof (struct T_bind_ack) + tep->te_alen; 2602 bamp = reallocb(mp, basize, 0); 2603 if (bamp == NULL) { 2604 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2605 "tl_wput:tl_bind: allocb failed")); 2606 /* 2607 * roll back state changes 2608 */ 2609 tl_addr_unbind(tep); 2610 tep->te_state = TS_UNBND; 2611 tl_memrecover(wq, mp, basize); 2612 return; 2613 } 2614 2615 DB_TYPE(bamp) = M_PCPROTO; 2616 bamp->b_wptr = bamp->b_rptr + basize; 2617 b_ack = (struct T_bind_ack *)bamp->b_rptr; 2618 b_ack->PRIM_type = T_BIND_ACK; 2619 b_ack->CONIND_number = qlen; 2620 b_ack->ADDR_length = tep->te_alen; 2621 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack); 2622 addr_startp = bamp->b_rptr + b_ack->ADDR_offset; 2623 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 2624 2625 if (IS_COTS(tep)) { 2626 tep->te_qlen = qlen; 2627 if (qlen > 0) 2628 tep->te_flag |= TL_LISTENER; 2629 } 2630 2631 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state); 2632 /* 2633 * send T_BIND_ACK message 2634 */ 2635 (void) qreply(wq, bamp); 2636 return; 2637 2638 error: 2639 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0); 2640 if (ackmp == NULL) { 2641 /* 2642 * roll back state changes 2643 */ 2644 tep->te_state = save_state; 2645 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2646 return; 2647 } 2648 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 2649 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type); 2650 } 2651 2652 /* 2653 * Process T_UNBIND_REQ. 2654 * Called from serializer. 
2655 */ 2656 static void 2657 tl_unbind(mblk_t *mp, tl_endpt_t *tep) 2658 { 2659 queue_t *wq; 2660 mblk_t *ackmp; 2661 2662 if (tep->te_closing) { 2663 freemsg(mp); 2664 return; 2665 } 2666 2667 wq = tep->te_wq; 2668 2669 /* 2670 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK 2671 * ==> allocate for T_ERROR_ACK (known max) 2672 */ 2673 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) { 2674 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2675 return; 2676 } 2677 /* 2678 * memory resources committed 2679 * Note: no message validation. T_UNBIND_REQ message is 2680 * same size as PRIM_type field so already verified earlier. 2681 */ 2682 2683 /* 2684 * validate state 2685 */ 2686 if (tep->te_state != TS_IDLE) { 2687 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2688 SL_TRACE|SL_ERROR, 2689 "tl_wput:T_UNBIND_REQ:out of state, state=%d", 2690 tep->te_state)); 2691 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ); 2692 return; 2693 } 2694 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state); 2695 2696 /* 2697 * TPI says on T_UNBIND_REQ: 2698 * send up a M_FLUSH to flush both 2699 * read and write queues 2700 */ 2701 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 2702 2703 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 || 2704 tep->te_magic != SOU_MAGIC_EXPLICIT) { 2705 2706 /* 2707 * Sockets use bind with qlen==0 followed by bind() to 2708 * the same address with qlen > 0 for listeners. 2709 * We allow rebind with a new qlen value. 2710 */ 2711 tl_addr_unbind(tep); 2712 } 2713 2714 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2715 /* 2716 * send T_OK_ACK 2717 */ 2718 tl_ok_ack(wq, ackmp, T_UNBIND_REQ); 2719 } 2720 2721 2722 /* 2723 * Option management code from drv/ip is used here 2724 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr 2725 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ. 
2726 * However, that is what we want as that option is 'unorthodox' 2727 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND 2728 * and not in T_SVR4_OPTMGMT_REQ/ACK 2729 * Note2: use of optcom_req means this routine is an exception to 2730 * recovery from allocb() failures. 2731 */ 2732 2733 static void 2734 tl_optmgmt(queue_t *wq, mblk_t *mp) 2735 { 2736 tl_endpt_t *tep; 2737 mblk_t *ackmp; 2738 union T_primitives *prim; 2739 cred_t *cr; 2740 2741 tep = (tl_endpt_t *)wq->q_ptr; 2742 prim = (union T_primitives *)mp->b_rptr; 2743 2744 /* 2745 * All Solaris components should pass a db_credp 2746 * for this TPI message, hence we ASSERT. 2747 * But in case there is some other M_PROTO that looks 2748 * like a TPI message sent by some other kernel 2749 * component, we check and return an error. 2750 */ 2751 cr = msg_getcred(mp, NULL); 2752 ASSERT(cr != NULL); 2753 if (cr == NULL) { 2754 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type); 2755 return; 2756 } 2757 2758 /* all states OK for AF_UNIX options ? */ 2759 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE && 2760 prim->type == T_SVR4_OPTMGMT_REQ) { 2761 /* 2762 * Broken TLI semantics that options can only be managed 2763 * in TS_IDLE state. Needed for Sparc ABI test suite that 2764 * tests this TLI (mis)feature using this device driver. 2765 */ 2766 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2767 SL_TRACE|SL_ERROR, 2768 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d", 2769 tep->te_state)); 2770 /* 2771 * preallocate memory for T_ERROR_ACK 2772 */ 2773 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2774 if (! 
ackmp) { 2775 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2776 return; 2777 } 2778 2779 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ); 2780 freemsg(mp); 2781 return; 2782 } 2783 2784 /* 2785 * call common option management routine from drv/ip 2786 */ 2787 if (prim->type == T_SVR4_OPTMGMT_REQ) { 2788 svr4_optcom_req(wq, mp, cr, &tl_opt_obj); 2789 } else { 2790 ASSERT(prim->type == T_OPTMGMT_REQ); 2791 tpi_optcom_req(wq, mp, cr, &tl_opt_obj); 2792 } 2793 } 2794 2795 /* 2796 * Handle T_conn_req - the driver part of accept(). 2797 * If TL_SET[U]CRED generate the credentials options. 2798 * If this is a socket pass through options unmodified. 2799 * For sockets generate the T_CONN_CON here instead of 2800 * waiting for the T_CONN_RES. 2801 */ 2802 static void 2803 tl_conn_req(queue_t *wq, mblk_t *mp) 2804 { 2805 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 2806 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr; 2807 ssize_t msz = MBLKL(mp); 2808 t_scalar_t alen, aoff, olen, ooff, err = 0; 2809 tl_endpt_t *peer_tep = NULL; 2810 mblk_t *ackmp; 2811 mblk_t *dimp; 2812 struct T_discon_ind *di; 2813 soux_addr_t ux_addr; 2814 tl_addr_t dst; 2815 2816 ASSERT(IS_COTS(tep)); 2817 2818 if (tep->te_closing) { 2819 freemsg(mp); 2820 return; 2821 } 2822 2823 /* 2824 * preallocate memory for: 2825 * 1. max of T_ERROR_ACK and T_OK_ACK 2826 * ==> known max T_ERROR_ACK 2827 * 2. max of T_DISCON_IND and T_CONN_IND 2828 */ 2829 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 2830 if (! 
ackmp) { 2831 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 2832 return; 2833 } 2834 /* 2835 * memory committed for T_OK_ACK/T_ERROR_ACK now 2836 * will be committed for T_DISCON_IND/T_CONN_IND later 2837 */ 2838 2839 if (tep->te_state != TS_IDLE) { 2840 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2841 SL_TRACE|SL_ERROR, 2842 "tl_wput:T_CONN_REQ:out of state, state=%d", 2843 tep->te_state)); 2844 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2845 freemsg(mp); 2846 return; 2847 } 2848 2849 /* 2850 * validate the message 2851 * Note: dereference fields in struct inside message only 2852 * after validating the message length. 2853 */ 2854 if (msz < sizeof (struct T_conn_req)) { 2855 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2856 "tl_conn_req:invalid message length")); 2857 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2858 freemsg(mp); 2859 return; 2860 } 2861 alen = creq->DEST_length; 2862 aoff = creq->DEST_offset; 2863 olen = creq->OPT_length; 2864 ooff = creq->OPT_offset; 2865 if (olen == 0) 2866 ooff = 0; 2867 2868 if (IS_SOCKET(tep)) { 2869 if ((alen != TL_SOUX_ADDRLEN) || 2870 (aoff < 0) || 2871 (aoff + alen > msz) || 2872 (alen > msz - sizeof (struct T_conn_req))) { 2873 (void) (STRLOG(TL_ID, tep->te_minor, 2874 1, SL_TRACE|SL_ERROR, 2875 "tl_conn_req: invalid socket addr")); 2876 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2877 freemsg(mp); 2878 return; 2879 } 2880 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 2881 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 2882 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 2883 (void) (STRLOG(TL_ID, tep->te_minor, 2884 1, SL_TRACE|SL_ERROR, 2885 "tl_conn_req: invalid socket magic")); 2886 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2887 freemsg(mp); 2888 return; 2889 } 2890 } else { 2891 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) || 2892 (olen > 0 && ((ssize_t)(ooff + olen) > msz || 2893 ooff + olen < 0)) || 2894 olen < 0 || ooff < 0) { 2895 
(void) (STRLOG(TL_ID, tep->te_minor, 1, 2896 SL_TRACE|SL_ERROR, 2897 "tl_conn_req:invalid message")); 2898 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ); 2899 freemsg(mp); 2900 return; 2901 } 2902 2903 if (alen <= 0 || aoff < 0 || 2904 (ssize_t)alen > msz - sizeof (struct T_conn_req)) { 2905 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2906 SL_TRACE|SL_ERROR, 2907 "tl_conn_req:bad addr in message, " 2908 "alen=%d, msz=%ld", 2909 alen, msz)); 2910 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ); 2911 freemsg(mp); 2912 return; 2913 } 2914 #ifdef DEBUG 2915 /* 2916 * Mild form of ASSERT()ion to detect broken TPI apps. 2917 * if (! assertion) 2918 * log warning; 2919 */ 2920 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) { 2921 (void) (STRLOG(TL_ID, tep->te_minor, 3, 2922 SL_TRACE|SL_ERROR, 2923 "tl_conn_req: addr overlaps TPI message")); 2924 } 2925 #endif 2926 if (olen) { 2927 /* 2928 * no opts in connect req 2929 * supported in this provider except for sockets. 2930 */ 2931 (void) (STRLOG(TL_ID, tep->te_minor, 1, 2932 SL_TRACE|SL_ERROR, 2933 "tl_conn_req:options not supported " 2934 "in message")); 2935 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ); 2936 freemsg(mp); 2937 return; 2938 } 2939 } 2940 2941 /* 2942 * Prevent tep from closing on us. 2943 */ 2944 if (! tl_noclose(tep)) { 2945 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2946 "tl_conn_req:endpoint is closing")); 2947 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ); 2948 freemsg(mp); 2949 return; 2950 } 2951 2952 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state); 2953 /* 2954 * get endpoint to connect to 2955 * check that peer with DEST addr is bound to addr 2956 * and has CONIND_number > 0 2957 */ 2958 dst.ta_alen = alen; 2959 dst.ta_abuf = mp->b_rptr + aoff; 2960 dst.ta_zoneid = tep->te_zoneid; 2961 2962 /* 2963 * Verify if remote addr is in use 2964 */ 2965 peer_tep = (IS_SOCKET(tep) ? 
2966 tl_sock_find_peer(tep, &ux_addr) : 2967 tl_find_peer(tep, &dst)); 2968 2969 if (peer_tep == NULL) { 2970 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 2971 "tl_conn_req:no one at connect address")); 2972 err = ECONNREFUSED; 2973 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) { 2974 /* 2975 * validate that number of incoming connection is 2976 * not to capacity on destination endpoint 2977 */ 2978 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 2979 "tl_conn_req: qlen overflow connection refused")); 2980 err = ECONNREFUSED; 2981 } 2982 2983 /* 2984 * Send T_DISCON_IND in case of error 2985 */ 2986 if (err != 0) { 2987 if (peer_tep != NULL) 2988 tl_refrele(peer_tep); 2989 /* We are still expected to send T_OK_ACK */ 2990 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 2991 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ); 2992 tl_closeok(tep); 2993 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind), 2994 M_PROTO, T_DISCON_IND); 2995 if (dimp == NULL) { 2996 tl_merror(wq, NULL, ENOSR); 2997 return; 2998 } 2999 di = (struct T_discon_ind *)dimp->b_rptr; 3000 di->DISCON_reason = err; 3001 di->SEQ_number = BADSEQNUM; 3002 3003 tep->te_state = TS_IDLE; 3004 /* 3005 * send T_DISCON_IND message 3006 */ 3007 putnext(tep->te_rq, dimp); 3008 return; 3009 } 3010 3011 ASSERT(IS_COTS(peer_tep)); 3012 3013 /* 3014 * Found the listener. At this point processing will continue on 3015 * listener serializer. Close of the endpoint should be blocked while we 3016 * switch serializers. 3017 */ 3018 tl_serializer_refhold(peer_tep->te_ser); 3019 tl_serializer_refrele(tep->te_ser); 3020 tep->te_ser = peer_tep->te_ser; 3021 ASSERT(tep->te_oconp == NULL); 3022 tep->te_oconp = peer_tep; 3023 3024 /* 3025 * It is safe to close now. Close may continue on listener serializer. 3026 */ 3027 tl_closeok(tep); 3028 3029 /* 3030 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user 3031 * data, so we link mp to ackmp. 
3032 */ 3033 ackmp->b_cont = mp; 3034 mp = ackmp; 3035 3036 tl_refhold(tep); 3037 tl_serializer_enter(tep, tl_conn_req_ser, mp); 3038 } 3039 3040 /* 3041 * Finish T_CONN_REQ processing on listener serializer. 3042 */ 3043 static void 3044 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep) 3045 { 3046 queue_t *wq; 3047 tl_endpt_t *peer_tep = tep->te_oconp; 3048 mblk_t *confmp, *cimp, *indmp; 3049 void *opts = NULL; 3050 mblk_t *ackmp = mp; 3051 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr; 3052 struct T_conn_ind *ci; 3053 tl_icon_t *tip; 3054 void *addr_startp; 3055 t_scalar_t olen = creq->OPT_length; 3056 t_scalar_t ooff = creq->OPT_offset; 3057 size_t ci_msz; 3058 size_t size; 3059 cred_t *cr = NULL; 3060 pid_t cpid; 3061 3062 if (tep->te_closing) { 3063 TL_UNCONNECT(tep->te_oconp); 3064 tl_serializer_exit(tep); 3065 tl_refrele(tep); 3066 freemsg(mp); 3067 return; 3068 } 3069 3070 wq = tep->te_wq; 3071 tep->te_flag |= TL_EAGER; 3072 3073 /* 3074 * Extract preallocated ackmp from mp. 
3075 */ 3076 mp = mp->b_cont; 3077 ackmp->b_cont = NULL; 3078 3079 if (olen == 0) 3080 ooff = 0; 3081 3082 if (peer_tep->te_closing || 3083 !((peer_tep->te_state == TS_IDLE) || 3084 (peer_tep->te_state == TS_WRES_CIND))) { 3085 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR, 3086 "tl_conn_req:peer in bad state (%d)", 3087 peer_tep->te_state)); 3088 TL_UNCONNECT(tep->te_oconp); 3089 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ); 3090 freemsg(ackmp); 3091 tl_serializer_exit(tep); 3092 tl_refrele(tep); 3093 return; 3094 } 3095 3096 /* 3097 * preallocate now for T_DISCON_IND or T_CONN_IND 3098 */ 3099 /* 3100 * calculate length of T_CONN_IND message 3101 */ 3102 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3103 cr = msg_getcred(mp, &cpid); 3104 ASSERT(cr != NULL); 3105 if (peer_tep->te_flag & TL_SETCRED) { 3106 ooff = 0; 3107 olen = (t_scalar_t) sizeof (struct opthdr) + 3108 OPTLEN(sizeof (tl_credopt_t)); 3109 /* 1 option only */ 3110 } else { 3111 ooff = 0; 3112 olen = (t_scalar_t)sizeof (struct opthdr) + 3113 OPTLEN(ucredminsize(cr)); 3114 /* 1 option only */ 3115 } 3116 } 3117 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen; 3118 ci_msz = T_ALIGN(ci_msz) + olen; 3119 size = max(ci_msz, sizeof (struct T_discon_ind)); 3120 3121 /* 3122 * Save options from mp - we'll need them for T_CONN_IND. 3123 */ 3124 if (ooff != 0) { 3125 opts = kmem_alloc(olen, KM_NOSLEEP); 3126 if (opts == NULL) { 3127 /* 3128 * roll back state changes 3129 */ 3130 tep->te_state = TS_IDLE; 3131 tl_memrecover(wq, mp, size); 3132 freemsg(ackmp); 3133 TL_UNCONNECT(tep->te_oconp); 3134 tl_serializer_exit(tep); 3135 tl_refrele(tep); 3136 return; 3137 } 3138 /* Copy options to a temp buffer */ 3139 bcopy(mp->b_rptr + ooff, opts, olen); 3140 } 3141 3142 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 3143 /* 3144 * Generate a T_CONN_CON that has the identical address 3145 * (and options) as the T_CONN_REQ. 
3146 * NOTE: assumes that the T_conn_req and T_conn_con structures 3147 * are isomorphic. 3148 */ 3149 confmp = copyb(mp); 3150 if (! confmp) { 3151 /* 3152 * roll back state changes 3153 */ 3154 tep->te_state = TS_IDLE; 3155 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr); 3156 freemsg(ackmp); 3157 if (opts != NULL) 3158 kmem_free(opts, olen); 3159 TL_UNCONNECT(tep->te_oconp); 3160 tl_serializer_exit(tep); 3161 tl_refrele(tep); 3162 return; 3163 } 3164 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type = 3165 T_CONN_CON; 3166 } else { 3167 confmp = NULL; 3168 } 3169 if ((indmp = reallocb(mp, size, 0)) == NULL) { 3170 /* 3171 * roll back state changes 3172 */ 3173 tep->te_state = TS_IDLE; 3174 tl_memrecover(wq, mp, size); 3175 freemsg(ackmp); 3176 if (opts != NULL) 3177 kmem_free(opts, olen); 3178 freemsg(confmp); 3179 TL_UNCONNECT(tep->te_oconp); 3180 tl_serializer_exit(tep); 3181 tl_refrele(tep); 3182 return; 3183 } 3184 3185 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 3186 if (tip == NULL) { 3187 /* 3188 * roll back state changes 3189 */ 3190 tep->te_state = TS_IDLE; 3191 tl_memrecover(wq, indmp, sizeof (*tip)); 3192 freemsg(ackmp); 3193 if (opts != NULL) 3194 kmem_free(opts, olen); 3195 freemsg(confmp); 3196 TL_UNCONNECT(tep->te_oconp); 3197 tl_serializer_exit(tep); 3198 tl_refrele(tep); 3199 return; 3200 } 3201 tip->ti_mp = NULL; 3202 3203 /* 3204 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON 3205 * and tl_icon_t cell. 3206 */ 3207 3208 /* 3209 * ack validity of request and send the peer credential in the ACK. 
3210 */ 3211 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state); 3212 3213 if (peer_tep != NULL && peer_tep->te_credp != NULL && 3214 confmp != NULL) { 3215 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid); 3216 } 3217 3218 tl_ok_ack(wq, ackmp, T_CONN_REQ); 3219 3220 /* 3221 * prepare message to send T_CONN_IND 3222 */ 3223 /* 3224 * allocate the message - original data blocks retained 3225 * in the returned mblk 3226 */ 3227 cimp = tl_resizemp(indmp, size); 3228 if (! cimp) { 3229 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3230 "tl_conn_req:con_ind:allocb failure")); 3231 tl_merror(wq, indmp, ENOMEM); 3232 TL_UNCONNECT(tep->te_oconp); 3233 tl_serializer_exit(tep); 3234 tl_refrele(tep); 3235 if (opts != NULL) 3236 kmem_free(opts, olen); 3237 freemsg(confmp); 3238 ASSERT(tip->ti_mp == NULL); 3239 kmem_free(tip, sizeof (*tip)); 3240 return; 3241 } 3242 3243 DB_TYPE(cimp) = M_PROTO; 3244 ci = (struct T_conn_ind *)cimp->b_rptr; 3245 ci->PRIM_type = T_CONN_IND; 3246 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind); 3247 ci->SRC_length = tep->te_alen; 3248 ci->SEQ_number = tep->te_seqno; 3249 3250 addr_startp = cimp->b_rptr + ci->SRC_offset; 3251 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 3252 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3253 3254 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3255 ci->SRC_length); 3256 ci->OPT_length = olen; /* because only 1 option */ 3257 tl_fill_option(cimp->b_rptr + ci->OPT_offset, 3258 cr, cpid, 3259 peer_tep->te_flag, peer_tep->te_credp); 3260 } else if (ooff != 0) { 3261 /* Copy option from T_CONN_REQ */ 3262 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + 3263 ci->SRC_length); 3264 ci->OPT_length = olen; 3265 ASSERT(opts != NULL); 3266 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen); 3267 } else { 3268 ci->OPT_offset = 0; 3269 ci->OPT_length = 0; 3270 } 3271 if (opts != NULL) 3272 kmem_free(opts, olen); 3273 3274 /* 3275 * register connection request with 
server peer 3276 * append to list of incoming connections 3277 * increment references for both peer_tep and tep: peer_tep is placed on 3278 * te_oconp and tep is placed on listeners queue. 3279 */ 3280 tip->ti_tep = tep; 3281 tip->ti_seqno = tep->te_seqno; 3282 list_insert_tail(&peer_tep->te_iconp, tip); 3283 peer_tep->te_nicon++; 3284 3285 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state); 3286 /* 3287 * send the T_CONN_IND message 3288 */ 3289 putnext(peer_tep->te_rq, cimp); 3290 3291 /* 3292 * Send a T_CONN_CON message for sockets. 3293 * Disable the queues until we have reached the correct state! 3294 */ 3295 if (confmp != NULL) { 3296 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state); 3297 noenable(wq); 3298 putnext(tep->te_rq, confmp); 3299 } 3300 /* 3301 * Now we need to increment tep reference because tep is referenced by 3302 * server list of pending connections. We also need to decrement 3303 * reference before exiting serializer. Two operations void each other 3304 * so we don't modify reference at all. 3305 */ 3306 ASSERT(tep->te_refcnt >= 2); 3307 ASSERT(peer_tep->te_refcnt >= 2); 3308 tl_serializer_exit(tep); 3309 } 3310 3311 3312 3313 /* 3314 * Handle T_conn_res on listener stream. Called on listener serializer. 3315 * tl_conn_req has already generated the T_CONN_CON. 3316 * tl_conn_res is called on listener serializer. 3317 * No one accesses acceptor at this point, so it is safe to modify acceptor. 3318 * Switch eager serializer to acceptor's. 3319 * 3320 * If TL_SET[U]CRED generate the credentials options. 3321 * For sockets tl_conn_req has already generated the T_CONN_CON. 
3322 */ 3323 static void 3324 tl_conn_res(mblk_t *mp, tl_endpt_t *tep) 3325 { 3326 queue_t *wq; 3327 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr; 3328 ssize_t msz = MBLKL(mp); 3329 t_scalar_t olen, ooff, err = 0; 3330 t_scalar_t prim = cres->PRIM_type; 3331 uchar_t *addr_startp; 3332 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL; 3333 tl_icon_t *tip; 3334 size_t size; 3335 mblk_t *ackmp, *respmp; 3336 mblk_t *dimp, *ccmp = NULL; 3337 struct T_discon_ind *di; 3338 struct T_conn_con *cc; 3339 boolean_t client_noclose_set = B_FALSE; 3340 boolean_t switch_client_serializer = B_TRUE; 3341 3342 ASSERT(IS_COTS(tep)); 3343 3344 if (tep->te_closing) { 3345 freemsg(mp); 3346 return; 3347 } 3348 3349 wq = tep->te_wq; 3350 3351 /* 3352 * preallocate memory for: 3353 * 1. max of T_ERROR_ACK and T_OK_ACK 3354 * ==> known max T_ERROR_ACK 3355 * 2. max of T_DISCON_IND and T_CONN_CON 3356 */ 3357 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3358 if (! ackmp) { 3359 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3360 return; 3361 } 3362 /* 3363 * memory committed for T_OK_ACK/T_ERROR_ACK now 3364 * will be committed for T_DISCON_IND/T_CONN_CON later 3365 */ 3366 3367 3368 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES); 3369 3370 /* 3371 * validate state 3372 */ 3373 if (tep->te_state != TS_WRES_CIND) { 3374 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3375 SL_TRACE|SL_ERROR, 3376 "tl_wput:T_CONN_RES:out of state, state=%d", 3377 tep->te_state)); 3378 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3379 freemsg(mp); 3380 return; 3381 } 3382 3383 /* 3384 * validate the message 3385 * Note: dereference fields in struct inside message only 3386 * after validating the message length. 
3387 */ 3388 if (msz < sizeof (struct T_conn_res)) { 3389 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3390 "tl_conn_res:invalid message length")); 3391 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3392 freemsg(mp); 3393 return; 3394 } 3395 olen = cres->OPT_length; 3396 ooff = cres->OPT_offset; 3397 if (((olen > 0) && ((ooff + olen) > msz))) { 3398 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3399 "tl_conn_res:invalid message")); 3400 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim); 3401 freemsg(mp); 3402 return; 3403 } 3404 if (olen) { 3405 /* 3406 * no opts in connect res 3407 * supported in this provider 3408 */ 3409 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3410 "tl_conn_res:options not supported in message")); 3411 tl_error_ack(wq, ackmp, TBADOPT, 0, prim); 3412 freemsg(mp); 3413 return; 3414 } 3415 3416 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state); 3417 ASSERT(tep->te_state == TS_WACK_CRES); 3418 3419 if (cres->SEQ_number < TL_MINOR_START && 3420 cres->SEQ_number >= BADSEQNUM) { 3421 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3422 "tl_conn_res:remote endpoint sequence number bad")); 3423 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3424 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3425 freemsg(mp); 3426 return; 3427 } 3428 3429 /* 3430 * find accepting endpoint. Will have extra reference if found. 3431 */ 3432 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash, 3433 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id, 3434 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) { 3435 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3436 "tl_conn_res:bad accepting endpoint")); 3437 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3438 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3439 freemsg(mp); 3440 return; 3441 } 3442 3443 /* 3444 * Prevent acceptor from closing. 3445 */ 3446 if (! 
tl_noclose(acc_ep)) { 3447 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3448 "tl_conn_res:bad accepting endpoint")); 3449 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3450 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3451 tl_refrele(acc_ep); 3452 freemsg(mp); 3453 return; 3454 } 3455 3456 acc_ep->te_flag |= TL_ACCEPTOR; 3457 3458 /* 3459 * validate that accepting endpoint, if different from listening 3460 * has address bound => state is TS_IDLE 3461 * TROUBLE in XPG4 !!? 3462 */ 3463 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) { 3464 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3465 "tl_conn_res:accepting endpoint has no address bound," 3466 "state=%d", acc_ep->te_state)); 3467 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3468 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim); 3469 freemsg(mp); 3470 tl_closeok(acc_ep); 3471 tl_refrele(acc_ep); 3472 return; 3473 } 3474 3475 /* 3476 * validate if accepting endpt same as listening, then 3477 * no other incoming connection should be on the queue 3478 */ 3479 3480 if ((tep == acc_ep) && (tep->te_nicon > 1)) { 3481 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 3482 "tl_conn_res: > 1 conn_ind on listener-acceptor")); 3483 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3484 tl_error_ack(wq, ackmp, TBADF, 0, prim); 3485 freemsg(mp); 3486 tl_closeok(acc_ep); 3487 tl_refrele(acc_ep); 3488 return; 3489 } 3490 3491 /* 3492 * Mark for deletion, the entry corresponding to client 3493 * on list of pending connections made by the listener 3494 * search list to see if client is one of the 3495 * recorded as a listener. 
3496 */ 3497 tip = tl_icon_find(tep, cres->SEQ_number); 3498 if (tip == NULL) { 3499 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR, 3500 "tl_conn_res:no client in listener list")); 3501 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state); 3502 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim); 3503 freemsg(mp); 3504 tl_closeok(acc_ep); 3505 tl_refrele(acc_ep); 3506 return; 3507 } 3508 3509 /* 3510 * If ti_tep is NULL the client has already closed. In this case 3511 * the code below will avoid any action on the client side 3512 * but complete the server and acceptor state transitions. 3513 */ 3514 ASSERT(tip->ti_tep == NULL || 3515 tip->ti_tep->te_seqno == cres->SEQ_number); 3516 cl_ep = tip->ti_tep; 3517 3518 /* 3519 * If the client is present it is switched from listener's to acceptor's 3520 * serializer. We should block client closes while serializers are 3521 * being switched. 3522 * 3523 * It is possible that the client is present but is currently being 3524 * closed. There are two possible cases: 3525 * 3526 * 1) The client has already entered tl_close_finish_ser() and sent 3527 * T_ORDREL_IND. In this case we can just ignore the client (but we 3528 * still need to send all messages from tip->ti_mp to the acceptor). 3529 * 3530 * 2) The client started the close but has not entered 3531 * tl_close_finish_ser() yet. In this case, the client is already 3532 * proceeding asynchronously on the listener's serializer, so we're 3533 * forced to change the acceptor to use the listener's serializer to 3534 * ensure that any operations on the acceptor are serialized with 3535 * respect to the close that's in-progress. 3536 */ 3537 if (cl_ep != NULL) { 3538 if (tl_noclose(cl_ep)) { 3539 client_noclose_set = B_TRUE; 3540 } else { 3541 /* 3542 * Client is closing. If it it has sent the 3543 * T_ORDREL_IND, we can simply ignore it - otherwise, 3544 * we have to let let the client continue until it is 3545 * sent. 
3546 * 3547 * If we do continue using the client, acceptor will 3548 * switch to client's serializer which is used by client 3549 * for its close. 3550 */ 3551 tl_client_closing_when_accepting++; 3552 switch_client_serializer = B_FALSE; 3553 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect || 3554 cl_ep->te_state == -1) 3555 cl_ep = NULL; 3556 } 3557 } 3558 3559 if (cl_ep != NULL) { 3560 /* 3561 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER 3562 * (latter for sockets only) 3563 */ 3564 if (cl_ep->te_state != TS_WCON_CREQ && 3565 (cl_ep->te_state != TS_DATA_XFER && 3566 IS_SOCKET(cl_ep))) { 3567 err = ECONNREFUSED; 3568 /* 3569 * T_DISCON_IND sent later after committing memory 3570 * and acking validity of request 3571 */ 3572 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE, 3573 "tl_conn_res:peer in bad state")); 3574 } 3575 3576 /* 3577 * preallocate now for T_DISCON_IND or T_CONN_CONN 3578 * ack validity of request (T_OK_ACK) after memory committed 3579 */ 3580 3581 if (err) 3582 size = sizeof (struct T_discon_ind); 3583 else { 3584 /* 3585 * calculate length of T_CONN_CON message 3586 */ 3587 olen = 0; 3588 if (cl_ep->te_flag & TL_SETCRED) { 3589 olen = (t_scalar_t)sizeof (struct opthdr) + 3590 OPTLEN(sizeof (tl_credopt_t)); 3591 } else if (cl_ep->te_flag & TL_SETUCRED) { 3592 olen = (t_scalar_t)sizeof (struct opthdr) + 3593 OPTLEN(ucredminsize(acc_ep->te_credp)); 3594 } 3595 size = T_ALIGN(sizeof (struct T_conn_con) + 3596 acc_ep->te_alen) + olen; 3597 } 3598 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3599 /* 3600 * roll back state changes 3601 */ 3602 tep->te_state = TS_WRES_CIND; 3603 tl_memrecover(wq, mp, size); 3604 freemsg(ackmp); 3605 if (client_noclose_set) 3606 tl_closeok(cl_ep); 3607 tl_closeok(acc_ep); 3608 tl_refrele(acc_ep); 3609 return; 3610 } 3611 mp = NULL; 3612 } 3613 3614 /* 3615 * Now ack validity of request 3616 */ 3617 if (tep->te_nicon == 1) { 3618 if (tep == acc_ep) 3619 tep->te_state = NEXTSTATE(TE_OK_ACK2, 
tep->te_state); 3620 else 3621 tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state); 3622 } else 3623 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state); 3624 3625 /* 3626 * send T_DISCON_IND now if client state validation failed earlier 3627 */ 3628 if (err) { 3629 tl_ok_ack(wq, ackmp, prim); 3630 /* 3631 * flush the queues - why always ? 3632 */ 3633 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR); 3634 3635 dimp = tl_resizemp(respmp, size); 3636 if (! dimp) { 3637 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3638 SL_TRACE|SL_ERROR, 3639 "tl_conn_res:con_ind:allocb failure")); 3640 tl_merror(wq, respmp, ENOMEM); 3641 tl_closeok(acc_ep); 3642 if (client_noclose_set) 3643 tl_closeok(cl_ep); 3644 tl_refrele(acc_ep); 3645 return; 3646 } 3647 if (dimp->b_cont) { 3648 /* no user data in provider generated discon ind */ 3649 freemsg(dimp->b_cont); 3650 dimp->b_cont = NULL; 3651 } 3652 3653 DB_TYPE(dimp) = M_PROTO; 3654 di = (struct T_discon_ind *)dimp->b_rptr; 3655 di->PRIM_type = T_DISCON_IND; 3656 di->DISCON_reason = err; 3657 di->SEQ_number = BADSEQNUM; 3658 3659 tep->te_state = TS_IDLE; 3660 /* 3661 * send T_DISCON_IND message 3662 */ 3663 putnext(acc_ep->te_rq, dimp); 3664 if (client_noclose_set) 3665 tl_closeok(cl_ep); 3666 tl_closeok(acc_ep); 3667 tl_refrele(acc_ep); 3668 return; 3669 } 3670 3671 /* 3672 * now start connecting the accepting endpoint 3673 */ 3674 if (tep != acc_ep) 3675 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state); 3676 3677 if (cl_ep == NULL) { 3678 /* 3679 * The client has already closed. Send up any queued messages 3680 * and change the state accordingly. 
3681 */ 3682 tl_ok_ack(wq, ackmp, prim); 3683 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3684 3685 /* 3686 * remove endpoint from incoming connection 3687 * delete client from list of incoming connections 3688 */ 3689 tl_freetip(tep, tip); 3690 freemsg(mp); 3691 tl_closeok(acc_ep); 3692 tl_refrele(acc_ep); 3693 return; 3694 } else if (tip->ti_mp != NULL) { 3695 /* 3696 * The client could have queued a T_DISCON_IND which needs 3697 * to be sent up. 3698 * Note that t_discon_req can not operate the same as 3699 * t_data_req since it is not possible for it to putbq 3700 * the message and return -1 due to the use of qwriter. 3701 */ 3702 tl_icon_sendmsgs(acc_ep, &tip->ti_mp); 3703 } 3704 3705 /* 3706 * prepare connect confirm T_CONN_CON message 3707 */ 3708 3709 /* 3710 * allocate the message - original data blocks 3711 * retained in the returned mblk 3712 */ 3713 if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) { 3714 ccmp = tl_resizemp(respmp, size); 3715 if (ccmp == NULL) { 3716 tl_ok_ack(wq, ackmp, prim); 3717 (void) (STRLOG(TL_ID, tep->te_minor, 3, 3718 SL_TRACE|SL_ERROR, 3719 "tl_conn_res:conn_con:allocb failure")); 3720 tl_merror(wq, respmp, ENOMEM); 3721 tl_closeok(acc_ep); 3722 if (client_noclose_set) 3723 tl_closeok(cl_ep); 3724 tl_refrele(acc_ep); 3725 return; 3726 } 3727 3728 DB_TYPE(ccmp) = M_PROTO; 3729 cc = (struct T_conn_con *)ccmp->b_rptr; 3730 cc->PRIM_type = T_CONN_CON; 3731 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con); 3732 cc->RES_length = acc_ep->te_alen; 3733 addr_startp = ccmp->b_rptr + cc->RES_offset; 3734 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen); 3735 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) { 3736 cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset + 3737 cc->RES_length); 3738 cc->OPT_length = olen; 3739 tl_fill_option(ccmp->b_rptr + cc->OPT_offset, 3740 acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag, 3741 cl_ep->te_credp); 3742 } else { 3743 cc->OPT_offset = 0; 3744 cc->OPT_length = 0; 3745 } 3746 /* 3747 * 
Forward the credential in the packet so it can be picked up 3748 * at the higher layers for more complete credential processing 3749 */ 3750 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid); 3751 } else { 3752 freemsg(respmp); 3753 respmp = NULL; 3754 } 3755 3756 /* 3757 * make connection linking 3758 * accepting and client endpoints 3759 * No need to increment references: 3760 * on client: it should already have one from tip->ti_tep linkage. 3761 * on acceptor is should already have one from the table lookup. 3762 * 3763 * At this point both client and acceptor can't close. Set client 3764 * serializer to acceptor's. 3765 */ 3766 ASSERT(cl_ep->te_refcnt >= 2); 3767 ASSERT(acc_ep->te_refcnt >= 2); 3768 ASSERT(cl_ep->te_conp == NULL); 3769 ASSERT(acc_ep->te_conp == NULL); 3770 cl_ep->te_conp = acc_ep; 3771 acc_ep->te_conp = cl_ep; 3772 ASSERT(cl_ep->te_ser == tep->te_ser); 3773 if (switch_client_serializer) { 3774 mutex_enter(&cl_ep->te_ser_lock); 3775 if (cl_ep->te_ser_count > 0) { 3776 switch_client_serializer = B_FALSE; 3777 tl_serializer_noswitch++; 3778 } else { 3779 /* 3780 * Move client to the acceptor's serializer. 3781 */ 3782 tl_serializer_refhold(acc_ep->te_ser); 3783 tl_serializer_refrele(cl_ep->te_ser); 3784 cl_ep->te_ser = acc_ep->te_ser; 3785 } 3786 mutex_exit(&cl_ep->te_ser_lock); 3787 } 3788 if (!switch_client_serializer) { 3789 /* 3790 * It is not possible to switch client to use acceptor's. 3791 * Move acceptor to client's serializer (which is the same as 3792 * listener's). 
3793 */ 3794 tl_serializer_refhold(cl_ep->te_ser); 3795 tl_serializer_refrele(acc_ep->te_ser); 3796 acc_ep->te_ser = cl_ep->te_ser; 3797 } 3798 3799 TL_REMOVE_PEER(cl_ep->te_oconp); 3800 TL_REMOVE_PEER(acc_ep->te_oconp); 3801 3802 /* 3803 * remove endpoint from incoming connection 3804 * delete client from list of incoming connections 3805 */ 3806 tip->ti_tep = NULL; 3807 tl_freetip(tep, tip); 3808 tl_ok_ack(wq, ackmp, prim); 3809 3810 /* 3811 * data blocks already linked in reallocb() 3812 */ 3813 3814 /* 3815 * link queues so that I_SENDFD will work 3816 */ 3817 if (! IS_SOCKET(tep)) { 3818 acc_ep->te_wq->q_next = cl_ep->te_rq; 3819 cl_ep->te_wq->q_next = acc_ep->te_rq; 3820 } 3821 3822 /* 3823 * send T_CONN_CON up on client side unless it was already 3824 * done (for a socket). In cases any data or ordrel req has been 3825 * queued make sure that the service procedure runs. 3826 */ 3827 if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) { 3828 enableok(cl_ep->te_wq); 3829 TL_QENABLE(cl_ep); 3830 if (ccmp != NULL) 3831 freemsg(ccmp); 3832 } else { 3833 /* 3834 * change client state on TE_CONN_CON event 3835 */ 3836 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state); 3837 putnext(cl_ep->te_rq, ccmp); 3838 } 3839 3840 /* Mark the both endpoints as accepted */ 3841 cl_ep->te_flag |= TL_ACCEPTED; 3842 acc_ep->te_flag |= TL_ACCEPTED; 3843 3844 /* 3845 * Allow client and acceptor to close. 
3846 */ 3847 tl_closeok(acc_ep); 3848 if (client_noclose_set) 3849 tl_closeok(cl_ep); 3850 } 3851 3852 3853 3854 3855 static void 3856 tl_discon_req(mblk_t *mp, tl_endpt_t *tep) 3857 { 3858 queue_t *wq; 3859 struct T_discon_req *dr; 3860 ssize_t msz; 3861 tl_endpt_t *peer_tep = tep->te_conp; 3862 tl_endpt_t *srv_tep = tep->te_oconp; 3863 tl_icon_t *tip; 3864 size_t size; 3865 mblk_t *ackmp, *dimp, *respmp; 3866 struct T_discon_ind *di; 3867 t_scalar_t save_state, new_state; 3868 3869 if (tep->te_closing) { 3870 freemsg(mp); 3871 return; 3872 } 3873 3874 if ((peer_tep != NULL) && peer_tep->te_closing) { 3875 TL_UNCONNECT(tep->te_conp); 3876 peer_tep = NULL; 3877 } 3878 if ((srv_tep != NULL) && srv_tep->te_closing) { 3879 TL_UNCONNECT(tep->te_oconp); 3880 srv_tep = NULL; 3881 } 3882 3883 wq = tep->te_wq; 3884 3885 /* 3886 * preallocate memory for: 3887 * 1. max of T_ERROR_ACK and T_OK_ACK 3888 * ==> known max T_ERROR_ACK 3889 * 2. for T_DISCON_IND 3890 */ 3891 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED); 3892 if (! ackmp) { 3893 tl_memrecover(wq, mp, sizeof (struct T_error_ack)); 3894 return; 3895 } 3896 /* 3897 * memory committed for T_OK_ACK/T_ERROR_ACK now 3898 * will be committed for T_DISCON_IND later 3899 */ 3900 3901 dr = (struct T_discon_req *)mp->b_rptr; 3902 msz = MBLKL(mp); 3903 3904 /* 3905 * validate the state 3906 */ 3907 save_state = new_state = tep->te_state; 3908 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) && 3909 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) { 3910 (void) (STRLOG(TL_ID, tep->te_minor, 1, 3911 SL_TRACE|SL_ERROR, 3912 "tl_wput:T_DISCON_REQ:out of state, state=%d", 3913 tep->te_state)); 3914 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ); 3915 freemsg(mp); 3916 return; 3917 } 3918 /* 3919 * Defer committing the state change until it is determined if 3920 * the message will be queued with the tl_icon or not. 
3921 */ 3922 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state); 3923 3924 /* validate the message */ 3925 if (msz < sizeof (struct T_discon_req)) { 3926 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 3927 "tl_discon_req:invalid message")); 3928 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3929 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ); 3930 freemsg(mp); 3931 return; 3932 } 3933 3934 /* 3935 * if server, then validate that client exists 3936 * by connection sequence number etc. 3937 */ 3938 if (tep->te_nicon > 0) { /* server */ 3939 3940 /* 3941 * search server list for disconnect client 3942 */ 3943 tip = tl_icon_find(tep, dr->SEQ_number); 3944 if (tip == NULL) { 3945 (void) (STRLOG(TL_ID, tep->te_minor, 2, 3946 SL_TRACE|SL_ERROR, 3947 "tl_discon_req:no disconnect endpoint")); 3948 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state); 3949 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ); 3950 freemsg(mp); 3951 return; 3952 } 3953 /* 3954 * If ti_tep is NULL the client has already closed. In this case 3955 * the code below will avoid any action on the client side. 3956 */ 3957 3958 IMPLY(tip->ti_tep != NULL, 3959 tip->ti_tep->te_seqno == dr->SEQ_number); 3960 peer_tep = tip->ti_tep; 3961 } 3962 3963 /* 3964 * preallocate now for T_DISCON_IND 3965 * ack validity of request (T_OK_ACK) after memory committed 3966 */ 3967 size = sizeof (struct T_discon_ind); 3968 if ((respmp = reallocb(mp, size, 0)) == NULL) { 3969 tl_memrecover(wq, mp, size); 3970 freemsg(ackmp); 3971 return; 3972 } 3973 3974 /* 3975 * prepare message to ack validity of request 3976 */ 3977 if (tep->te_nicon == 0) 3978 new_state = NEXTSTATE(TE_OK_ACK1, new_state); 3979 else 3980 if (tep->te_nicon == 1) 3981 new_state = NEXTSTATE(TE_OK_ACK2, new_state); 3982 else 3983 new_state = NEXTSTATE(TE_OK_ACK4, new_state); 3984 3985 /* 3986 * Flushing queues according to TPI. Using the old state. 
3987 */ 3988 if ((tep->te_nicon <= 1) && 3989 ((save_state == TS_DATA_XFER) || 3990 (save_state == TS_WIND_ORDREL) || 3991 (save_state == TS_WREQ_ORDREL))) 3992 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW); 3993 3994 /* send T_OK_ACK up */ 3995 tl_ok_ack(wq, ackmp, T_DISCON_REQ); 3996 3997 /* 3998 * now do disconnect business 3999 */ 4000 if (tep->te_nicon > 0) { /* listener */ 4001 if (peer_tep != NULL && !peer_tep->te_closing) { 4002 /* 4003 * disconnect incoming connect request pending to tep 4004 */ 4005 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4006 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4007 SL_TRACE|SL_ERROR, 4008 "tl_discon_req: reallocb failed")); 4009 tep->te_state = new_state; 4010 tl_merror(wq, respmp, ENOMEM); 4011 return; 4012 } 4013 di = (struct T_discon_ind *)dimp->b_rptr; 4014 di->SEQ_number = BADSEQNUM; 4015 save_state = peer_tep->te_state; 4016 peer_tep->te_state = TS_IDLE; 4017 4018 TL_REMOVE_PEER(peer_tep->te_oconp); 4019 enableok(peer_tep->te_wq); 4020 TL_QENABLE(peer_tep); 4021 } else { 4022 freemsg(respmp); 4023 dimp = NULL; 4024 } 4025 4026 /* 4027 * remove endpoint from incoming connection list 4028 * - remove disconnect client from list on server 4029 */ 4030 tl_freetip(tep, tip); 4031 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */ 4032 /* 4033 * disconnect an outgoing request pending from tep 4034 */ 4035 4036 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4037 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4038 SL_TRACE|SL_ERROR, 4039 "tl_discon_req: reallocb failed")); 4040 tep->te_state = new_state; 4041 tl_merror(wq, respmp, ENOMEM); 4042 return; 4043 } 4044 di = (struct T_discon_ind *)dimp->b_rptr; 4045 DB_TYPE(dimp) = M_PROTO; 4046 di->PRIM_type = T_DISCON_IND; 4047 di->DISCON_reason = ECONNRESET; 4048 di->SEQ_number = tep->te_seqno; 4049 4050 /* 4051 * If this is a socket the T_DISCON_IND is queued with 4052 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 4053 * from the list of pending connections. 
4054 * Note that when te_oconp is set the peer better have 4055 * a t_connind_t for the client. 4056 */ 4057 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 4058 /* 4059 * No need to check that 4060 * ti_tep == NULL since the T_DISCON_IND 4061 * takes precedence over other queued 4062 * messages. 4063 */ 4064 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp); 4065 peer_tep = NULL; 4066 dimp = NULL; 4067 /* 4068 * Can't clear te_oconp since tl_co_unconnect needs 4069 * it as a hint not to free the tep. 4070 * Keep the state unchanged since tl_conn_res inspects 4071 * it. 4072 */ 4073 new_state = tep->te_state; 4074 } else { 4075 /* Found - delete it */ 4076 tip = tl_icon_find(peer_tep, tep->te_seqno); 4077 if (tip != NULL) { 4078 ASSERT(tep == tip->ti_tep); 4079 save_state = peer_tep->te_state; 4080 if (peer_tep->te_nicon == 1) 4081 peer_tep->te_state = 4082 NEXTSTATE(TE_DISCON_IND2, 4083 peer_tep->te_state); 4084 else 4085 peer_tep->te_state = 4086 NEXTSTATE(TE_DISCON_IND3, 4087 peer_tep->te_state); 4088 tl_freetip(peer_tep, tip); 4089 } 4090 ASSERT(tep->te_oconp != NULL); 4091 TL_UNCONNECT(tep->te_oconp); 4092 } 4093 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! 
*/ 4094 if ((dimp = tl_resizemp(respmp, size)) == NULL) { 4095 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4096 SL_TRACE|SL_ERROR, 4097 "tl_discon_req: reallocb failed")); 4098 tep->te_state = new_state; 4099 tl_merror(wq, respmp, ENOMEM); 4100 return; 4101 } 4102 di = (struct T_discon_ind *)dimp->b_rptr; 4103 di->SEQ_number = BADSEQNUM; 4104 4105 save_state = peer_tep->te_state; 4106 peer_tep->te_state = TS_IDLE; 4107 } else { 4108 /* Not connected */ 4109 tep->te_state = new_state; 4110 freemsg(respmp); 4111 return; 4112 } 4113 4114 /* Commit state changes */ 4115 tep->te_state = new_state; 4116 4117 if (peer_tep == NULL) { 4118 ASSERT(dimp == NULL); 4119 goto done; 4120 } 4121 /* 4122 * Flush queues on peer before sending up 4123 * T_DISCON_IND according to TPI 4124 */ 4125 4126 if ((save_state == TS_DATA_XFER) || 4127 (save_state == TS_WIND_ORDREL) || 4128 (save_state == TS_WREQ_ORDREL)) 4129 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW); 4130 4131 DB_TYPE(dimp) = M_PROTO; 4132 di->PRIM_type = T_DISCON_IND; 4133 di->DISCON_reason = ECONNRESET; 4134 4135 /* 4136 * data blocks already linked into dimp by reallocb() 4137 */ 4138 /* 4139 * send indication message to peer user module 4140 */ 4141 ASSERT(dimp != NULL); 4142 putnext(peer_tep->te_rq, dimp); 4143 done: 4144 if (tep->te_conp) { /* disconnect pointers if connected */ 4145 ASSERT(! peer_tep->te_closing); 4146 4147 /* 4148 * Messages may be queued on peer's write queue 4149 * waiting to be processed by its write service 4150 * procedure. Before the pointer to the peer transport 4151 * structure is set to NULL, qenable the peer's write 4152 * queue so that the queued up messages are processed. 4153 */ 4154 if ((save_state == TS_DATA_XFER) || 4155 (save_state == TS_WIND_ORDREL) || 4156 (save_state == TS_WREQ_ORDREL)) 4157 TL_QENABLE(peer_tep); 4158 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL); 4159 TL_UNCONNECT(peer_tep->te_conp); 4160 if (! 
IS_SOCKET(tep)) { 4161 /* 4162 * unlink the streams 4163 */ 4164 tep->te_wq->q_next = NULL; 4165 peer_tep->te_wq->q_next = NULL; 4166 } 4167 TL_UNCONNECT(tep->te_conp); 4168 } 4169 } 4170 4171 static void 4172 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep) 4173 { 4174 if (!tep->te_closing) 4175 tl_addr_req(mp, tep); 4176 else 4177 freemsg(mp); 4178 4179 tl_serializer_exit(tep); 4180 tl_refrele(tep); 4181 } 4182 4183 static void 4184 tl_addr_req(mblk_t *mp, tl_endpt_t *tep) 4185 { 4186 queue_t *wq; 4187 size_t ack_sz; 4188 mblk_t *ackmp; 4189 struct T_addr_ack *taa; 4190 4191 if (tep->te_closing) { 4192 freemsg(mp); 4193 return; 4194 } 4195 4196 wq = tep->te_wq; 4197 4198 /* 4199 * Note: T_ADDR_REQ message has only PRIM_type field 4200 * so it is already validated earlier. 4201 */ 4202 4203 if (IS_CLTS(tep) || 4204 (tep->te_state > TS_WREQ_ORDREL) || 4205 (tep->te_state < TS_DATA_XFER)) { 4206 /* 4207 * Either connectionless or connection oriented but not 4208 * in connected data transfer state or half-closed states. 
4209 */ 4210 ack_sz = sizeof (struct T_addr_ack); 4211 if (tep->te_state >= TS_IDLE) 4212 /* is bound */ 4213 ack_sz += tep->te_alen; 4214 ackmp = reallocb(mp, ack_sz, 0); 4215 if (ackmp == NULL) { 4216 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4217 SL_TRACE|SL_ERROR, 4218 "tl_addr_req: reallocb failed")); 4219 tl_memrecover(wq, mp, ack_sz); 4220 return; 4221 } 4222 4223 taa = (struct T_addr_ack *)ackmp->b_rptr; 4224 4225 bzero(taa, sizeof (struct T_addr_ack)); 4226 4227 taa->PRIM_type = T_ADDR_ACK; 4228 ackmp->b_datap->db_type = M_PCPROTO; 4229 ackmp->b_wptr = (uchar_t *)&taa[1]; 4230 4231 if (tep->te_state >= TS_IDLE) { 4232 /* endpoint is bound */ 4233 taa->LOCADDR_length = tep->te_alen; 4234 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4235 4236 bcopy(tep->te_abuf, ackmp->b_wptr, 4237 tep->te_alen); 4238 ackmp->b_wptr += tep->te_alen; 4239 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4240 } 4241 4242 (void) qreply(wq, ackmp); 4243 } else { 4244 ASSERT(tep->te_state == TS_DATA_XFER || 4245 tep->te_state == TS_WIND_ORDREL || 4246 tep->te_state == TS_WREQ_ORDREL); 4247 /* connection oriented in data transfer */ 4248 tl_connected_cots_addr_req(mp, tep); 4249 } 4250 } 4251 4252 4253 static void 4254 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep) 4255 { 4256 tl_endpt_t *peer_tep = tep->te_conp; 4257 size_t ack_sz; 4258 mblk_t *ackmp; 4259 struct T_addr_ack *taa; 4260 uchar_t *addr_startp; 4261 4262 if (tep->te_closing) { 4263 freemsg(mp); 4264 return; 4265 } 4266 4267 if (peer_tep == NULL || peer_tep->te_closing) { 4268 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ); 4269 return; 4270 } 4271 4272 ASSERT(tep->te_state >= TS_IDLE); 4273 4274 ack_sz = sizeof (struct T_addr_ack); 4275 ack_sz += T_ALIGN(tep->te_alen); 4276 ack_sz += peer_tep->te_alen; 4277 4278 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK); 4279 if (ackmp == NULL) { 4280 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4281 "tl_connected_cots_addr_req: 
reallocb failed")); 4282 tl_memrecover(tep->te_wq, mp, ack_sz); 4283 return; 4284 } 4285 4286 taa = (struct T_addr_ack *)ackmp->b_rptr; 4287 4288 /* endpoint is bound */ 4289 taa->LOCADDR_length = tep->te_alen; 4290 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa); 4291 4292 addr_startp = (uchar_t *)&taa[1]; 4293 4294 bcopy(tep->te_abuf, addr_startp, 4295 tep->te_alen); 4296 4297 taa->REMADDR_length = peer_tep->te_alen; 4298 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset + 4299 taa->LOCADDR_length); 4300 addr_startp = ackmp->b_rptr + taa->REMADDR_offset; 4301 bcopy(peer_tep->te_abuf, addr_startp, 4302 peer_tep->te_alen); 4303 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr + 4304 taa->REMADDR_offset + peer_tep->te_alen; 4305 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 4306 4307 putnext(tep->te_rq, ackmp); 4308 } 4309 4310 static void 4311 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep) 4312 { 4313 if (IS_CLTS(tep)) { 4314 *ia = tl_clts_info_ack; 4315 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */ 4316 } else { 4317 *ia = tl_cots_info_ack; 4318 if (IS_COTSORD(tep)) 4319 ia->SERV_type = T_COTS_ORD; 4320 } 4321 ia->TIDU_size = tl_tidusz; 4322 ia->CURRENT_state = tep->te_state; 4323 } 4324 4325 /* 4326 * This routine responds to T_CAPABILITY_REQ messages. It is called by 4327 * tl_wput. 
4328 */ 4329 static void 4330 tl_capability_req(mblk_t *mp, tl_endpt_t *tep) 4331 { 4332 mblk_t *ackmp; 4333 t_uscalar_t cap_bits1; 4334 struct T_capability_ack *tcap; 4335 4336 if (tep->te_closing) { 4337 freemsg(mp); 4338 return; 4339 } 4340 4341 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 4342 4343 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 4344 M_PCPROTO, T_CAPABILITY_ACK); 4345 if (ackmp == NULL) { 4346 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4347 "tl_capability_req: reallocb failed")); 4348 tl_memrecover(tep->te_wq, mp, 4349 sizeof (struct T_capability_ack)); 4350 return; 4351 } 4352 4353 tcap = (struct T_capability_ack *)ackmp->b_rptr; 4354 tcap->CAP_bits1 = 0; 4355 4356 if (cap_bits1 & TC1_INFO) { 4357 tl_copy_info(&tcap->INFO_ack, tep); 4358 tcap->CAP_bits1 |= TC1_INFO; 4359 } 4360 4361 if (cap_bits1 & TC1_ACCEPTOR_ID) { 4362 tcap->ACCEPTOR_id = tep->te_acceptor_id; 4363 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID; 4364 } 4365 4366 putnext(tep->te_rq, ackmp); 4367 } 4368 4369 static void 4370 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep) 4371 { 4372 if (! tep->te_closing) 4373 tl_info_req(mp, tep); 4374 else 4375 freemsg(mp); 4376 4377 tl_serializer_exit(tep); 4378 tl_refrele(tep); 4379 } 4380 4381 static void 4382 tl_info_req(mblk_t *mp, tl_endpt_t *tep) 4383 { 4384 mblk_t *ackmp; 4385 4386 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), 4387 M_PCPROTO, T_INFO_ACK); 4388 if (ackmp == NULL) { 4389 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4390 "tl_info_req: reallocb failed")); 4391 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack)); 4392 return; 4393 } 4394 4395 /* 4396 * fill in T_INFO_ACK contents 4397 */ 4398 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep); 4399 4400 /* 4401 * send ack message 4402 */ 4403 putnext(tep->te_rq, ackmp); 4404 } 4405 4406 /* 4407 * Handle M_DATA, T_data_req and T_optdata_req. 
4408 * If this is a socket pass through T_optdata_req options unmodified. 4409 */ 4410 static void 4411 tl_data(mblk_t *mp, tl_endpt_t *tep) 4412 { 4413 queue_t *wq = tep->te_wq; 4414 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4415 ssize_t msz = MBLKL(mp); 4416 tl_endpt_t *peer_tep; 4417 queue_t *peer_rq; 4418 boolean_t closing = tep->te_closing; 4419 4420 if (IS_CLTS(tep)) { 4421 (void) (STRLOG(TL_ID, tep->te_minor, 2, 4422 SL_TRACE|SL_ERROR, 4423 "tl_wput:clts:unattached M_DATA")); 4424 if (!closing) { 4425 tl_merror(wq, mp, EPROTO); 4426 } else { 4427 freemsg(mp); 4428 } 4429 return; 4430 } 4431 4432 /* 4433 * If the endpoint is closing it should still forward any data to the 4434 * peer (if it has one). If it is not allowed to forward it can just 4435 * free the message. 4436 */ 4437 if (closing && 4438 (tep->te_state != TS_DATA_XFER) && 4439 (tep->te_state != TS_WREQ_ORDREL)) { 4440 freemsg(mp); 4441 return; 4442 } 4443 4444 if (DB_TYPE(mp) == M_PROTO) { 4445 if (prim->type == T_DATA_REQ && 4446 msz < sizeof (struct T_data_req)) { 4447 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4448 SL_TRACE|SL_ERROR, 4449 "tl_data:T_DATA_REQ:invalid message")); 4450 if (!closing) { 4451 tl_merror(wq, mp, EPROTO); 4452 } else { 4453 freemsg(mp); 4454 } 4455 return; 4456 } else if (prim->type == T_OPTDATA_REQ && 4457 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) { 4458 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4459 SL_TRACE|SL_ERROR, 4460 "tl_data:T_OPTDATA_REQ:invalid message")); 4461 if (!closing) { 4462 tl_merror(wq, mp, EPROTO); 4463 } else { 4464 freemsg(mp); 4465 } 4466 return; 4467 } 4468 } 4469 4470 /* 4471 * connection oriented provider 4472 */ 4473 switch (tep->te_state) { 4474 case TS_IDLE: 4475 /* 4476 * Other end not here - do nothing. 
4477 */ 4478 freemsg(mp); 4479 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4480 "tl_data:cots with endpoint idle")); 4481 return; 4482 4483 case TS_DATA_XFER: 4484 /* valid states */ 4485 if (tep->te_conp != NULL) 4486 break; 4487 4488 if (tep->te_oconp == NULL) { 4489 if (!closing) { 4490 tl_merror(wq, mp, EPROTO); 4491 } else { 4492 freemsg(mp); 4493 } 4494 return; 4495 } 4496 /* 4497 * For a socket the T_CONN_CON is sent early thus 4498 * the peer might not yet have accepted the connection. 4499 * If we are closing queue the packet with the T_CONN_IND. 4500 * Otherwise defer processing the packet until the peer 4501 * accepts the connection. 4502 * Note that the queue is noenabled when we go into this 4503 * state. 4504 */ 4505 if (!closing) { 4506 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4507 SL_TRACE|SL_ERROR, 4508 "tl_data: ocon")); 4509 TL_PUTBQ(tep, mp); 4510 return; 4511 } 4512 if (DB_TYPE(mp) == M_PROTO) { 4513 if (msz < sizeof (t_scalar_t)) { 4514 freemsg(mp); 4515 return; 4516 } 4517 /* reuse message block - just change REQ to IND */ 4518 if (prim->type == T_DATA_REQ) 4519 prim->type = T_DATA_IND; 4520 else 4521 prim->type = T_OPTDATA_IND; 4522 } 4523 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4524 return; 4525 4526 case TS_WREQ_ORDREL: 4527 if (tep->te_conp == NULL) { 4528 /* 4529 * Other end closed - generate discon_ind 4530 * with reason 0 to cause an EPIPE but no 4531 * read side error on AF_UNIX sockets. 
4532 */ 4533 freemsg(mp); 4534 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4535 SL_TRACE|SL_ERROR, 4536 "tl_data: WREQ_ORDREL and no peer")); 4537 tl_discon_ind(tep, 0); 4538 return; 4539 } 4540 break; 4541 4542 default: 4543 /* invalid state for event TE_DATA_REQ */ 4544 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4545 "tl_data:cots:out of state")); 4546 tl_merror(wq, mp, EPROTO); 4547 return; 4548 } 4549 /* 4550 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state); 4551 * (State stays same on this event) 4552 */ 4553 4554 /* 4555 * get connected endpoint 4556 */ 4557 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4558 freemsg(mp); 4559 /* Peer closed */ 4560 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4561 "tl_data: peer gone")); 4562 return; 4563 } 4564 4565 ASSERT(tep->te_serializer == peer_tep->te_serializer); 4566 peer_rq = peer_tep->te_rq; 4567 4568 /* 4569 * Put it back if flow controlled 4570 * Note: Messages already on queue when we are closing is bounded 4571 * so we can ignore flow control. 
4572 */ 4573 if (!canputnext(peer_rq) && !closing) { 4574 TL_PUTBQ(tep, mp); 4575 return; 4576 } 4577 4578 /* 4579 * validate peer state 4580 */ 4581 switch (peer_tep->te_state) { 4582 case TS_DATA_XFER: 4583 case TS_WIND_ORDREL: 4584 /* valid states */ 4585 break; 4586 default: 4587 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4588 "tl_data:rx side:invalid state")); 4589 tl_merror(peer_tep->te_wq, mp, EPROTO); 4590 return; 4591 } 4592 if (DB_TYPE(mp) == M_PROTO) { 4593 /* reuse message block - just change REQ to IND */ 4594 if (prim->type == T_DATA_REQ) 4595 prim->type = T_DATA_IND; 4596 else 4597 prim->type = T_OPTDATA_IND; 4598 } 4599 /* 4600 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4601 * (peer state stays same on this event) 4602 */ 4603 /* 4604 * send data to connected peer 4605 */ 4606 putnext(peer_rq, mp); 4607 } 4608 4609 4610 4611 static void 4612 tl_exdata(mblk_t *mp, tl_endpt_t *tep) 4613 { 4614 queue_t *wq = tep->te_wq; 4615 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4616 ssize_t msz = MBLKL(mp); 4617 tl_endpt_t *peer_tep; 4618 queue_t *peer_rq; 4619 boolean_t closing = tep->te_closing; 4620 4621 if (msz < sizeof (struct T_exdata_req)) { 4622 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4623 "tl_exdata:invalid message")); 4624 if (!closing) { 4625 tl_merror(wq, mp, EPROTO); 4626 } else { 4627 freemsg(mp); 4628 } 4629 return; 4630 } 4631 4632 /* 4633 * If the endpoint is closing it should still forward any data to the 4634 * peer (if it has one). If it is not allowed to forward it can just 4635 * free the message. 4636 */ 4637 if (closing && 4638 (tep->te_state != TS_DATA_XFER) && 4639 (tep->te_state != TS_WREQ_ORDREL)) { 4640 freemsg(mp); 4641 return; 4642 } 4643 4644 /* 4645 * validate state 4646 */ 4647 switch (tep->te_state) { 4648 case TS_IDLE: 4649 /* 4650 * Other end not here - do nothing. 
4651 */ 4652 freemsg(mp); 4653 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4654 "tl_exdata:cots with endpoint idle")); 4655 return; 4656 4657 case TS_DATA_XFER: 4658 /* valid states */ 4659 if (tep->te_conp != NULL) 4660 break; 4661 4662 if (tep->te_oconp == NULL) { 4663 if (!closing) { 4664 tl_merror(wq, mp, EPROTO); 4665 } else { 4666 freemsg(mp); 4667 } 4668 return; 4669 } 4670 /* 4671 * For a socket the T_CONN_CON is sent early thus 4672 * the peer might not yet have accepted the connection. 4673 * If we are closing queue the packet with the T_CONN_IND. 4674 * Otherwise defer processing the packet until the peer 4675 * accepts the connection. 4676 * Note that the queue is noenabled when we go into this 4677 * state. 4678 */ 4679 if (!closing) { 4680 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4681 SL_TRACE|SL_ERROR, 4682 "tl_exdata: ocon")); 4683 TL_PUTBQ(tep, mp); 4684 return; 4685 } 4686 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4687 "tl_exdata: closing socket ocon")); 4688 prim->type = T_EXDATA_IND; 4689 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4690 return; 4691 4692 case TS_WREQ_ORDREL: 4693 if (tep->te_conp == NULL) { 4694 /* 4695 * Other end closed - generate discon_ind 4696 * with reason 0 to cause an EPIPE but no 4697 * read side error on AF_UNIX sockets. 
4698 */ 4699 freemsg(mp); 4700 (void) (STRLOG(TL_ID, tep->te_minor, 3, 4701 SL_TRACE|SL_ERROR, 4702 "tl_exdata: WREQ_ORDREL and no peer")); 4703 tl_discon_ind(tep, 0); 4704 return; 4705 } 4706 break; 4707 4708 default: 4709 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4710 SL_TRACE|SL_ERROR, 4711 "tl_wput:T_EXDATA_REQ:out of state, state=%d", 4712 tep->te_state)); 4713 tl_merror(wq, mp, EPROTO); 4714 return; 4715 } 4716 /* 4717 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state); 4718 * (state stays same on this event) 4719 */ 4720 4721 /* 4722 * get connected endpoint 4723 */ 4724 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4725 freemsg(mp); 4726 /* Peer closed */ 4727 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4728 "tl_exdata: peer gone")); 4729 return; 4730 } 4731 4732 peer_rq = peer_tep->te_rq; 4733 4734 /* 4735 * Put it back if flow controlled 4736 * Note: Messages already on queue when we are closing is bounded 4737 * so we can ignore flow control. 4738 */ 4739 if (!canputnext(peer_rq) && !closing) { 4740 TL_PUTBQ(tep, mp); 4741 return; 4742 } 4743 4744 /* 4745 * validate state on peer 4746 */ 4747 switch (peer_tep->te_state) { 4748 case TS_DATA_XFER: 4749 case TS_WIND_ORDREL: 4750 /* valid states */ 4751 break; 4752 default: 4753 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4754 "tl_exdata:rx side:invalid state")); 4755 tl_merror(peer_tep->te_wq, mp, EPROTO); 4756 return; 4757 } 4758 /* 4759 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state); 4760 * (peer state stays same on this event) 4761 */ 4762 /* 4763 * reuse message block 4764 */ 4765 prim->type = T_EXDATA_IND; 4766 4767 /* 4768 * send data to connected peer 4769 */ 4770 putnext(peer_rq, mp); 4771 } 4772 4773 4774 4775 static void 4776 tl_ordrel(mblk_t *mp, tl_endpt_t *tep) 4777 { 4778 queue_t *wq = tep->te_wq; 4779 union T_primitives *prim = (union T_primitives *)mp->b_rptr; 4780 ssize_t msz = MBLKL(mp); 4781 tl_endpt_t *peer_tep; 4782 
queue_t *peer_rq; 4783 boolean_t closing = tep->te_closing; 4784 4785 if (msz < sizeof (struct T_ordrel_req)) { 4786 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4787 "tl_ordrel:invalid message")); 4788 if (!closing) { 4789 tl_merror(wq, mp, EPROTO); 4790 } else { 4791 freemsg(mp); 4792 } 4793 return; 4794 } 4795 4796 /* 4797 * validate state 4798 */ 4799 switch (tep->te_state) { 4800 case TS_DATA_XFER: 4801 case TS_WREQ_ORDREL: 4802 /* valid states */ 4803 if (tep->te_conp != NULL) 4804 break; 4805 4806 if (tep->te_oconp == NULL) 4807 break; 4808 4809 /* 4810 * For a socket the T_CONN_CON is sent early thus 4811 * the peer might not yet have accepted the connection. 4812 * If we are closing queue the packet with the T_CONN_IND. 4813 * Otherwise defer processing the packet until the peer 4814 * accepts the connection. 4815 * Note that the queue is noenabled when we go into this 4816 * state. 4817 */ 4818 if (!closing) { 4819 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4820 SL_TRACE|SL_ERROR, 4821 "tl_ordlrel: ocon")); 4822 TL_PUTBQ(tep, mp); 4823 return; 4824 } 4825 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4826 "tl_ordlrel: closing socket ocon")); 4827 prim->type = T_ORDREL_IND; 4828 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp); 4829 return; 4830 4831 default: 4832 (void) (STRLOG(TL_ID, tep->te_minor, 1, 4833 SL_TRACE|SL_ERROR, 4834 "tl_wput:T_ORDREL_REQ:out of state, state=%d", 4835 tep->te_state)); 4836 if (!closing) { 4837 tl_merror(wq, mp, EPROTO); 4838 } else { 4839 freemsg(mp); 4840 } 4841 return; 4842 } 4843 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state); 4844 4845 /* 4846 * get connected endpoint 4847 */ 4848 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) { 4849 /* Peer closed */ 4850 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4851 "tl_ordrel: peer gone")); 4852 freemsg(mp); 4853 return; 4854 } 4855 4856 peer_rq = peer_tep->te_rq; 4857 4858 /* 4859 * Put it back if flow controlled 
except when we are closing. 4860 * Note: Messages already on queue when we are closing is bounded 4861 * so we can ignore flow control. 4862 */ 4863 if (! canputnext(peer_rq) && !closing) { 4864 TL_PUTBQ(tep, mp); 4865 return; 4866 } 4867 4868 /* 4869 * validate state on peer 4870 */ 4871 switch (peer_tep->te_state) { 4872 case TS_DATA_XFER: 4873 case TS_WIND_ORDREL: 4874 /* valid states */ 4875 break; 4876 default: 4877 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 4878 "tl_ordrel:rx side:invalid state")); 4879 tl_merror(peer_tep->te_wq, mp, EPROTO); 4880 return; 4881 } 4882 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 4883 4884 /* 4885 * reuse message block 4886 */ 4887 prim->type = T_ORDREL_IND; 4888 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 4889 "tl_ordrel: send ordrel_ind")); 4890 4891 /* 4892 * send data to connected peer 4893 */ 4894 putnext(peer_rq, mp); 4895 } 4896 4897 4898 /* 4899 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space. 4900 */ 4901 static void 4902 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err) 4903 { 4904 size_t err_sz; 4905 tl_endpt_t *tep; 4906 struct T_unitdata_req *udreq; 4907 mblk_t *err_mp; 4908 t_scalar_t alen; 4909 t_scalar_t olen; 4910 struct T_uderror_ind *uderr; 4911 uchar_t *addr_startp; 4912 4913 err_sz = sizeof (struct T_uderror_ind); 4914 tep = (tl_endpt_t *)wq->q_ptr; 4915 udreq = (struct T_unitdata_req *)mp->b_rptr; 4916 alen = udreq->DEST_length; 4917 olen = udreq->OPT_length; 4918 4919 if (alen > 0) 4920 err_sz = T_ALIGN(err_sz + alen); 4921 if (olen > 0) 4922 err_sz += olen; 4923 4924 err_mp = allocb(err_sz, BPRI_MED); 4925 if (! 
err_mp) { 4926 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 4927 "tl_uderr:allocb failure")); 4928 /* 4929 * Note: no rollback of state needed as it does 4930 * not change in connectionless transport 4931 */ 4932 tl_memrecover(wq, mp, err_sz); 4933 return; 4934 } 4935 4936 DB_TYPE(err_mp) = M_PROTO; 4937 err_mp->b_wptr = err_mp->b_rptr + err_sz; 4938 uderr = (struct T_uderror_ind *)err_mp->b_rptr; 4939 uderr->PRIM_type = T_UDERROR_IND; 4940 uderr->ERROR_type = err; 4941 uderr->DEST_length = alen; 4942 uderr->OPT_length = olen; 4943 if (alen <= 0) { 4944 uderr->DEST_offset = 0; 4945 } else { 4946 uderr->DEST_offset = 4947 (t_scalar_t)sizeof (struct T_uderror_ind); 4948 addr_startp = mp->b_rptr + udreq->DEST_offset; 4949 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset, 4950 (size_t)alen); 4951 } 4952 if (olen <= 0) { 4953 uderr->OPT_offset = 0; 4954 } else { 4955 uderr->OPT_offset = 4956 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) + 4957 uderr->DEST_length); 4958 addr_startp = mp->b_rptr + udreq->OPT_offset; 4959 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset, 4960 (size_t)olen); 4961 } 4962 freemsg(mp); 4963 4964 /* 4965 * send indication message 4966 */ 4967 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state); 4968 4969 qreply(wq, err_mp); 4970 } 4971 4972 static void 4973 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep) 4974 { 4975 queue_t *wq = tep->te_wq; 4976 4977 if (!tep->te_closing && (wq->q_first != NULL)) { 4978 TL_PUTQ(tep, mp); 4979 } else if (tep->te_rq != NULL) 4980 tl_unitdata(mp, tep); 4981 else 4982 freemsg(mp); 4983 4984 tl_serializer_exit(tep); 4985 tl_refrele(tep); 4986 } 4987 4988 /* 4989 * Handle T_unitdata_req. 4990 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options. 4991 * If this is a socket pass through options unmodified. 
4992 */ 4993 static void 4994 tl_unitdata(mblk_t *mp, tl_endpt_t *tep) 4995 { 4996 queue_t *wq = tep->te_wq; 4997 soux_addr_t ux_addr; 4998 tl_addr_t destaddr; 4999 uchar_t *addr_startp; 5000 tl_endpt_t *peer_tep; 5001 struct T_unitdata_ind *udind; 5002 struct T_unitdata_req *udreq; 5003 ssize_t msz, ui_sz; 5004 t_scalar_t alen, aoff, olen, ooff; 5005 t_scalar_t oldolen = 0; 5006 cred_t *cr = NULL; 5007 pid_t cpid; 5008 5009 udreq = (struct T_unitdata_req *)mp->b_rptr; 5010 msz = MBLKL(mp); 5011 5012 /* 5013 * validate the state 5014 */ 5015 if (tep->te_state != TS_IDLE) { 5016 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5017 SL_TRACE|SL_ERROR, 5018 "tl_wput:T_CONN_REQ:out of state")); 5019 tl_merror(wq, mp, EPROTO); 5020 return; 5021 } 5022 /* 5023 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state); 5024 * (state does not change on this event) 5025 */ 5026 5027 /* 5028 * validate the message 5029 * Note: dereference fields in struct inside message only 5030 * after validating the message length. 
5031 */ 5032 if (msz < sizeof (struct T_unitdata_req)) { 5033 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5034 "tl_unitdata:invalid message length")); 5035 tl_merror(wq, mp, EINVAL); 5036 return; 5037 } 5038 alen = udreq->DEST_length; 5039 aoff = udreq->DEST_offset; 5040 oldolen = olen = udreq->OPT_length; 5041 ooff = udreq->OPT_offset; 5042 if (olen == 0) 5043 ooff = 0; 5044 5045 if (IS_SOCKET(tep)) { 5046 if ((alen != TL_SOUX_ADDRLEN) || 5047 (aoff < 0) || 5048 (aoff + alen > msz) || 5049 (olen < 0) || (ooff < 0) || 5050 ((olen > 0) && ((ooff + olen) > msz))) { 5051 (void) (STRLOG(TL_ID, tep->te_minor, 5052 1, SL_TRACE|SL_ERROR, 5053 "tl_unitdata_req: invalid socket addr " 5054 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)", 5055 (int)msz, alen, aoff, olen, ooff)); 5056 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5057 return; 5058 } 5059 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN); 5060 5061 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) && 5062 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) { 5063 (void) (STRLOG(TL_ID, tep->te_minor, 5064 1, SL_TRACE|SL_ERROR, 5065 "tl_conn_req: invalid socket magic")); 5066 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ); 5067 return; 5068 } 5069 } else { 5070 if ((alen < 0) || 5071 (aoff < 0) || 5072 ((alen > 0) && ((aoff + alen) > msz)) || 5073 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) || 5074 ((aoff + alen) < 0) || 5075 ((olen > 0) && ((ooff + olen) > msz)) || 5076 (olen < 0) || 5077 (ooff < 0) || 5078 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) { 5079 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5080 SL_TRACE|SL_ERROR, 5081 "tl_unitdata:invalid unit data message")); 5082 tl_merror(wq, mp, EINVAL); 5083 return; 5084 } 5085 } 5086 5087 /* Options not supported unless it's a socket */ 5088 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) { 5089 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5090 "tl_unitdata:option use(unsupported) or zero len addr")); 5091 
tl_uderr(wq, mp, EPROTO); 5092 return; 5093 } 5094 #ifdef DEBUG 5095 /* 5096 * Mild form of ASSERT()ion to detect broken TPI apps. 5097 * if (! assertion) 5098 * log warning; 5099 */ 5100 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) { 5101 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5102 "tl_unitdata:addr overlaps TPI message")); 5103 } 5104 #endif 5105 /* 5106 * get destination endpoint 5107 */ 5108 destaddr.ta_alen = alen; 5109 destaddr.ta_abuf = mp->b_rptr + aoff; 5110 destaddr.ta_zoneid = tep->te_zoneid; 5111 5112 /* 5113 * Check whether the destination is the same that was used previously 5114 * and the destination endpoint is in the right state. If something is 5115 * wrong, find destination again and cache it. 5116 */ 5117 peer_tep = tep->te_lastep; 5118 5119 if ((peer_tep == NULL) || peer_tep->te_closing || 5120 (peer_tep->te_state != TS_IDLE) || 5121 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) { 5122 /* 5123 * Not the same as cached destination , need to find the right 5124 * destination. 5125 */ 5126 peer_tep = (IS_SOCKET(tep) ? 5127 tl_sock_find_peer(tep, &ux_addr) : 5128 tl_find_peer(tep, &destaddr)); 5129 5130 if (peer_tep == NULL) { 5131 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5132 SL_TRACE|SL_ERROR, 5133 "tl_unitdata:no one at destination address")); 5134 tl_uderr(wq, mp, ECONNRESET); 5135 return; 5136 } 5137 5138 /* 5139 * Cache the new peer. 5140 */ 5141 if (tep->te_lastep != NULL) 5142 tl_refrele(tep->te_lastep); 5143 5144 tep->te_lastep = peer_tep; 5145 } 5146 5147 if (peer_tep->te_state != TS_IDLE) { 5148 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 5149 "tl_unitdata:provider in invalid state")); 5150 tl_uderr(wq, mp, EPROTO); 5151 return; 5152 } 5153 5154 ASSERT(peer_tep->te_rq != NULL); 5155 5156 /* 5157 * Put it back if flow controlled except when we are closing. 5158 * Note: Messages already on queue when we are closing is bounded 5159 * so we can ignore flow control. 
5160 */ 5161 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) { 5162 /* record what we are flow controlled on */ 5163 if (tep->te_flowq != NULL) { 5164 list_remove(&tep->te_flowq->te_flowlist, tep); 5165 } 5166 list_insert_head(&peer_tep->te_flowlist, tep); 5167 tep->te_flowq = peer_tep; 5168 TL_PUTBQ(tep, mp); 5169 return; 5170 } 5171 /* 5172 * prepare indication message 5173 */ 5174 5175 /* 5176 * calculate length of message 5177 */ 5178 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5179 cr = msg_getcred(mp, &cpid); 5180 ASSERT(cr != NULL); 5181 5182 if (peer_tep->te_flag & TL_SETCRED) { 5183 ASSERT(olen == 0); 5184 olen = (t_scalar_t)sizeof (struct opthdr) + 5185 OPTLEN(sizeof (tl_credopt_t)); 5186 /* 1 option only */ 5187 } else if (peer_tep->te_flag & TL_SETUCRED) { 5188 ASSERT(olen == 0); 5189 olen = (t_scalar_t)sizeof (struct opthdr) + 5190 OPTLEN(ucredminsize(cr)); 5191 /* 1 option only */ 5192 } else { 5193 /* Possibly more than one option */ 5194 olen += (t_scalar_t)sizeof (struct T_opthdr) + 5195 OPTLEN(ucredminsize(cr)); 5196 } 5197 } 5198 5199 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + 5200 olen; 5201 /* 5202 * If the unitdata_ind fits and we are not adding options 5203 * reuse the udreq mblk. 5204 */ 5205 if (msz >= ui_sz && alen >= tep->te_alen && 5206 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) { 5207 /* 5208 * Reuse the original mblk. Leave options in place. 5209 */ 5210 udind = (struct T_unitdata_ind *)mp->b_rptr; 5211 udind->PRIM_type = T_UNITDATA_IND; 5212 udind->SRC_length = tep->te_alen; 5213 addr_startp = mp->b_rptr + udind->SRC_offset; 5214 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5215 } else { 5216 /* Allocate a new T_unidata_ind message */ 5217 mblk_t *ui_mp; 5218 5219 ui_mp = allocb(ui_sz, BPRI_MED); 5220 if (! 
ui_mp) { 5221 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE, 5222 "tl_unitdata:allocb failure:message queued")); 5223 tl_memrecover(wq, mp, ui_sz); 5224 return; 5225 } 5226 5227 /* 5228 * fill in T_UNITDATA_IND contents 5229 */ 5230 DB_TYPE(ui_mp) = M_PROTO; 5231 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz; 5232 udind = (struct T_unitdata_ind *)ui_mp->b_rptr; 5233 udind->PRIM_type = T_UNITDATA_IND; 5234 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind); 5235 udind->SRC_length = tep->te_alen; 5236 addr_startp = ui_mp->b_rptr + udind->SRC_offset; 5237 bcopy(tep->te_abuf, addr_startp, tep->te_alen); 5238 udind->OPT_offset = 5239 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length); 5240 udind->OPT_length = olen; 5241 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) { 5242 5243 if (oldolen != 0) { 5244 bcopy((void *)((uintptr_t)udreq + ooff), 5245 (void *)((uintptr_t)udind + 5246 udind->OPT_offset), 5247 oldolen); 5248 } 5249 ASSERT(cr != NULL); 5250 5251 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset + 5252 oldolen, cr, cpid, 5253 peer_tep->te_flag, peer_tep->te_credp); 5254 } else { 5255 bcopy((void *)((uintptr_t)udreq + ooff), 5256 (void *)((uintptr_t)udind + udind->OPT_offset), 5257 olen); 5258 } 5259 5260 /* 5261 * relink data blocks from mp to ui_mp 5262 */ 5263 ui_mp->b_cont = mp->b_cont; 5264 freeb(mp); 5265 mp = ui_mp; 5266 } 5267 /* 5268 * send indication message 5269 */ 5270 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state); 5271 putnext(peer_tep->te_rq, mp); 5272 } 5273 5274 5275 5276 /* 5277 * Check if a given addr is in use. 5278 * Endpoint ptr returned or NULL if not found. 5279 * The name space is separate for each mode. This implies that 5280 * sockets get their own name space. 
5281 */ 5282 static tl_endpt_t * 5283 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap) 5284 { 5285 tl_endpt_t *peer_tep = NULL; 5286 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap, 5287 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5288 5289 ASSERT(! IS_SOCKET(tep)); 5290 5291 ASSERT(ap != NULL && ap->ta_alen > 0); 5292 ASSERT(ap->ta_zoneid == tep->te_zoneid); 5293 ASSERT(ap->ta_abuf != NULL); 5294 EQUIV(rc == 0, peer_tep != NULL); 5295 IMPLY(rc == 0, 5296 (tep->te_zoneid == peer_tep->te_zoneid) && 5297 (tep->te_transport == peer_tep->te_transport)); 5298 5299 if ((rc == 0) && (peer_tep->te_closing)) { 5300 tl_refrele(peer_tep); 5301 peer_tep = NULL; 5302 } 5303 5304 return (peer_tep); 5305 } 5306 5307 /* 5308 * Find peer for a socket based on unix domain address. 5309 * For implicit addresses our peer can be found by minor number in ai hash. For 5310 * explicit binds we look vnode address at addr_hash. 5311 */ 5312 static tl_endpt_t * 5313 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr) 5314 { 5315 tl_endpt_t *peer_tep = NULL; 5316 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ? 5317 tep->te_aihash : tep->te_addrhash; 5318 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp, 5319 (mod_hash_val_t *)&peer_tep, tl_find_callback); 5320 5321 ASSERT(IS_SOCKET(tep)); 5322 EQUIV(rc == 0, peer_tep != NULL); 5323 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport)); 5324 5325 if (peer_tep != NULL) { 5326 /* Don't attempt to use closing peer. */ 5327 if (peer_tep->te_closing) 5328 goto errout; 5329 5330 /* 5331 * Cross-zone unix sockets are permitted, but for Trusted 5332 * Extensions only, the "server" for these must be in the 5333 * global zone. 
5334 */ 5335 if ((peer_tep->te_zoneid != tep->te_zoneid) && 5336 is_system_labeled() && 5337 (peer_tep->te_zoneid != GLOBAL_ZONEID)) 5338 goto errout; 5339 } 5340 5341 return (peer_tep); 5342 5343 errout: 5344 tl_refrele(peer_tep); 5345 return (NULL); 5346 } 5347 5348 /* 5349 * Generate a free addr and return it in struct pointed by ap 5350 * but allocating space for address buffer. 5351 * The generated address will be at least 4 bytes long and, if req->ta_alen 5352 * exceeds 4 bytes, be req->ta_alen bytes long. 5353 * 5354 * If address is found it will be inserted in the hash. 5355 * 5356 * If req->ta_alen is larger than the default alen (4 bytes) the last 5357 * alen-4 bytes will always be the same as in req. 5358 * 5359 * Return 0 for failure. 5360 * Return non-zero for success. 5361 */ 5362 static boolean_t 5363 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req) 5364 { 5365 t_scalar_t alen; 5366 uint32_t loopcnt; /* Limit loop to 2^32 */ 5367 5368 ASSERT(tep->te_hash_hndl != NULL); 5369 ASSERT(! IS_SOCKET(tep)); 5370 5371 if (tep->te_hash_hndl == NULL) 5372 return (B_FALSE); 5373 5374 /* 5375 * check if default addr is in use 5376 * if it is - bump it and try again 5377 */ 5378 if (req == NULL) { 5379 alen = sizeof (uint32_t); 5380 } else { 5381 alen = max(req->ta_alen, sizeof (uint32_t)); 5382 ASSERT(tep->te_zoneid == req->ta_zoneid); 5383 } 5384 5385 if (tep->te_alen < alen) { 5386 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP); 5387 5388 /* 5389 * Not enough space in tep->ta_ap to hold the address, 5390 * allocate a bigger space. 
5391 */ 5392 if (abuf == NULL) 5393 return (B_FALSE); 5394 5395 if (tep->te_alen > 0) 5396 kmem_free(tep->te_abuf, tep->te_alen); 5397 5398 tep->te_alen = alen; 5399 tep->te_abuf = abuf; 5400 } 5401 5402 /* Copy in the address in req */ 5403 if (req != NULL) { 5404 ASSERT(alen >= req->ta_alen); 5405 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen); 5406 } 5407 5408 /* 5409 * First try minor number then try default addresses. 5410 */ 5411 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t)); 5412 5413 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) { 5414 if (mod_hash_insert_reserve(tep->te_addrhash, 5415 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep, 5416 tep->te_hash_hndl) == 0) { 5417 /* 5418 * found free address 5419 */ 5420 tep->te_flag |= TL_ADDRHASHED; 5421 tep->te_hash_hndl = NULL; 5422 5423 return (B_TRUE); /* successful return */ 5424 } 5425 /* 5426 * Use default address. 5427 */ 5428 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t)); 5429 atomic_add_32(&tep->te_defaddr, 1); 5430 } 5431 5432 /* 5433 * Failed to find anything. 5434 */ 5435 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR, 5436 "tl_get_any_addr:looped 2^32 times")); 5437 return (B_FALSE); 5438 } 5439 5440 /* 5441 * reallocb + set r/w ptrs to reflect size. 5442 */ 5443 static mblk_t * 5444 tl_resizemp(mblk_t *mp, ssize_t new_size) 5445 { 5446 if ((mp = reallocb(mp, new_size, 0)) == NULL) 5447 return (NULL); 5448 5449 mp->b_rptr = DB_BASE(mp); 5450 mp->b_wptr = mp->b_rptr + new_size; 5451 return (mp); 5452 } 5453 5454 static void 5455 tl_cl_backenable(tl_endpt_t *tep) 5456 { 5457 list_t *l = &tep->te_flowlist; 5458 tl_endpt_t *elp; 5459 5460 ASSERT(IS_CLTS(tep)); 5461 5462 for (elp = list_head(l); elp != NULL; elp = list_head(l)) { 5463 ASSERT(tep->te_ser == elp->te_ser); 5464 ASSERT(elp->te_flowq == tep); 5465 if (! elp->te_closing) 5466 TL_QENABLE(elp); 5467 elp->te_flowq = NULL; 5468 list_remove(l, elp); 5469 } 5470 } 5471 5472 /* 5473 * Unconnect endpoints. 
5474 */ 5475 static void 5476 tl_co_unconnect(tl_endpt_t *tep) 5477 { 5478 tl_endpt_t *peer_tep = tep->te_conp; 5479 tl_endpt_t *srv_tep = tep->te_oconp; 5480 list_t *l; 5481 tl_icon_t *tip; 5482 tl_endpt_t *cl_tep; 5483 mblk_t *d_mp; 5484 5485 ASSERT(IS_COTS(tep)); 5486 /* 5487 * If our peer is closing, don't use it. 5488 */ 5489 if ((peer_tep != NULL) && peer_tep->te_closing) { 5490 TL_UNCONNECT(tep->te_conp); 5491 peer_tep = NULL; 5492 } 5493 if ((srv_tep != NULL) && srv_tep->te_closing) { 5494 TL_UNCONNECT(tep->te_oconp); 5495 srv_tep = NULL; 5496 } 5497 5498 if (tep->te_nicon > 0) { 5499 l = &tep->te_iconp; 5500 /* 5501 * If incoming requests pending, change state 5502 * of clients on disconnect ind event and send 5503 * discon_ind pdu to modules above them 5504 * for server: all clients get disconnect 5505 */ 5506 5507 while (tep->te_nicon > 0) { 5508 tip = list_head(l); 5509 cl_tep = tip->ti_tep; 5510 5511 if (cl_tep == NULL) { 5512 tl_freetip(tep, tip); 5513 continue; 5514 } 5515 5516 if (cl_tep->te_oconp != NULL) { 5517 ASSERT(cl_tep != cl_tep->te_oconp); 5518 TL_UNCONNECT(cl_tep->te_oconp); 5519 } 5520 5521 if (cl_tep->te_closing) { 5522 tl_freetip(tep, tip); 5523 continue; 5524 } 5525 5526 enableok(cl_tep->te_wq); 5527 TL_QENABLE(cl_tep); 5528 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM); 5529 if (d_mp != NULL) { 5530 cl_tep->te_state = TS_IDLE; 5531 putnext(cl_tep->te_rq, d_mp); 5532 } else { 5533 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5534 SL_TRACE|SL_ERROR, 5535 "tl_co_unconnect:icmng: " 5536 "allocb failure")); 5537 } 5538 tl_freetip(tep, tip); 5539 } 5540 } else if (srv_tep != NULL) { 5541 /* 5542 * If outgoing request pending, change state 5543 * of server on discon ind event 5544 */ 5545 5546 if (IS_SOCKET(tep) && !tl_disable_early_connect && 5547 IS_COTSORD(srv_tep) && 5548 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) { 5549 /* 5550 * Queue ordrel_ind for server to be picked up 5551 * when the connection is accepted. 
5552 */ 5553 d_mp = tl_ordrel_ind_alloc(); 5554 } else { 5555 /* 5556 * send discon_ind to server 5557 */ 5558 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno); 5559 } 5560 if (d_mp == NULL) { 5561 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5562 SL_TRACE|SL_ERROR, 5563 "tl_co_unconnect:outgoing:allocb failure")); 5564 TL_UNCONNECT(tep->te_oconp); 5565 goto discon_peer; 5566 } 5567 5568 /* 5569 * If this is a socket the T_DISCON_IND is queued with 5570 * the T_CONN_IND. Otherwise the T_CONN_IND is removed 5571 * from the list of pending connections. 5572 * Note that when te_oconp is set the peer better have 5573 * a t_connind_t for the client. 5574 */ 5575 if (IS_SOCKET(tep) && !tl_disable_early_connect) { 5576 /* 5577 * Queue the disconnection message. 5578 */ 5579 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp); 5580 } else { 5581 tip = tl_icon_find(srv_tep, tep->te_seqno); 5582 if (tip == NULL) { 5583 freemsg(d_mp); 5584 } else { 5585 ASSERT(tep == tip->ti_tep); 5586 ASSERT(tep->te_ser == srv_tep->te_ser); 5587 /* 5588 * Delete tip from the server list. 
5589 */ 5590 if (srv_tep->te_nicon == 1) { 5591 srv_tep->te_state = 5592 NEXTSTATE(TE_DISCON_IND2, 5593 srv_tep->te_state); 5594 } else { 5595 srv_tep->te_state = 5596 NEXTSTATE(TE_DISCON_IND3, 5597 srv_tep->te_state); 5598 } 5599 ASSERT(*(uint32_t *)(d_mp->b_rptr) == 5600 T_DISCON_IND); 5601 putnext(srv_tep->te_rq, d_mp); 5602 tl_freetip(srv_tep, tip); 5603 } 5604 TL_UNCONNECT(tep->te_oconp); 5605 srv_tep = NULL; 5606 } 5607 } else if (peer_tep != NULL) { 5608 /* 5609 * unconnect existing connection 5610 * If connected, change state of peer on 5611 * discon ind event and send discon ind pdu 5612 * to module above it 5613 */ 5614 5615 ASSERT(tep->te_ser == peer_tep->te_ser); 5616 if (IS_COTSORD(peer_tep) && 5617 (peer_tep->te_state == TS_WIND_ORDREL || 5618 peer_tep->te_state == TS_DATA_XFER)) { 5619 /* 5620 * send ordrel ind 5621 */ 5622 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE, 5623 "tl_co_unconnect:connected: ordrel_ind state %d->%d", 5624 peer_tep->te_state, 5625 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state))); 5626 d_mp = tl_ordrel_ind_alloc(); 5627 if (! d_mp) { 5628 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5629 SL_TRACE|SL_ERROR, 5630 "tl_co_unconnect:connected:" 5631 "allocb failure")); 5632 /* 5633 * Continue with cleaning up peer as 5634 * this side may go away with the close 5635 */ 5636 TL_QENABLE(peer_tep); 5637 goto discon_peer; 5638 } 5639 peer_tep->te_state = 5640 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state); 5641 5642 putnext(peer_tep->te_rq, d_mp); 5643 /* 5644 * Handle flow control case. This will generate 5645 * a t_discon_ind message with reason 0 if there 5646 * is data queued on the write side. 5647 */ 5648 TL_QENABLE(peer_tep); 5649 } else if (IS_COTSORD(peer_tep) && 5650 peer_tep->te_state == TS_WREQ_ORDREL) { 5651 /* 5652 * Sent an ordrel_ind. We send a discon with 5653 * with error 0 to inform that the peer is gone. 
5654 */ 5655 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5656 SL_TRACE|SL_ERROR, 5657 "tl_co_unconnect: discon in state %d", 5658 tep->te_state)); 5659 tl_discon_ind(peer_tep, 0); 5660 } else { 5661 (void) (STRLOG(TL_ID, tep->te_minor, 3, 5662 SL_TRACE|SL_ERROR, 5663 "tl_co_unconnect: state %d", tep->te_state)); 5664 tl_discon_ind(peer_tep, ECONNRESET); 5665 } 5666 5667 discon_peer: 5668 /* 5669 * Disconnect cross-pointers only for close 5670 */ 5671 if (tep->te_closing) { 5672 peer_tep = tep->te_conp; 5673 TL_REMOVE_PEER(peer_tep->te_conp); 5674 TL_REMOVE_PEER(tep->te_conp); 5675 } 5676 } 5677 } 5678 5679 /* 5680 * Note: The following routine does not recover from allocb() 5681 * failures 5682 * The reason should be from the <sys/errno.h> space. 5683 */ 5684 static void 5685 tl_discon_ind(tl_endpt_t *tep, uint32_t reason) 5686 { 5687 mblk_t *d_mp; 5688 5689 if (tep->te_closing) 5690 return; 5691 5692 /* 5693 * flush the queues. 5694 */ 5695 flushq(tep->te_rq, FLUSHDATA); 5696 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW); 5697 5698 /* 5699 * send discon ind 5700 */ 5701 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno); 5702 if (! d_mp) { 5703 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR, 5704 "tl_discon_ind:allocb failure")); 5705 return; 5706 } 5707 tep->te_state = TS_IDLE; 5708 putnext(tep->te_rq, d_mp); 5709 } 5710 5711 /* 5712 * Note: The following routine does not recover from allocb() 5713 * failures 5714 * The reason should be from the <sys/errno.h> space. 
5715 */ 5716 static mblk_t * 5717 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum) 5718 { 5719 mblk_t *mp; 5720 struct T_discon_ind *tdi; 5721 5722 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) { 5723 DB_TYPE(mp) = M_PROTO; 5724 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind); 5725 tdi = (struct T_discon_ind *)mp->b_rptr; 5726 tdi->PRIM_type = T_DISCON_IND; 5727 tdi->DISCON_reason = reason; 5728 tdi->SEQ_number = seqnum; 5729 } 5730 return (mp); 5731 } 5732 5733 5734 /* 5735 * Note: The following routine does not recover from allocb() 5736 * failures 5737 */ 5738 static mblk_t * 5739 tl_ordrel_ind_alloc(void) 5740 { 5741 mblk_t *mp; 5742 struct T_ordrel_ind *toi; 5743 5744 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) { 5745 DB_TYPE(mp) = M_PROTO; 5746 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind); 5747 toi = (struct T_ordrel_ind *)mp->b_rptr; 5748 toi->PRIM_type = T_ORDREL_IND; 5749 } 5750 return (mp); 5751 } 5752 5753 5754 /* 5755 * Lookup the seqno in the list of queued connections. 5756 */ 5757 static tl_icon_t * 5758 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno) 5759 { 5760 list_t *l = &tep->te_iconp; 5761 tl_icon_t *tip = list_head(l); 5762 5763 ASSERT(seqno != 0); 5764 5765 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip)) 5766 ; 5767 5768 return (tip); 5769 } 5770 5771 /* 5772 * Queue data for a given T_CONN_IND while verifying that redundant 5773 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued. 5774 * Used when the originator of the connection closes. 
5775 */ 5776 static void 5777 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp) 5778 { 5779 tl_icon_t *tip; 5780 mblk_t **mpp, *mp; 5781 int prim, nprim; 5782 5783 if (nmp->b_datap->db_type == M_PROTO) 5784 nprim = ((union T_primitives *)nmp->b_rptr)->type; 5785 else 5786 nprim = -1; /* M_DATA */ 5787 5788 tip = tl_icon_find(tep, seqno); 5789 if (tip == NULL) { 5790 freemsg(nmp); 5791 return; 5792 } 5793 5794 ASSERT(tip->ti_seqno != 0); 5795 mpp = &tip->ti_mp; 5796 while (*mpp != NULL) { 5797 mp = *mpp; 5798 5799 if (mp->b_datap->db_type == M_PROTO) 5800 prim = ((union T_primitives *)mp->b_rptr)->type; 5801 else 5802 prim = -1; /* M_DATA */ 5803 5804 /* 5805 * Allow nothing after a T_DISCON_IND 5806 */ 5807 if (prim == T_DISCON_IND) { 5808 freemsg(nmp); 5809 return; 5810 } 5811 /* 5812 * Only allow a T_DISCON_IND after an T_ORDREL_IND 5813 */ 5814 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) { 5815 freemsg(nmp); 5816 return; 5817 } 5818 mpp = &(mp->b_next); 5819 } 5820 *mpp = nmp; 5821 } 5822 5823 /* 5824 * Verify if a certain TPI primitive exists on the connind queue. 5825 * Use prim -1 for M_DATA. 5826 * Return non-zero if found. 5827 */ 5828 static boolean_t 5829 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim) 5830 { 5831 tl_icon_t *tip = tl_icon_find(tep, seqno); 5832 boolean_t found = B_FALSE; 5833 5834 if (tip != NULL) { 5835 mblk_t *mp; 5836 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) { 5837 found = (DB_TYPE(mp) == M_PROTO && 5838 ((union T_primitives *)mp->b_rptr)->type == prim); 5839 } 5840 } 5841 return (found); 5842 } 5843 5844 /* 5845 * Send the b_next mblk chain that has accumulated before the connection 5846 * was accepted. Perform the necessary state transitions. 
5847 */ 5848 static void 5849 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp) 5850 { 5851 mblk_t *mp; 5852 union T_primitives *primp; 5853 5854 if (tep->te_closing) { 5855 tl_icon_freemsgs(mpp); 5856 return; 5857 } 5858 5859 ASSERT(tep->te_state == TS_DATA_XFER); 5860 ASSERT(tep->te_rq->q_first == NULL); 5861 5862 while ((mp = *mpp) != NULL) { 5863 *mpp = mp->b_next; 5864 mp->b_next = NULL; 5865 5866 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO)); 5867 switch (DB_TYPE(mp)) { 5868 default: 5869 freemsg(mp); 5870 break; 5871 case M_DATA: 5872 putnext(tep->te_rq, mp); 5873 break; 5874 case M_PROTO: 5875 primp = (union T_primitives *)mp->b_rptr; 5876 switch (primp->type) { 5877 case T_UNITDATA_IND: 5878 case T_DATA_IND: 5879 case T_OPTDATA_IND: 5880 case T_EXDATA_IND: 5881 putnext(tep->te_rq, mp); 5882 break; 5883 case T_ORDREL_IND: 5884 tep->te_state = NEXTSTATE(TE_ORDREL_IND, 5885 tep->te_state); 5886 putnext(tep->te_rq, mp); 5887 break; 5888 case T_DISCON_IND: 5889 tep->te_state = TS_IDLE; 5890 putnext(tep->te_rq, mp); 5891 break; 5892 default: 5893 #ifdef DEBUG 5894 cmn_err(CE_PANIC, 5895 "tl_icon_sendmsgs: unknown primitive"); 5896 #endif /* DEBUG */ 5897 freemsg(mp); 5898 break; 5899 } 5900 break; 5901 } 5902 } 5903 } 5904 5905 /* 5906 * Free the b_next mblk chain that has accumulated before the connection 5907 * was accepted. 5908 */ 5909 static void 5910 tl_icon_freemsgs(mblk_t **mpp) 5911 { 5912 mblk_t *mp; 5913 5914 while ((mp = *mpp) != NULL) { 5915 *mpp = mp->b_next; 5916 mp->b_next = NULL; 5917 freemsg(mp); 5918 } 5919 } 5920 5921 /* 5922 * Send M_ERROR 5923 * Note: assumes caller ensured enough space in mp or enough 5924 * memory available. 
Does not attempt recovery from allocb() 5925 * failures 5926 */ 5927 5928 static void 5929 tl_merror(queue_t *wq, mblk_t *mp, int error) 5930 { 5931 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 5932 5933 if (tep->te_closing) { 5934 freemsg(mp); 5935 return; 5936 } 5937 5938 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5939 SL_TRACE|SL_ERROR, 5940 "tl_merror: tep=%p, err=%d", (void *)tep, error)); 5941 5942 /* 5943 * flush all messages on queue. we are shutting 5944 * the stream down on fatal error 5945 */ 5946 flushq(wq, FLUSHALL); 5947 if (IS_COTS(tep)) { 5948 /* connection oriented - unconnect endpoints */ 5949 tl_co_unconnect(tep); 5950 } 5951 if (mp->b_cont) { 5952 freemsg(mp->b_cont); 5953 mp->b_cont = NULL; 5954 } 5955 5956 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) { 5957 freemsg(mp); 5958 mp = allocb(1, BPRI_HI); 5959 if (!mp) { 5960 (void) (STRLOG(TL_ID, tep->te_minor, 1, 5961 SL_TRACE|SL_ERROR, 5962 "tl_merror:M_PROTO: out of memory")); 5963 return; 5964 } 5965 } 5966 if (mp) { 5967 DB_TYPE(mp) = M_ERROR; 5968 mp->b_rptr = DB_BASE(mp); 5969 *mp->b_rptr = (char)error; 5970 mp->b_wptr = mp->b_rptr + sizeof (char); 5971 qreply(wq, mp); 5972 } else { 5973 (void) putnextctl1(tep->te_rq, M_ERROR, error); 5974 } 5975 } 5976 5977 static void 5978 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr) 5979 { 5980 ASSERT(cr != NULL); 5981 5982 if (flag & TL_SETCRED) { 5983 struct opthdr *opt = (struct opthdr *)buf; 5984 tl_credopt_t *tlcred; 5985 5986 opt->level = TL_PROT_LEVEL; 5987 opt->name = TL_OPT_PEER_CRED; 5988 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t)); 5989 5990 tlcred = (tl_credopt_t *)(opt + 1); 5991 tlcred->tc_uid = crgetuid(cr); 5992 tlcred->tc_gid = crgetgid(cr); 5993 tlcred->tc_ruid = crgetruid(cr); 5994 tlcred->tc_rgid = crgetrgid(cr); 5995 tlcred->tc_suid = crgetsuid(cr); 5996 tlcred->tc_sgid = crgetsgid(cr); 5997 tlcred->tc_ngroups = crgetngroups(cr); 5998 } else if (flag & TL_SETUCRED) { 5999 struct opthdr *opt = (struct 
opthdr *)buf; 6000 6001 opt->level = TL_PROT_LEVEL; 6002 opt->name = TL_OPT_PEER_UCRED; 6003 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr)); 6004 6005 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr); 6006 } else { 6007 struct T_opthdr *topt = (struct T_opthdr *)buf; 6008 ASSERT(flag & TL_SOCKUCRED); 6009 6010 topt->level = SOL_SOCKET; 6011 topt->name = SCM_UCRED; 6012 topt->len = ucredminsize(cr) + sizeof (*topt); 6013 topt->status = 0; 6014 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr); 6015 } 6016 } 6017 6018 /* ARGSUSED */ 6019 static int 6020 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6021 { 6022 /* no default value processed in protocol specific code currently */ 6023 return (-1); 6024 } 6025 6026 /* ARGSUSED */ 6027 static int 6028 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr) 6029 { 6030 int len; 6031 tl_endpt_t *tep; 6032 int *valp; 6033 6034 tep = (tl_endpt_t *)wq->q_ptr; 6035 6036 len = 0; 6037 6038 /* 6039 * Assumes: option level and name sanity check done elsewhere 6040 */ 6041 6042 switch (level) { 6043 case SOL_SOCKET: 6044 if (! IS_SOCKET(tep)) 6045 break; 6046 switch (name) { 6047 case SO_RECVUCRED: 6048 len = sizeof (int); 6049 valp = (int *)ptr; 6050 *valp = (tep->te_flag & TL_SOCKUCRED) != 0; 6051 break; 6052 default: 6053 break; 6054 } 6055 break; 6056 case TL_PROT_LEVEL: 6057 switch (name) { 6058 case TL_OPT_PEER_CRED: 6059 case TL_OPT_PEER_UCRED: 6060 /* 6061 * option not supposed to retrieved directly 6062 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND 6063 * when some internal flags set by other options 6064 * Direct retrieval always designed to fail(ignored) 6065 * for this option. 
6066 */ 6067 break; 6068 } 6069 } 6070 return (len); 6071 } 6072 6073 /* ARGSUSED */ 6074 static int 6075 tl_set_opt( 6076 queue_t *wq, 6077 uint_t mgmt_flags, 6078 int level, 6079 int name, 6080 uint_t inlen, 6081 uchar_t *invalp, 6082 uint_t *outlenp, 6083 uchar_t *outvalp, 6084 void *thisdg_attrs, 6085 cred_t *cr) 6086 { 6087 int error; 6088 tl_endpt_t *tep; 6089 6090 tep = (tl_endpt_t *)wq->q_ptr; 6091 6092 error = 0; /* NOERROR */ 6093 6094 /* 6095 * Assumes: option level and name sanity checks done elsewhere 6096 */ 6097 6098 switch (level) { 6099 case SOL_SOCKET: 6100 if (! IS_SOCKET(tep)) { 6101 error = EINVAL; 6102 break; 6103 } 6104 /* 6105 * TBD: fill in other AF_UNIX socket options and then stop 6106 * returning error. 6107 */ 6108 switch (name) { 6109 case SO_RECVUCRED: 6110 /* 6111 * We only support this for datagram sockets; 6112 * getpeerucred handles the connection oriented 6113 * transports. 6114 */ 6115 if (! IS_CLTS(tep)) { 6116 error = EINVAL; 6117 break; 6118 } 6119 if (*(int *)invalp == 0) 6120 tep->te_flag &= ~TL_SOCKUCRED; 6121 else 6122 tep->te_flag |= TL_SOCKUCRED; 6123 break; 6124 default: 6125 error = EINVAL; 6126 break; 6127 } 6128 break; 6129 case TL_PROT_LEVEL: 6130 switch (name) { 6131 case TL_OPT_PEER_CRED: 6132 case TL_OPT_PEER_UCRED: 6133 /* 6134 * option not supposed to be set directly 6135 * Its value in initialized for each endpoint at 6136 * driver open time. 6137 * Direct setting always designed to fail for this 6138 * option. 
6139 */ 6140 (void) (STRLOG(TL_ID, tep->te_minor, 1, 6141 SL_TRACE|SL_ERROR, 6142 "tl_set_opt: option is not supported")); 6143 error = EPROTO; 6144 break; 6145 } 6146 } 6147 return (error); 6148 } 6149 6150 6151 static void 6152 tl_timer(void *arg) 6153 { 6154 queue_t *wq = arg; 6155 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6156 6157 ASSERT(tep); 6158 6159 tep->te_timoutid = 0; 6160 6161 enableok(wq); 6162 /* 6163 * Note: can call wsrv directly here and save context switch 6164 * Consider change when qtimeout (not timeout) is active 6165 */ 6166 qenable(wq); 6167 } 6168 6169 static void 6170 tl_buffer(void *arg) 6171 { 6172 queue_t *wq = arg; 6173 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr; 6174 6175 ASSERT(tep); 6176 6177 tep->te_bufcid = 0; 6178 tep->te_nowsrv = B_FALSE; 6179 6180 enableok(wq); 6181 /* 6182 * Note: can call wsrv directly here and save context switch 6183 * Consider change when qbufcall (not bufcall) is active 6184 */ 6185 qenable(wq); 6186 } 6187 6188 static void 6189 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size) 6190 { 6191 tl_endpt_t *tep; 6192 6193 tep = (tl_endpt_t *)wq->q_ptr; 6194 6195 if (tep->te_closing) { 6196 freemsg(mp); 6197 return; 6198 } 6199 noenable(wq); 6200 6201 (void) insq(wq, wq->q_first, mp); 6202 6203 if (tep->te_bufcid || tep->te_timoutid) { 6204 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR, 6205 "tl_memrecover:recover %p pending", (void *)wq)); 6206 return; 6207 } 6208 6209 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) { 6210 tep->te_timoutid = qtimeout(wq, tl_timer, wq, 6211 drv_usectohz(TL_BUFWAIT)); 6212 } 6213 } 6214 6215 static void 6216 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip) 6217 { 6218 ASSERT(tip->ti_seqno != 0); 6219 6220 if (tip->ti_mp != NULL) { 6221 tl_icon_freemsgs(&tip->ti_mp); 6222 tip->ti_mp = NULL; 6223 } 6224 if (tip->ti_tep != NULL) { 6225 tl_refrele(tip->ti_tep); 6226 tip->ti_tep = NULL; 6227 } 6228 list_remove(&tep->te_iconp, tip); 6229 kmem_free(tip, 
sizeof (tl_icon_t)); 6230 tep->te_nicon--; 6231 } 6232 6233 /* 6234 * Remove address from address hash. 6235 */ 6236 static void 6237 tl_addr_unbind(tl_endpt_t *tep) 6238 { 6239 tl_endpt_t *elp; 6240 6241 if (tep->te_flag & TL_ADDRHASHED) { 6242 if (IS_SOCKET(tep)) { 6243 (void) mod_hash_remove(tep->te_addrhash, 6244 (mod_hash_key_t)tep->te_vp, 6245 (mod_hash_val_t *)&elp); 6246 tep->te_vp = (void *)(uintptr_t)tep->te_minor; 6247 tep->te_magic = SOU_MAGIC_IMPLICIT; 6248 } else { 6249 (void) mod_hash_remove(tep->te_addrhash, 6250 (mod_hash_key_t)&tep->te_ap, 6251 (mod_hash_val_t *)&elp); 6252 (void) kmem_free(tep->te_abuf, tep->te_alen); 6253 tep->te_alen = -1; 6254 tep->te_abuf = NULL; 6255 } 6256 tep->te_flag &= ~TL_ADDRHASHED; 6257 } 6258 }