1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 * Copyright (c) 2012 by Delphix. All rights reserved.
28 * Copyright (c) 2018, Joyent, Inc.
29 */
30
31 /*
32 * Multithreaded STREAMS Local Transport Provider.
33 *
34 * OVERVIEW
35 * ========
36 *
37 * This driver provides TLI as well as socket semantics. It provides
38 * connectionless, connection oriented, and connection oriented with orderly
39 * release transports for TLI and sockets. Each transport type has separate name
40 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
41 * this removes any name space conflicts when binding to socket style transport
42 * addresses.
43 *
44 * NOTE: There is one exception: Socket ticots and ticotsord transports share
45 * the same namespace. In fact, sockets always use ticotsord type transport.
46 *
47 * The driver mode is specified during open() by the minor number used for
48 * open.
49 *
50 * The sockets in addition have the following semantic differences:
51 * No support for passing up credentials (TL_SET[U]CRED).
52 *
53 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
55 * T_OPTDATA_IND.
56 *
57 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
58 * a T_CONN_RES is received from the acceptor. This means that a socket
59 * connect will complete before the peer has called accept.
60 *
61 *
62 * MULTITHREADING
63 * ==============
64 *
 * The driver does not use STREAMS protection mechanisms. Instead it uses a
 * generic "serializer" abstraction. Most of the operations are executed behind
 * the serializer and are, essentially, single-threaded. All functions executed
 * behind the same serializer are strictly serialized. So if one thread calls
 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
 * serializer_enter(serializer, bar, mp2, arg2); then (depending on which one
 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg2)
 * or bar(mp2, arg2); foo(mp1, arg1); But foo() and bar() will never run at the
 * same time.
74 *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
78 *
79 * All COTS-type endpoints start their life with private serializers. During
80 * connection request processing the endpoint serializer is switched to the
81 * listener's serializer and the rest of T_CONN_REQ processing is done on the
82 * listener serializer. During T_CONN_RES processing the eager serializer is
83 * switched from listener to acceptor serializer and after that point all
84 * processing for eager and acceptor happens on this serializer. To avoid races
85 * with endpoint closes while its serializer may be changing closes are blocked
86 * while serializers are manipulated.
87 *
88 * References accounting
89 * ---------------------
90 *
91 * Endpoints are reference counted and freed when the last reference is
92 * dropped. Functions within the serializer may access an endpoint state even
93 * after an endpoint closed. The te_closing being set on the endpoint indicates
94 * that the endpoint entered its close routine.
95 *
96 * One reference is held for each opened endpoint instance. The reference
97 * counter is incremented when the endpoint is linked to another endpoint and
98 * decremented when the link disappears. It is also incremented when the
99 * endpoint is found by the hash table lookup. This increment is atomic with the
100 * lookup itself and happens while the hash table read lock is held.
101 *
102 * Close synchronization
103 * ---------------------
104 *
 * During close the endpoint is marked as closing using te_closing flag. It is
106 * usually enough to check for te_closing flag since all other state changes
107 * happen after this flag is set and the close entered serializer. Immediately
108 * after setting te_closing flag tl_close() enters serializer and waits until
109 * the callback finishes. This allows all functions called within serializer to
110 * simply check te_closing without any locks.
111 *
112 * Serializer management.
113 * ---------------------
114 *
115 * For COTS transports serializers are created when the endpoint is constructed
116 * and destroyed when the endpoint is destructed. CLTS transports use global
117 * serializers - one for sockets and one for TLI.
118 *
119 * COTS serializers have separate reference counts to deal with several
120 * endpoints sharing the same serializer. There is a subtle problem related to
121 * the serializer destruction. The serializer should never be destroyed by any
122 * function executed inside serializer. This means that close has to wait till
123 * all serializer activity for this endpoint is finished before it can drop the
124 * last reference on the endpoint (which may as well free the serializer). This
125 * is only relevant for COTS transports which manage serializers
126 * dynamically. For CLTS transports close may complete without waiting for all
127 * serializer activity to finish since serializer is only destroyed at driver
128 * detach time.
129 *
130 * COTS endpoints keep track of the number of outstanding requests on the
131 * serializer for the endpoint. The code handling accept() avoids changing
132 * client serializer if it has any pending messages on the serializer and
133 * instead moves acceptor to listener's serializer.
134 *
135 *
136 * Use of hash tables
137 * ------------------
138 *
139 * The driver uses modhash hash table implementation. Each transport uses two
140 * hash tables - one for finding endpoints by acceptor ID and another one for
141 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
142 * pair of hash tables since sockets only use TICOTSORD.
143 *
144 * All hash tables lookups increment a reference count for returned endpoints,
145 * so we may safely check the endpoint state even when the endpoint is removed
146 * from the hash by another thread immediately after it is found.
147 *
148 *
149 * CLOSE processing
150 * ================
151 *
152 * The driver enters serializer twice on close(). The close sequence is the
153 * following:
154 *
 * 1) Wait until closing is safe (te_closewait becomes zero)
 *    This step is needed to prevent close during serializer switches. In most
 *    cases (close happening after connection establishment) te_closewait is
 *    zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within serializer and wait for it to complete.
 *
 *    tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
 *    It also needs to clear write-side q_next pointers - this should be done
 *    before qprocsoff().
 *
 *    This synchronous serializer entry during close is needed to ensure that
 *    the queue is valid everywhere inside the serializer.
 *
 *    Note that in many cases close will execute tl_close_ser() synchronously,
 *    so it will not wait at all.
 *
 * 4) Call qprocsoff().
 * 5) Call tl_close_finish_ser() within the serializer and wait for it to
 *    complete (for COTS transports). For CLTS transport there is no wait.
 *
 *    tl_close_finish_ser() finishes the close process and wakes up waiting
 *    close if there is any.
 *
 *    Note that in most cases close will enter tl_close_finish_ser()
 *    synchronously and will not wait at all.
181 *
182 *
183 * Flow Control
184 * ============
185 *
186 * The driver implements both read and write side service routines. No one calls
187 * putq() on the read queue. The read side service routine tl_rsrv() is called
188 * when the read side stream is back-enabled. It enters serializer synchronously
189 * (waits till serializer processing is complete). Within serializer it
190 * back-enables all endpoints blocked by the queue for connection-less
191 * transports and enables write side service processing for the peer for
192 * connection-oriented transports.
193 *
194 * Read and write side service routines use special mblk_sized space in the
195 * endpoint structure to enter perimeter.
196 *
197 * Write-side flow control
198 * -----------------------
199 *
200 * Write side flow control is a bit tricky. The driver needs to deal with two
201 * message queues - the explicit STREAMS message queue maintained by
202 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
203 * queues should be synchronized to preserve message ordering and should
204 * maintain a single order determined by the order in which messages enter
205 * tl_wput(). In order to maintain the ordering between these two queues the
206 * STREAMS queue is only manipulated within the serializer, so the ordering is
207 * provided by the serializer.
208 *
209 * Functions called from the tl_wsrv() sometimes may call putbq(). To
210 * immediately stop any further processing of the STREAMS message queues the
211 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
212 * side service processing stops when the flag is set.
213 *
214 * The tl_wsrv() function enters serializer synchronously and waits for it to
215 * complete. The serializer call-back tl_wsrv_ser() either drains all messages
216 * on the STREAMS queue or terminates when it notices the te_nowsrv flag
217 * set. Note that the maximum amount of messages processed by tl_wput_ser() is
218 * always bounded by the amount of messages on the STREAMS queue at the time
219 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
220 * queue from another serialized entry which can't happen in parallel. This
221 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
222 * of it draining forever while writer places new messages on the STREAMS
223 * queue).
224 *
225 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
226 *
227 *
228 * Unix Domain Sockets
229 * ===================
230 *
231 * The driver knows the structure of Unix Domain sockets addresses and treats
232 * them differently from generic TLI addresses. For sockets implicit binds are
233 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
234 * instead of using address length of zero. Explicit binds specify
235 * SOU_MAGIC_EXPLICIT as magic.
236 *
237 * For implicit binds we always use minor number as soua_vp part of the address
238 * and avoid any hash table lookups. This saves two hash tables lookups per
239 * anonymous bind.
240 *
241 * For explicit address we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
243 * hashing by the full address.
244 *
245 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
 * tep structure, so it should never be freed.
247 *
248 * Also for sockets the driver always uses minor number as acceptor id.
249 *
250 * TPI VIOLATIONS
251 * --------------
252 *
253 * This driver violates TPI in several respects for Unix Domain Sockets:
254 *
255 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
256 * is requested and the endpoint is already in use. There is no point in
257 * generating an unused address since this address will be rejected by
258 * sockfs anyway. For implicit binds it always generates a new address
259 * (sets soua_vp to its minor number).
260 *
261 * 2) It always uses minor number as acceptor ID and never uses queue
262 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
263 * message and they do not use the queue pointer.
264 *
 * 3) For Listener sockets the usual sequence is to issue bind() with zero
 *    backlog followed by listen(). The listen() should be issued with non-zero
267 * backlog, so sotpi_listen() issues unbind request followed by bind
268 * request to the same address but with a non-zero qlen value. Both
269 * tl_bind() and tl_unbind() require write lock on the hash table to
270 * insert/remove the address. The driver does not remove the address from
271 * the hash for endpoints that are bound to the explicit address and have
272 * backlog of zero. During T_BIND_REQ processing if the address requested
273 * is equal to the address the endpoint already has it updates the backlog
274 * without reinserting the address in the hash table. This optimization
 *    avoids two hash table updates for each listener created. It also
276 * avoids the problem of a "stolen" address when another listener may use
277 * the same address between the unbind and bind and suddenly listen() fails
278 * because address is in use even though the bind() succeeded.
279 *
280 *
281 * CONNECTIONLESS TRANSPORTS
282 * =========================
283 *
284 * Connectionless transports all share the same serializer (one for TLI and one
285 * for Sockets). Functions executing behind serializer can check or modify state
286 * of any endpoint.
287 *
288 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
289 * te_lastep field. The next time X talks to some address A it checks whether A
290 * is the same as Y's address and if it is there is no need to lookup Y. If the
291 * address is different or the state of Y is not appropriate (e.g. closed or not
292 * idle) X does a lookup using tl_find_peer() and caches the new address.
293 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
294 * on the endpoint found.
295 *
296 * During close of endpoint Y it doesn't try to remove itself from other
297 * endpoints caches. They will detect that Y is gone and will search the peer
298 * endpoint again.
299 *
300 * Flow Control Handling.
301 * ----------------------
302 *
303 * Each connectionless endpoint keeps a list of endpoints which are
304 * flow-controlled by its queue. It also keeps a pointer to the queue which
305 * flow-controls itself. Whenever flow control releases for endpoint X it
306 * enables all queues from the list. During close it also back-enables everyone
 * in the list. If X is flow-controlled when it is closing it removes itself
 * from the peer's list.
309 *
310 * DATA STRUCTURES
311 * ===============
312 *
313 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
 * endpoint state. For connection-oriented transports it keeps a list
315 * of pending connections (tl_icon_t). For connectionless transports it keeps a
316 * list of endpoints flow controlled by this one.
317 *
318 * Each transport type is represented by a per-transport data structure
319 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
320 * endpoint address hash tables for each transport. It also contains pointer to
321 * transport serializer for connectionless transports.
322 *
323 * Each endpoint keeps a link to its transport structure, so the code can find
324 * all per-transport information quickly.
325 */
326
327 #include <sys/types.h>
328 #include <sys/inttypes.h>
329 #include <sys/stream.h>
330 #include <sys/stropts.h>
331 #define _SUN_TPI_VERSION 2
332 #include <sys/tihdr.h>
333 #include <sys/strlog.h>
334 #include <sys/debug.h>
335 #include <sys/cred.h>
336 #include <sys/errno.h>
337 #include <sys/kmem.h>
338 #include <sys/id_space.h>
339 #include <sys/modhash.h>
340 #include <sys/mkdev.h>
341 #include <sys/tl.h>
342 #include <sys/stat.h>
343 #include <sys/conf.h>
344 #include <sys/modctl.h>
345 #include <sys/strsun.h>
346 #include <sys/socket.h>
347 #include <sys/socketvar.h>
348 #include <sys/sysmacros.h>
349 #include <sys/xti_xtiopt.h>
350 #include <sys/ddi.h>
351 #include <sys/sunddi.h>
352 #include <sys/zone.h>
353 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */
354 #include <inet/optcom.h>
355 #include <sys/strsubr.h>
356 #include <sys/ucred.h>
357 #include <sys/suntpi.h>
358 #include <sys/list.h>
359 #include <sys/serializer.h>
360
361 /*
362 * TBD List
 *	14. Eliminate state changes through table
364 * 16. AF_UNIX socket options
365 * 17. connect() for ticlts
366 * 18. support for "netstat" to show AF_UNIX plus TLI local
367 * transport connections
368 * 21. sanity check to flushing on sending M_ERROR
369 */
370
371 /*
372 * CONSTANT DECLARATIONS
373 * --------------------
374 */
375
/*
 * Local declarations
 */
/* Entry of the ti_statetbl state table for event EV and state ST. */
#define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]

#define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
#define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
#define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgz" is unlimited (0) */
/*
 * Hash tables size. Used as the initial value of tl_hash_size.
 */
#define	TL_HASH_SIZE 311
388
/*
 * Definitions for module_info. These values are the initializers of the
 * tl_minfo structure, in the same order.
 */
#define	TL_ID		(104)		/* module ID number */
#define	TL_NAME		"tl"		/* module name */
#define	TL_MINPSZ	(0)		/* min packet size */
#define	TL_MAXPSZ	INFPSZ		/* max packet size ZZZ */
#define	TL_HIWAT	(16*1024)	/* hi water mark */
#define	TL_LOWAT	(256)		/* lo water mark */
/*
 * Definition of minor numbers/modes for new transport provider modes.
 * We view the socket use as a separate mode to get a separate name space.
 *
 * TL_SOCKET is a separate bit that is OR-ed with one of the three base modes
 * to form the TL_SOCK_* values.
 */
#define	TL_TICOTS	0	/* connection oriented transport */
#define	TL_TICOTSORD	1	/* COTS w/ orderly release */
#define	TL_TICLTS	2	/* connectionless transport */
#define	TL_UNUSED	3	/* named "undefined" in tl_transports[] */
#define	TL_SOCKET	4	/* Socket */
#define	TL_SOCK_COTS	(TL_SOCKET | TL_TICOTS)
#define	TL_SOCK_COTSORD	(TL_SOCKET | TL_TICOTSORD)
#define	TL_SOCK_CLTS	(TL_SOCKET | TL_TICLTS)

#define	TL_MINOR_MASK	0x7	/* wide enough for every mode value above */
#define	TL_MINOR_START	(TL_TICLTS + 1)
413
/*
 * LOCAL MACROS
 */
/* Apply P2ROUNDUP() to p with an alignment of sizeof (t_scalar_t). */
#define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
418
/*
 * EXTERNAL VARIABLE DECLARATIONS
 * -----------------------------
 */
/*
 * state table defined in the OS space.c
 * The first index is bounded by TE_NOEVENTS and the second by TS_NOSTATES;
 * NEXTSTATE(EV, ST) is the accessor used by this file.
 */
extern	char	ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
427
/*
 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
 *
 * tl_open(), tl_close() and tl_rsrv() are installed in tl_rinit; tl_wput()
 * and tl_wsrv() are installed in tl_winit.
 */
static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
static int tl_close(queue_t *, int, cred_t *);
static int tl_wput(queue_t *, mblk_t *);
static int tl_wsrv(queue_t *);
static int tl_rsrv(queue_t *);

/* Driver entry points passed to DDI_DEFINE_STREAM_OPS() for tl_devops. */
static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
440
441
442 /*
443 * GLOBAL DATA STRUCTURES AND VARIABLES
444 * -----------------------------------
445 */
446
/*
 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
 * For now, we only manage the SO_RECVUCRED option but we also have
 * harmless dummy options to make things work with some common code we access.
 *
 * The number of entries is available as TL_OPT_ARR_CNT.
 *
 * NOTE(review): the initializers are positional and follow the member order
 * of opdes_t, which is not declared in this file (presumably it comes from
 * <inet/optcom.h>); confirm against that declaration before editing.
 */
opdes_t	tl_opt_arr[] = {
	/* The SO_TYPE is needed for the hack below */
	{
		SO_TYPE,
		SOL_SOCKET,
		OA_R,
		OA_R,
		OP_NP,
		0,
		sizeof (t_scalar_t),
		0
	},
	/* The one option this driver really manages; int-sized value. */
	{
		SO_RECVUCRED,
		SOL_SOCKET,
		OA_RW,
		OA_RW,
		OP_NP,
		0,
		sizeof (int),
		0
	}
};
475
/*
 * Table of all supported levels
 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 * any supported options so we need this info separately.
 *
 * This is needed only for topmost tpi providers.
 *
 * The number of entries is available as TL_VALID_LEVELS_CNT.
 */
optlevel_t	tl_valid_levels_arr[] = {
	XTI_GENERIC,
	SOL_SOCKET,
	TL_PROT_LEVEL
};
488
/* Number of entries in tl_valid_levels_arr[]. */
#define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
/*
 * Current upper bound on the amount of space needed to return all options.
 * Additional options with data size of sizeof(long) are handled automatically.
 * Others need to be accounted for by hand.
 *
 * The bound is the sum of: 4 bytes (the "<< 2") per tl_opt_arr[] entry, one
 * struct opthdr per tl_opt_arr[] entry, a constant 64 bytes, and one
 * struct T_optmgmt_ack.
 */
#define	TL_MAX_OPT_BUF_LEN	\
		((A_CNT(tl_opt_arr) << 2) + \
		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \
		64 + sizeof (struct T_optmgmt_ack))

/* Number of entries in tl_opt_arr[]. */
#define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)
501
/*
 * transport addr structure
 *
 * Embedded in every endpoint as te_ap; the te_zoneid, te_alen and te_abuf
 * macros give direct access to the three members.
 */
typedef struct tl_addr {
	zoneid_t	ta_zoneid;	/* Zone scope of address */
	t_scalar_t	ta_alen;	/* length of abuf */
	void		*ta_abuf;	/* the addr itself */
} tl_addr_t;
510
/*
 * Refcounted version of serializer.
 *
 * Endpoints reference one of these through te_ser; the te_serializer macro
 * reaches the wrapped serializer_t.
 */
typedef struct tl_serializer {
	uint_t		ts_refcnt;	/* reference count */
	serializer_t	*ts_serializer;	/* the serializer being counted */
} tl_serializer_t;
518
/*
 * Each transport type has a separate state.
 * Per-transport state.
 *
 * Instances live in the tl_transports[] table and are reached from an
 * endpoint through te_transport.
 */
typedef struct tl_transport_state {
	char		*tr_name;	/* transport name, e.g. "ticots" */
	minor_t		tr_minor;	/* TL_* mode value of the transport */
	uint32_t	tr_defaddr;	/* starts out as TL_DFADDR */
	mod_hash_t	*tr_ai_hash;	/* acceptor ID hash (te_aihash) */
	mod_hash_t	*tr_addr_hash;	/* address hash (te_addrhash) */
	tl_serializer_t	*tr_serializer;	/* transport-wide serializer */
} tl_transport_state_t;
531
532 #define TL_DFADDR 0x1000
533
534 static tl_transport_state_t tl_transports[] = {
535 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
536 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
537 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
538 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
539 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
540 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL },
541 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
542 };
543
544 #define TL_MAXTRANSPORT A_CNT(tl_transports)
545
/* Forward declaration; the structure itself is defined further down. */
struct tl_endpt;
typedef struct tl_endpt tl_endpt_t;

/* Function type taking a message block and an endpoint, returning nothing. */
typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
550
/*
 * Data structure used to represent pending connects.
 * Records enough information so that the connecting peer can close
 * before the connection gets accepted.
 *
 * ti_node is the list linkage for these entries.
 */
typedef struct tl_icon {
	list_node_t	ti_node;
	struct tl_endpt *ti_tep;	/* NULL if peer has already closed */
	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
	t_scalar_t	ti_seqno;	/* Sequence number */
} tl_icon_t;
562
/* Short name for the Unix domain socket address structure. */
typedef struct so_ux_addr soux_addr_t;
#define	TL_SOUX_ADDRLEN		sizeof (soux_addr_t)

/*
 * Maximum number of unaccepted connection indications allowed per listener.
 * tl_maxqlen has external linkage and is not const, so its value can differ
 * from TL_MAXQLEN at run time.
 */
#define	TL_MAXQLEN	4096
int tl_maxqlen = TL_MAXQLEN;
571
/*
 * transport endpoint structure
 *
 * The #define lines interleaved with the members are shorthand accessors
 * (e.g. te_alen for te_ap.ta_alen) and are meant to be applied to a
 * tl_endpt_t through "->" or ".".
 */
struct tl_endpt {
	queue_t		*te_rq;		/* stream read queue */
	queue_t		*te_wq;		/* stream write queue */
	uint32_t	te_refcnt;	/* reference count */
	int32_t		te_state;	/* TPI state of endpoint */
	minor_t		te_minor;	/* minor number */
#define	te_seqno	te_minor	/* same storage as te_minor */
	uint_t		te_flag;	/* flag field */
	boolean_t	te_nowsrv;	/* set by TL_PUTBQ, cleared by TL_PUTQ */
	tl_serializer_t	*te_ser;	/* Serializer to use */
#define	te_serializer	te_ser->ts_serializer

	soux_addr_t	te_uxaddr;	/* Socket address */
#define	te_magic	te_uxaddr.soua_magic
#define	te_vp		te_uxaddr.soua_vp
	tl_addr_t	te_ap;		/* addr bound to this endpt */
#define	te_zoneid te_ap.ta_zoneid
#define	te_alen	te_ap.ta_alen
#define	te_abuf	te_ap.ta_abuf

	tl_transport_state_t *te_transport;	/* per-transport state */
#define	te_addrhash	te_transport->tr_addr_hash
#define	te_aihash	te_transport->tr_ai_hash
#define	te_defaddr	te_transport->tr_defaddr
	cred_t		*te_credp;	/* endpoint user credentials */
	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */

	/*
	 * State specific for connection-oriented and connectionless transports.
	 * The two structures share storage, so only the set matching the
	 * endpoint type is meaningful.
	 */
	union {
		/* Connection-oriented state. */
		struct {
			t_uscalar_t _te_nicon;	/* count of conn requests */
			t_uscalar_t _te_qlen;	/* max conn requests */
			tl_endpt_t  *_te_oconp;	/* conn request pending */
			tl_endpt_t  *_te_conp;	/* connected endpt */
			/*
			 * NOTE(review): padding compiled only when _ILP32 is
			 * not defined; its purpose is not stated here.
			 */
#ifndef _ILP32
			void	    *_te_pad;
#endif
			list_t	_te_iconp;	/* list of conn ind. pending */
		} _te_cots_state;
		/* Connection-less state. */
		struct {
			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
			list_node_t _te_flows;	/* lists of connections */
			list_t	_te_flowlist;	/* Who flowcontrols on me */
		} _te_clts_state;
	} _te_transport_state;
#define	te_nicon	_te_transport_state._te_cots_state._te_nicon
#define	te_qlen		_te_transport_state._te_cots_state._te_qlen
#define	te_oconp	_te_transport_state._te_cots_state._te_oconp
#define	te_conp		_te_transport_state._te_cots_state._te_conp
#define	te_iconp	_te_transport_state._te_cots_state._te_iconp
#define	te_lastep	_te_transport_state._te_clts_state._te_lastep
#define	te_flowq	_te_transport_state._te_clts_state._te_flowq
#define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
#define	te_flows	_te_transport_state._te_clts_state._te_flows

	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
	timeout_id_t	te_timoutid;	/* outstanding timeout id */
	pid_t		te_cpid;	/* cached pid of endpoint */
	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */
	/*
	 * Pieces of the endpoint state needed for closing.
	 */
	kmutex_t	te_closelock;
	kcondvar_t	te_closecv;
	uint8_t		te_closing;	/* The endpoint started closing */
	uint8_t		te_closewait;	/* Wait in close until zero */
	mblk_t		te_closemp;	/* for entering serializer on close */
	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */
	kmutex_t	te_srv_lock;
	kcondvar_t	te_srv_cv;
	uint8_t		te_rsrv_active;	/* Running in tl_rsrv() */
	uint8_t		te_wsrv_active;	/* Running in tl_wsrv() */
	/*
	 * Pieces of the endpoint state needed for serializer transitions.
	 */
	kmutex_t	te_ser_lock;	/* Protects the count below */
	uint_t		te_ser_count;	/* Number of messages on serializer */
};
659
/*
 * Flag values. Lower 4 bits specify the transport used.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
 * they allow to identify the endpoint more easily.
 */
#define	TL_LISTENER	0x00010	/* the listener endpoint */
#define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
#define	TL_EAGER	0x00040	/* connecting endpoint */
#define	TL_ACCEPTED	0x00080	/* accepted connection */
#define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
#define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
#define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
#define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
#define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
/*
 * Boolean checks for the endpoint type.
 * Each one tests a single mode bit of te_flag: TL_TICLTS (2) separates CLTS
 * from COTS, TL_TICOTSORD (1) marks orderly release and TL_SOCKET (4) marks
 * socket endpoints.
 */
#define	IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
#define	IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
#define	IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
#define	IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)
681
/*
 * Certain operations are always used together. These macros reduce the chance
 * of missing a part of a combination.
 *
 * NOTE(review): every macro here expands to a bare { } block rather than
 * do { } while (0), so "if (c) TL_XXX(...); else ..." does not compile; use
 * them as complete statements or inside braces. TL_UNCONNECT() assigns to its
 * argument without parenthesizing it, so pass a plain modifiable lvalue.
 */
/* Call tl_refrele() on x, then set x to NULL. */
#define	TL_UNCONNECT(x)	{ tl_refrele(x); x = NULL; }
/* Same as TL_UNCONNECT(), but does nothing when x is already NULL. */
#define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }

/*
 * Put mp back on the write queue of x with putbq() and set te_nowsrv.
 * Asserts that TL_CLOSE_SER is not set on the endpoint.
 */
#define	TL_PUTBQ(x, mp) {				\
	ASSERT(!((x)->te_flag & TL_CLOSE_SER));		\
	(x)->te_nowsrv = B_TRUE;			\
	(void) putbq((x)->te_wq, mp);			\
}

/* Clear te_nowsrv and qenable() the write queue of x. */
#define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
/* Clear te_nowsrv and putq() mp on the write queue of x. */
#define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
697
/*
 * STREAMS driver glue data structures.
 */
/* Module information shared by the read and write qinit structures. */
static struct module_info tl_minfo = {
	TL_ID,		/* mi_idnum */
	TL_NAME,	/* mi_idname */
	TL_MINPSZ,	/* mi_minpsz */
	TL_MAXPSZ,	/* mi_maxpsz */
	TL_HIWAT,	/* mi_hiwat */
	TL_LOWAT	/* mi_lowat */
};
709
/*
 * Read-side queue initialization. There is no put procedure; the service
 * routine and the open/close routines are installed here.
 */
static struct qinit tl_rinit = {
	NULL,		/* qi_putp */
	tl_rsrv,	/* qi_srvp */
	tl_open,	/* qi_qopen */
	tl_close,	/* qi_qclose */
	NULL,		/* qi_qadmin */
	&tl_minfo,	/* qi_minfo */
	NULL		/* qi_mstat */
};
719
/*
 * Write-side queue initialization: put and service procedures only; open and
 * close are supplied by the read side (tl_rinit).
 */
static struct qinit tl_winit = {
	tl_wput,	/* qi_putp */
	tl_wsrv,	/* qi_srvp */
	NULL,		/* qi_qopen */
	NULL,		/* qi_qclose */
	NULL,		/* qi_qadmin */
	&tl_minfo,	/* qi_minfo */
	NULL		/* qi_mstat */
};
729
/* Stream table tying the two qinit structures together; no mux entries. */
static struct streamtab tlinfo = {
	&tl_rinit,	/* st_rdinit */
	&tl_winit,	/* st_wrinit */
	NULL,		/* st_muxrinit */
	NULL		/* st_muxwrinit */
};
736
/*
 * Device operations for the driver, named tl_devops. tl_attach, tl_detach and
 * tl_info are the only driver-specific routines supplied; the remaining
 * routine slots are nulldev and quiesce is ddi_quiesce_not_supported.
 * NOTE(review): the meaning of each positional argument comes from the
 * DDI_DEFINE_STREAM_OPS() definition, which is not part of this file.
 */
DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);

/* Driver linkage structure referenced from modlinkage. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module -- pseudo driver here */
	"TPI Local Transport (tl)",
	&tl_devops,		/* driver ops */
};
745
/*
 * Module linkage information for the kernel.
 * Lists the single modldrv above, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
754
/*
 * Templates for response to info request
 * Check sanity of unlimited connect data etc.
 */

/* PROVIDER_flag values; currently identical for both service types. */
#define	TL_CLTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)
#define	TL_COTS_PROVIDER_FLAG	(XPG4_1 | SENDZERO)

/*
 * Template for connection-oriented endpoints. TIDU_size is left as zero and
 * CURRENT_state as -1 here; per the field comments TIDU_size is filled in at
 * run time.
 */
static struct T_info_ack tl_cots_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type -always T_INFO_ACK */
		T_INFINITE,	/* TSDU size */
		T_INFINITE,	/* ETSDU size */
		T_INFINITE,	/* CDATA_size */
		T_INFINITE,	/* DDATA_size */
		T_INFINITE,	/* ADDR_size  */
		T_INFINITE,	/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_COTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};
777
778 static struct T_info_ack tl_clts_info_ack =
779 {
780 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */
781 0, /* TSDU_size - fill at run time */
782 -2, /* ETSDU_size -2 => not supported */
783 -2, /* CDATA_size -2 => not supported */
784 -2, /* DDATA_size -2 => not supported */
785 -1, /* ADDR_size -1 => infinite */
786 -1, /* OPT_size */
787 0, /* TIDU_size - fill at run time */
788 T_CLTS, /* SERV_type */
789 -1, /* CURRENT_state */
790 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
791 };
792
/*
 * private copy of devinfo pointer used in tl_info
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables. Starts out as TL_HASH_SIZE.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
/*
 * NOTE(review): the two variables below carry no description in this part of
 * the file; both start out as zero. Confirm their role at the use sites.
 */
static int tl_client_closing_when_accepting;

static int tl_serializer_noswitch;
825
/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 *
 * Forward declarations for the file-private routines. The "_ser" routines
 * that are visible in this part of the file take (mblk_t *, tl_endpt_t *) and
 * are handed to tl_serializer_enter() as the serialized callback.
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
    t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);
904
/*
 * Initialize option database object for TL.
 *
 * Bundles the driver's default/get/set option callbacks with its option
 * table and valid-level table, together with the entry counts of both.
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};
918
919 /*
920 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
921 * ---------------------------------------
922 */
923
924 /*
925 * Loadable module routines
926 */
927 int
928 _init(void)
929 {
930 return (mod_install(&modlinkage));
931 }
932
933 int
934 _fini(void)
935 {
936 return (mod_remove(&modlinkage));
937 }
938
939 int
940 _info(struct modinfo *modinfop)
941 {
942 return (mod_info(&modlinkage, modinfop));
943 }
944
945 /*
946 * Driver Entry Points and Other routines
947 */
948 static int
949 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
950 {
951 int i;
952 char name[32];
953
954 /*
955 * Resume from a checkpoint state.
956 */
957 if (cmd == DDI_RESUME)
958 return (DDI_SUCCESS);
959
960 if (cmd != DDI_ATTACH)
961 return (DDI_FAILURE);
962
963 /*
964 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that
965 * streams message sizes can be unlimited. We use a defined constant
966 * instead.
967 */
968 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
969
970 /*
971 * Create subdevices for each transport.
972 */
973 for (i = 0; i < TL_UNUSED; i++) {
974 if (ddi_create_minor_node(devi,
975 tl_transports[i].tr_name,
976 S_IFCHR, tl_transports[i].tr_minor,
977 DDI_PSEUDO, 0) == DDI_FAILURE) {
978 ddi_remove_minor_node(devi, NULL);
979 return (DDI_FAILURE);
980 }
981 }
982
983 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
984 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
985
986 if (tl_cache == NULL) {
987 ddi_remove_minor_node(devi, NULL);
988 return (DDI_FAILURE);
989 }
990
991 tl_minors = id_space_create("tl_minor_space",
992 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
993
994 /*
995 * Create ID space for minor numbers
996 */
997 for (i = 0; i < TL_MAXTRANSPORT; i++) {
998 tl_transport_state_t *t = &tl_transports[i];
999
1000 if (i == TL_UNUSED)
1001 continue;
1002
1003 /* Socket COTSORD shares namespace with COTS */
1004 if (i == TL_SOCK_COTSORD) {
1005 t->tr_ai_hash =
1006 tl_transports[TL_SOCK_COTS].tr_ai_hash;
1007 ASSERT(t->tr_ai_hash != NULL);
1008 t->tr_addr_hash =
1009 tl_transports[TL_SOCK_COTS].tr_addr_hash;
1010 ASSERT(t->tr_addr_hash != NULL);
1011 continue;
1012 }
1013
1014 /*
1015 * Create hash tables.
1016 */
1017 (void) snprintf(name, sizeof (name), "%s_ai_hash",
1018 t->tr_name);
1019 #ifdef _ILP32
1020 if (i & TL_SOCKET)
1021 t->tr_ai_hash =
1022 mod_hash_create_idhash(name, tl_hash_size - 1,
1023 mod_hash_null_valdtor);
1024 else
1025 t->tr_ai_hash =
1026 mod_hash_create_ptrhash(name, tl_hash_size,
1027 mod_hash_null_valdtor, sizeof (queue_t));
1028 #else
1029 t->tr_ai_hash =
1030 mod_hash_create_idhash(name, tl_hash_size - 1,
1031 mod_hash_null_valdtor);
1032 #endif /* _ILP32 */
1033
1034 if (i & TL_SOCKET) {
1035 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1036 t->tr_name);
1037 t->tr_addr_hash = mod_hash_create_ptrhash(name,
1038 tl_hash_size, mod_hash_null_valdtor,
1039 sizeof (uintptr_t));
1040 } else {
1041 (void) snprintf(name, sizeof (name), "%s_addr_hash",
1042 t->tr_name);
1043 t->tr_addr_hash = mod_hash_create_extended(name,
1044 tl_hash_size, mod_hash_null_keydtor,
1045 mod_hash_null_valdtor,
1046 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1047 }
1048
1049 /* Create serializer for connectionless transports. */
1050 if (i & TL_TICLTS)
1051 t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1052 }
1053
1054 tl_dip = devi;
1055
1056 return (DDI_SUCCESS);
1057 }
1058
1059 static int
1060 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1061 {
1062 int i;
1063
1064 if (cmd == DDI_SUSPEND)
1065 return (DDI_SUCCESS);
1066
1067 if (cmd != DDI_DETACH)
1068 return (DDI_FAILURE);
1069
1070 /*
1071 * Destroy arenas and hash tables.
1072 */
1073 for (i = 0; i < TL_MAXTRANSPORT; i++) {
1074 tl_transport_state_t *t = &tl_transports[i];
1075
1076 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1077 continue;
1078
1079 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1080 if (t->tr_serializer != NULL) {
1081 tl_serializer_refrele(t->tr_serializer);
1082 t->tr_serializer = NULL;
1083 }
1084
1085 #ifdef _ILP32
1086 if (i & TL_SOCKET)
1087 mod_hash_destroy_idhash(t->tr_ai_hash);
1088 else
1089 mod_hash_destroy_ptrhash(t->tr_ai_hash);
1090 #else
1091 mod_hash_destroy_idhash(t->tr_ai_hash);
1092 #endif /* _ILP32 */
1093 t->tr_ai_hash = NULL;
1094 if (i & TL_SOCKET)
1095 mod_hash_destroy_ptrhash(t->tr_addr_hash);
1096 else
1097 mod_hash_destroy_hash(t->tr_addr_hash);
1098 t->tr_addr_hash = NULL;
1099 }
1100
1101 kmem_cache_destroy(tl_cache);
1102 tl_cache = NULL;
1103 id_space_destroy(tl_minors);
1104 tl_minors = NULL;
1105 ddi_remove_minor_node(devi, NULL);
1106 return (DDI_SUCCESS);
1107 }
1108
1109 /* ARGSUSED */
1110 static int
1111 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1112 {
1113
1114 int retcode = DDI_FAILURE;
1115
1116 switch (infocmd) {
1117
1118 case DDI_INFO_DEVT2DEVINFO:
1119 if (tl_dip != NULL) {
1120 *result = (void *)tl_dip;
1121 retcode = DDI_SUCCESS;
1122 }
1123 break;
1124
1125 case DDI_INFO_DEVT2INSTANCE:
1126 *result = NULL;
1127 retcode = DDI_SUCCESS;
1128 break;
1129
1130 default:
1131 break;
1132 }
1133 return (retcode);
1134 }
1135
1136 /*
1137 * Endpoint reference management.
1138 */
1139 static void
1140 tl_refhold(tl_endpt_t *tep)
1141 {
1142 atomic_inc_32(&tep->te_refcnt);
1143 }
1144
1145 static void
1146 tl_refrele(tl_endpt_t *tep)
1147 {
1148 ASSERT(tep->te_refcnt != 0);
1149
1150 if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
1151 tl_free(tep);
1152 }
1153
1154 /*ARGSUSED*/
1155 static int
1156 tl_constructor(void *buf, void *cdrarg, int kmflags)
1157 {
1158 tl_endpt_t *tep = buf;
1159
1160 bzero(tep, sizeof (tl_endpt_t));
1161 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1162 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1163 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1164 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1165 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1166
1167 return (0);
1168 }
1169
1170 /*ARGSUSED*/
1171 static void
1172 tl_destructor(void *buf, void *cdrarg)
1173 {
1174 tl_endpt_t *tep = buf;
1175
1176 mutex_destroy(&tep->te_closelock);
1177 cv_destroy(&tep->te_closecv);
1178 mutex_destroy(&tep->te_srv_lock);
1179 cv_destroy(&tep->te_srv_cv);
1180 mutex_destroy(&tep->te_ser_lock);
1181 }
1182
1183 static void
1184 tl_free(tl_endpt_t *tep)
1185 {
1186 ASSERT(tep->te_refcnt == 0);
1187 ASSERT(tep->te_transport != NULL);
1188 ASSERT(tep->te_rq == NULL);
1189 ASSERT(tep->te_wq == NULL);
1190 ASSERT(tep->te_ser != NULL);
1191 ASSERT(tep->te_ser_count == 0);
1192 ASSERT(!(tep->te_flag & TL_ADDRHASHED));
1193
1194 if (IS_SOCKET(tep)) {
1195 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1196 ASSERT(tep->te_abuf == &tep->te_uxaddr);
1197 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1198 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1199 } else if (tep->te_abuf != NULL) {
1200 kmem_free(tep->te_abuf, tep->te_alen);
1201 tep->te_alen = -1; /* uninitialized */
1202 tep->te_abuf = NULL;
1203 } else {
1204 ASSERT(tep->te_alen == -1);
1205 }
1206
1207 id_free(tl_minors, tep->te_minor);
1208 ASSERT(tep->te_credp == NULL);
1209
1210 if (tep->te_hash_hndl != NULL)
1211 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1212
1213 if (IS_COTS(tep)) {
1214 TL_REMOVE_PEER(tep->te_conp);
1215 TL_REMOVE_PEER(tep->te_oconp);
1216 tl_serializer_refrele(tep->te_ser);
1217 tep->te_ser = NULL;
1218 ASSERT(tep->te_nicon == 0);
1219 ASSERT(list_head(&tep->te_iconp) == NULL);
1220 } else {
1221 ASSERT(tep->te_lastep == NULL);
1222 ASSERT(list_head(&tep->te_flowlist) == NULL);
1223 ASSERT(tep->te_flowq == NULL);
1224 }
1225
1226 ASSERT(tep->te_bufcid == 0);
1227 ASSERT(tep->te_timoutid == 0);
1228 bzero(&tep->te_ap, sizeof (tep->te_ap));
1229 tep->te_acceptor_id = 0;
1230
1231 ASSERT(tep->te_closewait == 0);
1232 ASSERT(!tep->te_rsrv_active);
1233 ASSERT(!tep->te_wsrv_active);
1234 tep->te_closing = 0;
1235 tep->te_nowsrv = B_FALSE;
1236 tep->te_flag = 0;
1237
1238 kmem_cache_free(tl_cache, tep);
1239 }
1240
1241 /*
1242 * Allocate/free reference-counted wrappers for serializers.
1243 */
1244 static tl_serializer_t *
1245 tl_serializer_alloc(int flags)
1246 {
1247 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1248 serializer_t *ser;
1249
1250 if (s == NULL)
1251 return (NULL);
1252
1253 ser = serializer_create(flags);
1254
1255 if (ser == NULL) {
1256 kmem_free(s, sizeof (tl_serializer_t));
1257 return (NULL);
1258 }
1259
1260 s->ts_refcnt = 1;
1261 s->ts_serializer = ser;
1262 return (s);
1263 }
1264
1265 static void
1266 tl_serializer_refhold(tl_serializer_t *s)
1267 {
1268 atomic_inc_32(&s->ts_refcnt);
1269 }
1270
1271 static void
1272 tl_serializer_refrele(tl_serializer_t *s)
1273 {
1274 if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
1275 serializer_destroy(s->ts_serializer);
1276 kmem_free(s, sizeof (tl_serializer_t));
1277 }
1278 }
1279
1280 /*
1281 * Post a request on the endpoint serializer. For COTS transports keep track of
1282 * the number of pending requests.
1283 */
1284 static void
1285 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1286 {
1287 if (IS_COTS(tep)) {
1288 mutex_enter(&tep->te_ser_lock);
1289 tep->te_ser_count++;
1290 mutex_exit(&tep->te_ser_lock);
1291 }
1292 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1293 }
1294
1295 /*
1296 * Complete processing the request on the serializer. Decrement the counter for
1297 * pending requests for COTS transports.
1298 */
1299 static void
1300 tl_serializer_exit(tl_endpt_t *tep)
1301 {
1302 if (IS_COTS(tep)) {
1303 mutex_enter(&tep->te_ser_lock);
1304 ASSERT(tep->te_ser_count != 0);
1305 tep->te_ser_count--;
1306 mutex_exit(&tep->te_ser_lock);
1307 }
1308 }
1309
1310 /*
1311 * Hash management functions.
1312 */
1313
1314 /*
1315 * Return TRUE if two addresses are equal, false otherwise.
1316 */
1317 static boolean_t
1318 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1319 {
1320 return ((ap1->ta_alen > 0) &&
1321 (ap1->ta_alen == ap2->ta_alen) &&
1322 (ap1->ta_zoneid == ap2->ta_zoneid) &&
1323 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1324 }
1325
1326 /*
1327 * This function is called whenever an endpoint is found in the hash table.
1328 */
1329 /* ARGSUSED0 */
1330 static void
1331 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1332 {
1333 tl_refhold((tl_endpt_t *)val);
1334 }
1335
1336 /*
1337 * Address hash function.
1338 */
1339 /* ARGSUSED */
1340 static uint_t
1341 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1342 {
1343 tl_addr_t *ap = (tl_addr_t *)key;
1344 size_t len = ap->ta_alen;
1345 uchar_t *p = ap->ta_abuf;
1346 uint_t i, g;
1347
1348 ASSERT((len > 0) && (p != NULL));
1349
1350 for (i = ap->ta_zoneid; len -- != 0; p++) {
1351 i = (i << 4) + (*p);
1352 if ((g = (i & 0xf0000000U)) != 0) {
1353 i ^= (g >> 24);
1354 i ^= g;
1355 }
1356 }
1357 return (i);
1358 }
1359
1360 /*
1361 * This function is used by hash lookups. It compares two generic addresses.
1362 */
1363 static int
1364 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1365 {
1366 #ifdef DEBUG
1367 tl_addr_t *ap1 = (tl_addr_t *)key1;
1368 tl_addr_t *ap2 = (tl_addr_t *)key2;
1369
1370 ASSERT(key1 != NULL);
1371 ASSERT(key2 != NULL);
1372
1373 ASSERT(ap1->ta_abuf != NULL);
1374 ASSERT(ap2->ta_abuf != NULL);
1375 ASSERT(ap1->ta_alen > 0);
1376 ASSERT(ap2->ta_alen > 0);
1377 #endif
1378
1379 return (!tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1380 }
1381
1382 /*
1383 * Prevent endpoint from closing if possible.
1384 * Return B_TRUE on success, B_FALSE on failure.
1385 */
1386 static boolean_t
1387 tl_noclose(tl_endpt_t *tep)
1388 {
1389 boolean_t rc = B_FALSE;
1390
1391 mutex_enter(&tep->te_closelock);
1392 if (!tep->te_closing) {
1393 ASSERT(tep->te_closewait == 0);
1394 tep->te_closewait++;
1395 rc = B_TRUE;
1396 }
1397 mutex_exit(&tep->te_closelock);
1398 return (rc);
1399 }
1400
1401 /*
1402 * Allow endpoint to close if needed.
1403 */
1404 static void
1405 tl_closeok(tl_endpt_t *tep)
1406 {
1407 ASSERT(tep->te_closewait > 0);
1408 mutex_enter(&tep->te_closelock);
1409 ASSERT(tep->te_closewait == 1);
1410 tep->te_closewait--;
1411 cv_signal(&tep->te_closecv);
1412 mutex_exit(&tep->te_closelock);
1413 }
1414
1415 /*
1416 * STREAMS open entry point.
1417 */
1418 /* ARGSUSED */
1419 static int
1420 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
1421 {
1422 tl_endpt_t *tep;
1423 minor_t minor = getminor(*devp);
1424
1425 /*
1426 * Driver is called directly. Both CLONEOPEN and MODOPEN
1427 * are illegal
1428 */
1429 if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1430 return (ENXIO);
1431
1432 if (rq->q_ptr != NULL)
1433 return (0);
1434
1435 /* Minor number should specify the mode used for the driver. */
1436 if ((minor >= TL_UNUSED))
1437 return (ENXIO);
1438
1439 if (oflag & SO_SOCKSTR) {
1440 minor |= TL_SOCKET;
1441 }
1442
1443 tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1444 tep->te_refcnt = 1;
1445 tep->te_cpid = curproc->p_pid;
1446 rq->q_ptr = WR(rq)->q_ptr = tep;
1447 tep->te_state = TS_UNBND;
1448 tep->te_credp = credp;
1449 crhold(credp);
1450 tep->te_zoneid = getzoneid();
1451
1452 tep->te_flag = minor & TL_MINOR_MASK;
1453 tep->te_transport = &tl_transports[minor];
1454
1455 /* Allocate a unique minor number for this instance. */
1456 tep->te_minor = (minor_t)id_alloc(tl_minors);
1457
1458 /* Reserve hash handle for bind(). */
1459 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1460
1461 /* Transport-specific initialization */
1462 if (IS_COTS(tep)) {
1463 /* Use private serializer */
1464 tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1465
1466 /* Create list for pending connections */
1467 list_create(&tep->te_iconp, sizeof (tl_icon_t),
1468 offsetof(tl_icon_t, ti_node));
1469 tep->te_qlen = 0;
1470 tep->te_nicon = 0;
1471 tep->te_oconp = NULL;
1472 tep->te_conp = NULL;
1473 } else {
1474 /* Use shared serializer */
1475 tep->te_ser = tep->te_transport->tr_serializer;
1476 bzero(&tep->te_flows, sizeof (list_node_t));
1477 /* Create list for flow control */
1478 list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1479 offsetof(tl_endpt_t, te_flows));
1480 tep->te_flowq = NULL;
1481 tep->te_lastep = NULL;
1482
1483 }
1484
1485 /* Initialize endpoint address */
1486 if (IS_SOCKET(tep)) {
1487 /* Socket-specific address handling. */
1488 tep->te_alen = TL_SOUX_ADDRLEN;
1489 tep->te_abuf = &tep->te_uxaddr;
1490 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1491 tep->te_magic = SOU_MAGIC_IMPLICIT;
1492 } else {
1493 tep->te_alen = -1;
1494 tep->te_abuf = NULL;
1495 }
1496
1497 /* clone the driver */
1498 *devp = makedevice(getmajor(*devp), tep->te_minor);
1499
1500 tep->te_rq = rq;
1501 tep->te_wq = WR(rq);
1502
1503 #ifdef _ILP32
1504 if (IS_SOCKET(tep))
1505 tep->te_acceptor_id = tep->te_minor;
1506 else
1507 tep->te_acceptor_id = (t_uscalar_t)rq;
1508 #else
1509 tep->te_acceptor_id = tep->te_minor;
1510 #endif /* _ILP32 */
1511
1512
1513 qprocson(rq);
1514
1515 /*
1516 * Insert acceptor ID in the hash. The AI hash always sleeps on
1517 * insertion so insertion can't fail.
1518 */
1519 (void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1520 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1521 (mod_hash_val_t)tep);
1522
1523 return (0);
1524 }
1525
1526 /* ARGSUSED1 */
1527 static int
1528 tl_close(queue_t *rq, int flag, cred_t *credp)
1529 {
1530 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1531 tl_endpt_t *elp = NULL;
1532 queue_t *wq = tep->te_wq;
1533 int rc;
1534
1535 ASSERT(wq == WR(rq));
1536
1537 /*
1538 * Remove the endpoint from acceptor hash.
1539 */
1540 rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1541 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1542 (mod_hash_val_t *)&elp);
1543 ASSERT(rc == 0 && tep == elp);
1544 if ((rc != 0) || (tep != elp)) {
1545 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1546 SL_TRACE | SL_ERROR,
1547 "tl_close:inconsistency in AI hash"));
1548 }
1549
1550 /*
1551 * Wait till close is safe, then mark endpoint as closing.
1552 */
1553 mutex_enter(&tep->te_closelock);
1554 while (tep->te_closewait)
1555 cv_wait(&tep->te_closecv, &tep->te_closelock);
1556 tep->te_closing = B_TRUE;
1557 /*
1558 * Will wait for the serializer part of the close to finish, so set
1559 * te_closewait now.
1560 */
1561 tep->te_closewait = 1;
1562 tep->te_nowsrv = B_FALSE;
1563 mutex_exit(&tep->te_closelock);
1564
1565 /*
1566 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1567 * It is safe because close will wait for tl_close_ser to finish.
1568 */
1569 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1570
1571 /*
1572 * Wait for the first phase of close to complete before qprocsoff().
1573 */
1574 mutex_enter(&tep->te_closelock);
1575 while (tep->te_closewait)
1576 cv_wait(&tep->te_closecv, &tep->te_closelock);
1577 mutex_exit(&tep->te_closelock);
1578
1579 qprocsoff(rq);
1580
1581 if (tep->te_bufcid) {
1582 qunbufcall(rq, tep->te_bufcid);
1583 tep->te_bufcid = 0;
1584 }
1585 if (tep->te_timoutid) {
1586 (void) quntimeout(rq, tep->te_timoutid);
1587 tep->te_timoutid = 0;
1588 }
1589
1590 /*
1591 * Finish close behind serializer.
1592 *
1593 * For a CLTS endpoint increase a refcount and continue close processing
1594 * with serializer protection. This processing may happen asynchronously
1595 * with the completion of tl_close().
1596 *
1597 * Fot a COTS endpoint wait before destroying tep since the serializer
1598 * may go away together with tep and we need to destroy serializer
1599 * outside of serializer context.
1600 */
1601 ASSERT(tep->te_closewait == 0);
1602 if (IS_COTS(tep))
1603 tep->te_closewait = 1;
1604 else
1605 tl_refhold(tep);
1606
1607 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1608
1609 /*
1610 * For connection-oriented transports wait for all serializer activity
1611 * to settle down.
1612 */
1613 if (IS_COTS(tep)) {
1614 mutex_enter(&tep->te_closelock);
1615 while (tep->te_closewait)
1616 cv_wait(&tep->te_closecv, &tep->te_closelock);
1617 mutex_exit(&tep->te_closelock);
1618 }
1619
1620 crfree(tep->te_credp);
1621 tep->te_credp = NULL;
1622 tep->te_wq = NULL;
1623 tl_refrele(tep);
1624 /*
1625 * tep is likely to be destroyed now, so can't reference it any more.
1626 */
1627
1628 rq->q_ptr = wq->q_ptr = NULL;
1629 return (0);
1630 }
1631
1632 /*
1633 * First phase of close processing done behind the serializer.
1634 *
1635 * Do not drop the reference in the end - tl_close() wants this reference to
1636 * stay.
1637 */
1638 /* ARGSUSED0 */
1639 static void
1640 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1641 {
1642 ASSERT(tep->te_closing);
1643 ASSERT(tep->te_closewait == 1);
1644 ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1645
1646 tep->te_flag |= TL_CLOSE_SER;
1647
1648 /*
1649 * Drain out all messages on queue except for TL_TICOTS where the
1650 * abortive release semantics permit discarding of data on close
1651 */
1652 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1653 tl_wsrv_ser(NULL, tep);
1654 }
1655
1656 /* Remove address from hash table. */
1657 tl_addr_unbind(tep);
1658 /*
1659 * qprocsoff() gets confused when q->q_next is not NULL on the write
1660 * queue of the driver, so clear these before qprocsoff() is called.
1661 * Also clear q_next for the peer since this queue is going away.
1662 */
1663 if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1664 tl_endpt_t *peer_tep = tep->te_conp;
1665
1666 tep->te_wq->q_next = NULL;
1667 if ((peer_tep != NULL) && !peer_tep->te_closing)
1668 peer_tep->te_wq->q_next = NULL;
1669 }
1670
1671 tep->te_rq = NULL;
1672
1673 /* wake up tl_close() */
1674 tl_closeok(tep);
1675 tl_serializer_exit(tep);
1676 }
1677
1678 /*
1679 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1680 * the reference for CLTS.
1681 *
1682 * Called from serializer. Should drop reference count for CLTS only.
1683 */
1684 /* ARGSUSED0 */
1685 static void
1686 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1687 {
1688 ASSERT(tep->te_closing);
1689 IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1690 IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1691
1692 tep->te_state = -1; /* Uninitialized */
1693 if (IS_COTS(tep)) {
1694 tl_co_unconnect(tep);
1695 } else {
1696 /* Connectionless specific cleanup */
1697 TL_REMOVE_PEER(tep->te_lastep);
1698 /*
1699 * Backenable anybody that is flow controlled waiting for
1700 * this endpoint.
1701 */
1702 tl_cl_backenable(tep);
1703 if (tep->te_flowq != NULL) {
1704 list_remove(&(tep->te_flowq->te_flowlist), tep);
1705 tep->te_flowq = NULL;
1706 }
1707 }
1708
1709 tl_serializer_exit(tep);
1710 if (IS_COTS(tep))
1711 tl_closeok(tep);
1712 else
1713 tl_refrele(tep);
1714 }
1715
1716 /*
1717 * STREAMS write-side put procedure.
1718 * Enter serializer for most of the processing.
1719 *
1720 * The T_CONN_REQ is processed outside of serializer.
1721 */
1722 static int
1723 tl_wput(queue_t *wq, mblk_t *mp)
1724 {
1725 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1726 ssize_t msz = MBLKL(mp);
1727 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
1728 tlproc_t *tl_proc = NULL;
1729
1730 switch (DB_TYPE(mp)) {
1731 case M_DATA:
1732 /* Only valid for connection-oriented transports */
1733 if (IS_CLTS(tep)) {
1734 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1735 SL_TRACE | SL_ERROR,
1736 "tl_wput:M_DATA invalid for ticlts driver"));
1737 tl_merror(wq, mp, EPROTO);
1738 return (0);
1739 }
1740 tl_proc = tl_wput_data_ser;
1741 break;
1742
1743 case M_IOCTL:
1744 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1745 case TL_IOC_CREDOPT:
1746 /* FALLTHROUGH */
1747 case TL_IOC_UCREDOPT:
1748 /*
1749 * Serialize endpoint state change.
1750 */
1751 tl_proc = tl_do_ioctl_ser;
1752 break;
1753
1754 default:
1755 miocnak(wq, mp, 0, EINVAL);
1756 return (0);
1757 }
1758 break;
1759
1760 case M_FLUSH:
1761 /*
1762 * do canonical M_FLUSH processing
1763 */
1764 if (*mp->b_rptr & FLUSHW) {
1765 flushq(wq, FLUSHALL);
1766 *mp->b_rptr &= ~FLUSHW;
1767 }
1768 if (*mp->b_rptr & FLUSHR) {
1769 flushq(RD(wq), FLUSHALL);
1770 qreply(wq, mp);
1771 } else {
1772 freemsg(mp);
1773 }
1774 return (0);
1775
1776 case M_PROTO:
1777 if (msz < sizeof (prim->type)) {
1778 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1779 SL_TRACE | SL_ERROR,
1780 "tl_wput:M_PROTO data too short"));
1781 tl_merror(wq, mp, EPROTO);
1782 return (0);
1783 }
1784 switch (prim->type) {
1785 case T_OPTMGMT_REQ:
1786 case T_SVR4_OPTMGMT_REQ:
1787 /*
1788 * Process TPI option management requests immediately
1789 * in put procedure regardless of in-order processing
1790 * of already queued messages.
1791 * (Note: This driver supports AF_UNIX socket
1792 * implementation. Unless we implement this processing,
1793 * setsockopt() on socket endpoint will block on flow
1794 * controlled endpoints which it should not. That is
1795 * required for successful execution of VSU socket tests
1796 * and is consistent with BSD socket behavior).
1797 */
1798 tl_optmgmt(wq, mp);
1799 return (0);
1800 case O_T_BIND_REQ:
1801 case T_BIND_REQ:
1802 tl_proc = tl_bind_ser;
1803 break;
1804 case T_CONN_REQ:
1805 if (IS_CLTS(tep)) {
1806 tl_merror(wq, mp, EPROTO);
1807 return (0);
1808 }
1809 tl_conn_req(wq, mp);
1810 return (0);
1811 case T_DATA_REQ:
1812 case T_OPTDATA_REQ:
1813 case T_EXDATA_REQ:
1814 case T_ORDREL_REQ:
1815 tl_proc = tl_putq_ser;
1816 break;
1817 case T_UNITDATA_REQ:
1818 if (IS_COTS(tep) ||
1819 (msz < sizeof (struct T_unitdata_req))) {
1820 tl_merror(wq, mp, EPROTO);
1821 return (0);
1822 }
1823 if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1824 tl_proc = tl_unitdata_ser;
1825 } else {
1826 tl_proc = tl_putq_ser;
1827 }
1828 break;
1829 default:
1830 /*
1831 * process in service procedure if message already
1832 * queued (maintain in-order processing)
1833 */
1834 if (wq->q_first != NULL) {
1835 tl_proc = tl_putq_ser;
1836 } else {
1837 tl_proc = tl_wput_ser;
1838 }
1839 break;
1840 }
1841 break;
1842
1843 case M_PCPROTO:
1844 /*
1845 * Check that the message has enough data to figure out TPI
1846 * primitive.
1847 */
1848 if (msz < sizeof (prim->type)) {
1849 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1850 SL_TRACE | SL_ERROR,
1851 "tl_wput:M_PCROTO data too short"));
1852 tl_merror(wq, mp, EPROTO);
1853 return (0);
1854 }
1855 switch (prim->type) {
1856 case T_CAPABILITY_REQ:
1857 tl_capability_req(mp, tep);
1858 return (0);
1859 case T_INFO_REQ:
1860 tl_proc = tl_info_req_ser;
1861 break;
1862 case T_ADDR_REQ:
1863 tl_proc = tl_addr_req_ser;
1864 break;
1865
1866 default:
1867 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1868 SL_TRACE | SL_ERROR,
1869 "tl_wput:unknown TPI msg primitive"));
1870 tl_merror(wq, mp, EPROTO);
1871 return (0);
1872 }
1873 break;
1874 default:
1875 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
1876 "tl_wput:default:unexpected Streams message"));
1877 freemsg(mp);
1878 return (0);
1879 }
1880
1881 /*
1882 * Continue processing via serializer.
1883 */
1884 ASSERT(tl_proc != NULL);
1885 tl_refhold(tep);
1886 tl_serializer_enter(tep, tl_proc, mp);
1887 return (0);
1888 }
1889
1890 /*
1891 * Place message on the queue while preserving order.
1892 */
1893 static void
1894 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1895 {
1896 if (tep->te_closing) {
1897 tl_wput_ser(mp, tep);
1898 } else {
1899 TL_PUTQ(tep, mp);
1900 tl_serializer_exit(tep);
1901 tl_refrele(tep);
1902 }
1903
1904 }
1905
1906 static void
1907 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1908 {
1909 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1910
1911 switch (DB_TYPE(mp)) {
1912 case M_DATA:
1913 tl_data(mp, tep);
1914 break;
1915 case M_PROTO:
1916 tl_do_proto(mp, tep);
1917 break;
1918 default:
1919 freemsg(mp);
1920 break;
1921 }
1922 }
1923
1924 /*
1925 * Write side put procedure called from serializer.
1926 */
1927 static void
1928 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1929 {
1930 tl_wput_common_ser(mp, tep);
1931 tl_serializer_exit(tep);
1932 tl_refrele(tep);
1933 }
1934
1935 /*
1936 * M_DATA processing. Called from serializer.
1937 */
1938 static void
1939 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1940 {
1941 tl_endpt_t *peer_tep = tep->te_conp;
1942 queue_t *peer_rq;
1943
1944 ASSERT(DB_TYPE(mp) == M_DATA);
1945 ASSERT(IS_COTS(tep));
1946
1947 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1948
1949 /*
1950 * fastpath for data. Ignore flow control if tep is closing.
1951 */
1952 if ((peer_tep != NULL) &&
1953 !peer_tep->te_closing &&
1954 ((tep->te_state == TS_DATA_XFER) ||
1955 (tep->te_state == TS_WREQ_ORDREL)) &&
1956 (tep->te_wq != NULL) &&
1957 (tep->te_wq->q_first == NULL) &&
1958 ((peer_tep->te_state == TS_DATA_XFER) ||
1959 (peer_tep->te_state == TS_WREQ_ORDREL)) &&
1960 ((peer_rq = peer_tep->te_rq) != NULL) &&
1961 (canputnext(peer_rq) || tep->te_closing)) {
1962 putnext(peer_rq, mp);
1963 } else if (tep->te_closing) {
1964 /*
1965 * It is possible that by the time we got here tep started to
1966 * close. If the write queue is not empty, and the state is
1967 * TS_DATA_XFER the data should be delivered in order, so we
1968 * call putq() instead of freeing the data.
1969 */
1970 if ((tep->te_wq != NULL) &&
1971 ((tep->te_state == TS_DATA_XFER) ||
1972 (tep->te_state == TS_WREQ_ORDREL))) {
1973 TL_PUTQ(tep, mp);
1974 } else {
1975 freemsg(mp);
1976 }
1977 } else {
1978 TL_PUTQ(tep, mp);
1979 }
1980
1981 tl_serializer_exit(tep);
1982 tl_refrele(tep);
1983 }
1984
1985 /*
1986 * Write side service routine.
1987 *
1988 * All actual processing happens within serializer which is entered
1989 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1990 * messages that need processing may have arrived, so tl_wsrv repeats until
1991 * queue is empty or te_nowsrv is set.
1992 */
1993 static int
1994 tl_wsrv(queue_t *wq)
1995 {
1996 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1997
1998 while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1999 mutex_enter(&tep->te_srv_lock);
2000 ASSERT(tep->te_wsrv_active == B_FALSE);
2001 tep->te_wsrv_active = B_TRUE;
2002 mutex_exit(&tep->te_srv_lock);
2003
2004 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2005
2006 /*
2007 * Wait for serializer job to complete.
2008 */
2009 mutex_enter(&tep->te_srv_lock);
2010 while (tep->te_wsrv_active) {
2011 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2012 }
2013 cv_signal(&tep->te_srv_cv);
2014 mutex_exit(&tep->te_srv_lock);
2015 }
2016 return (0);
2017 }
2018
2019 /*
2020 * Serialized write side processing of the STREAMS queue.
2021 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2022 * is NULL.
2023 */
2024 static void
2025 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2026 {
2027 mblk_t *mp;
2028 queue_t *wq = tep->te_wq;
2029
2030 ASSERT(wq != NULL);
2031 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2032 tl_wput_common_ser(mp, tep);
2033 }
2034
2035 /*
2036 * Wakeup service routine unless called from close.
2037 * If ser_mp is specified, the caller is tl_wsrv().
2038 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2039 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2040 * be no matching tl_serializer_exit() in this case.
2041 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2042 * waiting on te_srv_cv.
2043 */
2044 if (ser_mp != NULL) {
2045 /*
2046 * We are called from tl_wsrv.
2047 */
2048 mutex_enter(&tep->te_srv_lock);
2049 ASSERT(tep->te_wsrv_active);
2050 tep->te_wsrv_active = B_FALSE;
2051 cv_signal(&tep->te_srv_cv);
2052 mutex_exit(&tep->te_srv_lock);
2053 tl_serializer_exit(tep);
2054 }
2055 }
2056
2057 /*
2058 * Called when the stream is backenabled. Enter serializer and qenable everyone
2059 * flow controlled by tep.
2060 *
2061 * NOTE: The service routine should enter serializer synchronously. Otherwise it
2062 * is possible that two instances of tl_rsrv will be running reusing the same
2063 * rsrv mblk.
2064 */
2065 static int
2066 tl_rsrv(queue_t *rq)
2067 {
2068 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2069
2070 ASSERT(rq->q_first == NULL);
2071 ASSERT(tep->te_rsrv_active == 0);
2072
2073 tep->te_rsrv_active = B_TRUE;
2074 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2075 /*
2076 * Wait for serializer job to complete.
2077 */
2078 mutex_enter(&tep->te_srv_lock);
2079 while (tep->te_rsrv_active) {
2080 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2081 }
2082 cv_signal(&tep->te_srv_cv);
2083 mutex_exit(&tep->te_srv_lock);
2084 return (0);
2085 }
2086
2087 /* ARGSUSED */
2088 static void
2089 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2090 {
2091 tl_endpt_t *peer_tep;
2092
2093 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2094 tl_cl_backenable(tep);
2095 } else if (
2096 IS_COTS(tep) &&
2097 ((peer_tep = tep->te_conp) != NULL) &&
2098 !peer_tep->te_closing &&
2099 ((tep->te_state == TS_DATA_XFER) ||
2100 (tep->te_state == TS_WIND_ORDREL)||
2101 (tep->te_state == TS_WREQ_ORDREL))) {
2102 TL_QENABLE(peer_tep);
2103 }
2104
2105 /*
2106 * Wakeup read side service routine.
2107 */
2108 mutex_enter(&tep->te_srv_lock);
2109 ASSERT(tep->te_rsrv_active);
2110 tep->te_rsrv_active = B_FALSE;
2111 cv_signal(&tep->te_srv_cv);
2112 mutex_exit(&tep->te_srv_lock);
2113 tl_serializer_exit(tep);
2114 }
2115
2116 /*
2117 * process M_PROTO messages. Always called from serializer.
2118 */
2119 static void
2120 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2121 {
2122 ssize_t msz = MBLKL(mp);
2123 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
2124
2125 /* Message size was validated by tl_wput(). */
2126 ASSERT(msz >= sizeof (prim->type));
2127
2128 switch (prim->type) {
2129 case T_UNBIND_REQ:
2130 tl_unbind(mp, tep);
2131 break;
2132
2133 case T_ADDR_REQ:
2134 tl_addr_req(mp, tep);
2135 break;
2136
2137 case O_T_CONN_RES:
2138 case T_CONN_RES:
2139 if (IS_CLTS(tep)) {
2140 tl_merror(tep->te_wq, mp, EPROTO);
2141 break;
2142 }
2143 tl_conn_res(mp, tep);
2144 break;
2145
2146 case T_DISCON_REQ:
2147 if (IS_CLTS(tep)) {
2148 tl_merror(tep->te_wq, mp, EPROTO);
2149 break;
2150 }
2151 tl_discon_req(mp, tep);
2152 break;
2153
2154 case T_DATA_REQ:
2155 if (IS_CLTS(tep)) {
2156 tl_merror(tep->te_wq, mp, EPROTO);
2157 break;
2158 }
2159 tl_data(mp, tep);
2160 break;
2161
2162 case T_OPTDATA_REQ:
2163 if (IS_CLTS(tep)) {
2164 tl_merror(tep->te_wq, mp, EPROTO);
2165 break;
2166 }
2167 tl_data(mp, tep);
2168 break;
2169
2170 case T_EXDATA_REQ:
2171 if (IS_CLTS(tep)) {
2172 tl_merror(tep->te_wq, mp, EPROTO);
2173 break;
2174 }
2175 tl_exdata(mp, tep);
2176 break;
2177
2178 case T_ORDREL_REQ:
2179 if (!IS_COTSORD(tep)) {
2180 tl_merror(tep->te_wq, mp, EPROTO);
2181 break;
2182 }
2183 tl_ordrel(mp, tep);
2184 break;
2185
2186 case T_UNITDATA_REQ:
2187 if (IS_COTS(tep)) {
2188 tl_merror(tep->te_wq, mp, EPROTO);
2189 break;
2190 }
2191 tl_unitdata(mp, tep);
2192 break;
2193
2194 default:
2195 tl_merror(tep->te_wq, mp, EPROTO);
2196 break;
2197 }
2198 }
2199
2200 /*
2201 * Process ioctl from serializer.
2202 * This is a wrapper around tl_do_ioctl().
2203 */
2204 static void
2205 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2206 {
2207 if (!tep->te_closing)
2208 tl_do_ioctl(mp, tep);
2209 else
2210 freemsg(mp);
2211
2212 tl_serializer_exit(tep);
2213 tl_refrele(tep);
2214 }
2215
2216 static void
2217 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2218 {
2219 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2220 int cmd = iocbp->ioc_cmd;
2221 queue_t *wq = tep->te_wq;
2222 int error;
2223 int thisopt, otheropt;
2224
2225 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2226
2227 switch (cmd) {
2228 case TL_IOC_CREDOPT:
2229 if (cmd == TL_IOC_CREDOPT) {
2230 thisopt = TL_SETCRED;
2231 otheropt = TL_SETUCRED;
2232 } else {
2233 /* FALLTHROUGH */
2234 case TL_IOC_UCREDOPT:
2235 thisopt = TL_SETUCRED;
2236 otheropt = TL_SETCRED;
2237 }
2238 /*
2239 * The credentials passing does not apply to sockets.
2240 * Only one of the cred options can be set at a given time.
2241 */
2242 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2243 miocnak(wq, mp, 0, EINVAL);
2244 return;
2245 }
2246
2247 /*
2248 * Turn on generation of credential options for
2249 * T_conn_req, T_conn_con, T_unidata_ind.
2250 */
2251 error = miocpullup(mp, sizeof (uint32_t));
2252 if (error != 0) {
2253 miocnak(wq, mp, 0, error);
2254 return;
2255 }
2256 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2257 miocnak(wq, mp, 0, EINVAL);
2258 return;
2259 }
2260
2261 if (*(uint32_t *)mp->b_cont->b_rptr)
2262 tep->te_flag |= thisopt;
2263 else
2264 tep->te_flag &= ~thisopt;
2265
2266 miocack(wq, mp, 0, 0);
2267 break;
2268
2269 default:
2270 /* Should not be here */
2271 miocnak(wq, mp, 0, EINVAL);
2272 break;
2273 }
2274 }
2275
2276
2277 /*
2278 * send T_ERROR_ACK
2279 * Note: assumes enough memory or caller passed big enough mp
2280 * - no recovery from allocb failures
2281 */
2282
2283 static void
2284 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2285 t_scalar_t unix_err, t_scalar_t type)
2286 {
2287 struct T_error_ack *err_ack;
2288 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2289 M_PCPROTO, T_ERROR_ACK);
2290
2291 if (ackmp == NULL) {
2292 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE | SL_ERROR,
2293 "tl_error_ack:out of mblk memory"));
2294 tl_merror(wq, NULL, ENOSR);
2295 return;
2296 }
2297 err_ack = (struct T_error_ack *)ackmp->b_rptr;
2298 err_ack->ERROR_prim = type;
2299 err_ack->TLI_error = tli_err;
2300 err_ack->UNIX_error = unix_err;
2301
2302 /*
2303 * send error ack message
2304 */
2305 qreply(wq, ackmp);
2306 }
2307
2308
2309
2310 /*
2311 * send T_OK_ACK
2312 * Note: assumes enough memory or caller passed big enough mp
2313 * - no recovery from allocb failures
2314 */
2315 static void
2316 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2317 {
2318 struct T_ok_ack *ok_ack;
2319 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2320 M_PCPROTO, T_OK_ACK);
2321
2322 if (ackmp == NULL) {
2323 tl_merror(wq, NULL, ENOMEM);
2324 return;
2325 }
2326
2327 ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2328 ok_ack->CORRECT_prim = type;
2329
2330 (void) qreply(wq, ackmp);
2331 }
2332
2333 /*
2334 * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2335 * This is a wrapper around tl_bind().
2336 */
2337 static void
2338 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2339 {
2340 if (!tep->te_closing)
2341 tl_bind(mp, tep);
2342 else
2343 freemsg(mp);
2344
2345 tl_serializer_exit(tep);
2346 tl_refrele(tep);
2347 }
2348
2349 /*
2350 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2351 * Assumes that the endpoint is in the unbound.
2352 */
2353 static void
2354 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2355 {
2356 queue_t *wq = tep->te_wq;
2357 struct T_bind_ack *b_ack;
2358 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2359 mblk_t *ackmp, *bamp;
2360 soux_addr_t ux_addr;
2361 t_uscalar_t qlen = 0;
2362 t_scalar_t alen, aoff;
2363 tl_addr_t addr_req;
2364 void *addr_startp;
2365 ssize_t msz = MBLKL(mp), basize;
2366 t_scalar_t tli_err = 0, unix_err = 0;
2367 t_scalar_t save_prim_type = bind->PRIM_type;
2368 t_scalar_t save_state = tep->te_state;
2369
2370 if (tep->te_state != TS_UNBND) {
2371 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2372 SL_TRACE | SL_ERROR,
2373 "tl_wput:bind_request:out of state, state=%d",
2374 tep->te_state));
2375 tli_err = TOUTSTATE;
2376 goto error;
2377 }
2378
2379 if (msz < sizeof (struct T_bind_req)) {
2380 tli_err = TSYSERR;
2381 unix_err = EINVAL;
2382 goto error;
2383 }
2384
2385 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2386
2387 ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2388 (bind->PRIM_type == T_BIND_REQ));
2389
2390 alen = bind->ADDR_length;
2391 aoff = bind->ADDR_offset;
2392
2393 /* negotiate max conn req pending */
2394 if (IS_COTS(tep)) {
2395 qlen = bind->CONIND_number;
2396 if (qlen > tl_maxqlen)
2397 qlen = tl_maxqlen;
2398 }
2399
2400 /*
2401 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2402 * and bound again.
2403 */
2404 if ((tep->te_hash_hndl == NULL) &&
2405 ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2406 mod_hash_reserve_nosleep(tep->te_addrhash,
2407 &tep->te_hash_hndl) != 0) {
2408 tli_err = TSYSERR;
2409 unix_err = ENOSR;
2410 goto error;
2411 }
2412
2413 /*
2414 * Verify address correctness.
2415 */
2416 if (IS_SOCKET(tep)) {
2417 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2418
2419 if ((alen != TL_SOUX_ADDRLEN) ||
2420 (aoff < 0) ||
2421 (aoff + alen > msz)) {
2422 (void) (STRLOG(TL_ID, tep->te_minor,
2423 1, SL_TRACE | SL_ERROR,
2424 "tl_bind: invalid socket addr"));
2425 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2426 tli_err = TSYSERR;
2427 unix_err = EINVAL;
2428 goto error;
2429 }
2430 /* Copy address from message to local buffer. */
2431 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2432 /*
2433 * Check that we got correct address from sockets
2434 */
2435 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2436 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2437 (void) (STRLOG(TL_ID, tep->te_minor,
2438 1, SL_TRACE | SL_ERROR,
2439 "tl_bind: invalid socket magic"));
2440 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2441 tli_err = TSYSERR;
2442 unix_err = EINVAL;
2443 goto error;
2444 }
2445 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2446 (ux_addr.soua_vp != NULL)) {
2447 (void) (STRLOG(TL_ID, tep->te_minor,
2448 1, SL_TRACE | SL_ERROR,
2449 "tl_bind: implicit addr non-empty"));
2450 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2451 tli_err = TSYSERR;
2452 unix_err = EINVAL;
2453 goto error;
2454 }
2455 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2456 (ux_addr.soua_vp == NULL)) {
2457 (void) (STRLOG(TL_ID, tep->te_minor,
2458 1, SL_TRACE | SL_ERROR,
2459 "tl_bind: explicit addr empty"));
2460 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2461 tli_err = TSYSERR;
2462 unix_err = EINVAL;
2463 goto error;
2464 }
2465 } else {
2466 if ((alen > 0) && ((aoff < 0) ||
2467 ((ssize_t)(aoff + alen) > msz) ||
2468 ((aoff + alen) < 0))) {
2469 (void) (STRLOG(TL_ID, tep->te_minor,
2470 1, SL_TRACE | SL_ERROR,
2471 "tl_bind: invalid message"));
2472 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2473 tli_err = TSYSERR;
2474 unix_err = EINVAL;
2475 goto error;
2476 }
2477 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2478 (void) (STRLOG(TL_ID, tep->te_minor,
2479 1, SL_TRACE | SL_ERROR,
2480 "tl_bind: bad addr in message"));
2481 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2482 tli_err = TBADADDR;
2483 goto error;
2484 }
2485 #ifdef DEBUG
2486 /*
2487 * Mild form of ASSERT()ion to detect broken TPI apps.
2488 * if (!assertion)
2489 * log warning;
2490 */
2491 if (!((alen == 0 && aoff == 0) ||
2492 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2493 (void) (STRLOG(TL_ID, tep->te_minor,
2494 3, SL_TRACE | SL_ERROR,
2495 "tl_bind: addr overlaps TPI message"));
2496 }
2497 #endif
2498 }
2499
2500 /*
2501 * Bind the address provided or allocate one if requested.
2502 * Allow rebinds with a new qlen value.
2503 */
2504 if (IS_SOCKET(tep)) {
2505 /*
2506 * For anonymous requests the te_ap is already set up properly
2507 * so use minor number as an address.
2508 * For explicit requests need to check whether the address is
2509 * already in use.
2510 */
2511 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2512 int rc;
2513
2514 if (tep->te_flag & TL_ADDRHASHED) {
2515 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2516 if (tep->te_vp == ux_addr.soua_vp)
2517 goto skip_addr_bind;
2518 else /* Rebind to a new address. */
2519 tl_addr_unbind(tep);
2520 }
2521 /*
2522 * Insert address in the hash if it is not already
2523 * there. Since we use preallocated handle, the insert
2524 * can fail only if the key is already present.
2525 */
2526 rc = mod_hash_insert_reserve(tep->te_addrhash,
2527 (mod_hash_key_t)ux_addr.soua_vp,
2528 (mod_hash_val_t)tep, tep->te_hash_hndl);
2529
2530 if (rc != 0) {
2531 ASSERT(rc == MH_ERR_DUPLICATE);
2532 /*
2533 * Violate O_T_BIND_REQ semantics and fail with
2534 * TADDRBUSY - sockets will not use any address
2535 * other than supplied one for explicit binds.
2536 */
2537 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2538 SL_TRACE | SL_ERROR,
2539 "tl_bind:requested addr %p is busy",
2540 ux_addr.soua_vp));
2541 tli_err = TADDRBUSY;
2542 unix_err = 0;
2543 goto error;
2544 }
2545 tep->te_uxaddr = ux_addr;
2546 tep->te_flag |= TL_ADDRHASHED;
2547 tep->te_hash_hndl = NULL;
2548 }
2549 } else if (alen == 0) {
2550 /*
2551 * assign any free address
2552 */
2553 if (!tl_get_any_addr(tep, NULL)) {
2554 (void) (STRLOG(TL_ID, tep->te_minor,
2555 1, SL_TRACE | SL_ERROR,
2556 "tl_bind:failed to get buffer for any "
2557 "address"));
2558 tli_err = TSYSERR;
2559 unix_err = ENOSR;
2560 goto error;
2561 }
2562 } else {
2563 addr_req.ta_alen = alen;
2564 addr_req.ta_abuf = (mp->b_rptr + aoff);
2565 addr_req.ta_zoneid = tep->te_zoneid;
2566
2567 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2568 if (tep->te_abuf == NULL) {
2569 tli_err = TSYSERR;
2570 unix_err = ENOSR;
2571 goto error;
2572 }
2573 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2574 tep->te_alen = alen;
2575
2576 if (mod_hash_insert_reserve(tep->te_addrhash,
2577 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2578 tep->te_hash_hndl) != 0) {
2579 if (save_prim_type == T_BIND_REQ) {
2580 /*
2581 * The bind semantics for this primitive
2582 * require a failure if the exact address
2583 * requested is busy
2584 */
2585 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2586 SL_TRACE | SL_ERROR,
2587 "tl_bind:requested addr is busy"));
2588 tli_err = TADDRBUSY;
2589 unix_err = 0;
2590 goto error;
2591 }
2592
2593 /*
2594 * O_T_BIND_REQ semantics say if address if requested
2595 * address is busy, bind to any available free address
2596 */
2597 if (!tl_get_any_addr(tep, &addr_req)) {
2598 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2599 SL_TRACE | SL_ERROR,
2600 "tl_bind:unable to get any addr buf"));
2601 tli_err = TSYSERR;
2602 unix_err = ENOMEM;
2603 goto error;
2604 }
2605 } else {
2606 tep->te_flag |= TL_ADDRHASHED;
2607 tep->te_hash_hndl = NULL;
2608 }
2609 }
2610
2611 ASSERT(tep->te_alen >= 0);
2612
2613 skip_addr_bind:
2614 /*
2615 * prepare T_BIND_ACK TPI message
2616 */
2617 basize = sizeof (struct T_bind_ack) + tep->te_alen;
2618 bamp = reallocb(mp, basize, 0);
2619 if (bamp == NULL) {
2620 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2621 "tl_wput:tl_bind: allocb failed"));
2622 /*
2623 * roll back state changes
2624 */
2625 tl_addr_unbind(tep);
2626 tep->te_state = TS_UNBND;
2627 tl_memrecover(wq, mp, basize);
2628 return;
2629 }
2630
2631 DB_TYPE(bamp) = M_PCPROTO;
2632 bamp->b_wptr = bamp->b_rptr + basize;
2633 b_ack = (struct T_bind_ack *)bamp->b_rptr;
2634 b_ack->PRIM_type = T_BIND_ACK;
2635 b_ack->CONIND_number = qlen;
2636 b_ack->ADDR_length = tep->te_alen;
2637 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2638 addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2639 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2640
2641 if (IS_COTS(tep)) {
2642 tep->te_qlen = qlen;
2643 if (qlen > 0)
2644 tep->te_flag |= TL_LISTENER;
2645 }
2646
2647 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2648 /*
2649 * send T_BIND_ACK message
2650 */
2651 (void) qreply(wq, bamp);
2652 return;
2653
2654 error:
2655 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2656 if (ackmp == NULL) {
2657 /*
2658 * roll back state changes
2659 */
2660 tep->te_state = save_state;
2661 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2662 return;
2663 }
2664 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2665 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2666 }
2667
2668 /*
2669 * Process T_UNBIND_REQ.
2670 * Called from serializer.
2671 */
2672 static void
2673 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2674 {
2675 queue_t *wq;
2676 mblk_t *ackmp;
2677
2678 if (tep->te_closing) {
2679 freemsg(mp);
2680 return;
2681 }
2682
2683 wq = tep->te_wq;
2684
2685 /*
2686 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2687 * ==> allocate for T_ERROR_ACK (known max)
2688 */
2689 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2690 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2691 return;
2692 }
2693 /*
2694 * memory resources committed
2695 * Note: no message validation. T_UNBIND_REQ message is
2696 * same size as PRIM_type field so already verified earlier.
2697 */
2698
2699 /*
2700 * validate state
2701 */
2702 if (tep->te_state != TS_IDLE) {
2703 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2704 SL_TRACE | SL_ERROR,
2705 "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2706 tep->te_state));
2707 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2708 return;
2709 }
2710 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2711
2712 /*
2713 * TPI says on T_UNBIND_REQ:
2714 * send up a M_FLUSH to flush both
2715 * read and write queues
2716 */
2717 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2718
2719 if (!IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2720 tep->te_magic != SOU_MAGIC_EXPLICIT) {
2721
2722 /*
2723 * Sockets use bind with qlen==0 followed by bind() to
2724 * the same address with qlen > 0 for listeners.
2725 * We allow rebind with a new qlen value.
2726 */
2727 tl_addr_unbind(tep);
2728 }
2729
2730 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2731 /*
2732 * send T_OK_ACK
2733 */
2734 tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2735 }
2736
2737
2738 /*
2739 * Option management code from drv/ip is used here
2740 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2741 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2742 * However, that is what we want as that option is 'unorthodox'
2743 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND
2744 * and not in T_SVR4_OPTMGMT_REQ/ACK
2745 * Note2: use of optcom_req means this routine is an exception to
2746 * recovery from allocb() failures.
2747 */
2748
2749 static void
2750 tl_optmgmt(queue_t *wq, mblk_t *mp)
2751 {
2752 tl_endpt_t *tep;
2753 mblk_t *ackmp;
2754 union T_primitives *prim;
2755 cred_t *cr;
2756
2757 tep = (tl_endpt_t *)wq->q_ptr;
2758 prim = (union T_primitives *)mp->b_rptr;
2759
2760 /*
2761 * All Solaris components should pass a db_credp
2762 * for this TPI message, hence we ASSERT.
2763 * But in case there is some other M_PROTO that looks
2764 * like a TPI message sent by some other kernel
2765 * component, we check and return an error.
2766 */
2767 cr = msg_getcred(mp, NULL);
2768 ASSERT(cr != NULL);
2769 if (cr == NULL) {
2770 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2771 return;
2772 }
2773
2774 /* all states OK for AF_UNIX options ? */
2775 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2776 prim->type == T_SVR4_OPTMGMT_REQ) {
2777 /*
2778 * Broken TLI semantics that options can only be managed
2779 * in TS_IDLE state. Needed for Sparc ABI test suite that
2780 * tests this TLI (mis)feature using this device driver.
2781 */
2782 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2783 SL_TRACE | SL_ERROR,
2784 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2785 tep->te_state));
2786 /*
2787 * preallocate memory for T_ERROR_ACK
2788 */
2789 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2790 if (ackmp == NULL) {
2791 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2792 return;
2793 }
2794
2795 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2796 freemsg(mp);
2797 return;
2798 }
2799
2800 /*
2801 * call common option management routine from drv/ip
2802 */
2803 if (prim->type == T_SVR4_OPTMGMT_REQ) {
2804 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2805 } else {
2806 ASSERT(prim->type == T_OPTMGMT_REQ);
2807 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2808 }
2809 }
2810
2811 /*
2812 * Handle T_conn_req - the driver part of accept().
2813 * If TL_SET[U]CRED generate the credentials options.
2814 * If this is a socket pass through options unmodified.
2815 * For sockets generate the T_CONN_CON here instead of
2816 * waiting for the T_CONN_RES.
2817 */
2818 static void
2819 tl_conn_req(queue_t *wq, mblk_t *mp)
2820 {
2821 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2822 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
2823 ssize_t msz = MBLKL(mp);
2824 t_scalar_t alen, aoff, olen, ooff, err = 0;
2825 tl_endpt_t *peer_tep = NULL;
2826 mblk_t *ackmp;
2827 mblk_t *dimp;
2828 struct T_discon_ind *di;
2829 soux_addr_t ux_addr;
2830 tl_addr_t dst;
2831
2832 ASSERT(IS_COTS(tep));
2833
2834 if (tep->te_closing) {
2835 freemsg(mp);
2836 return;
2837 }
2838
2839 /*
2840 * preallocate memory for:
2841 * 1. max of T_ERROR_ACK and T_OK_ACK
2842 * ==> known max T_ERROR_ACK
2843 * 2. max of T_DISCON_IND and T_CONN_IND
2844 */
2845 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2846 if (ackmp == NULL) {
2847 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2848 return;
2849 }
2850 /*
2851 * memory committed for T_OK_ACK/T_ERROR_ACK now
2852 * will be committed for T_DISCON_IND/T_CONN_IND later
2853 */
2854
2855 if (tep->te_state != TS_IDLE) {
2856 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2857 SL_TRACE | SL_ERROR,
2858 "tl_wput:T_CONN_REQ:out of state, state=%d",
2859 tep->te_state));
2860 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2861 freemsg(mp);
2862 return;
2863 }
2864
2865 /*
2866 * validate the message
2867 * Note: dereference fields in struct inside message only
2868 * after validating the message length.
2869 */
2870 if (msz < sizeof (struct T_conn_req)) {
2871 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2872 "tl_conn_req:invalid message length"));
2873 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2874 freemsg(mp);
2875 return;
2876 }
2877 alen = creq->DEST_length;
2878 aoff = creq->DEST_offset;
2879 olen = creq->OPT_length;
2880 ooff = creq->OPT_offset;
2881 if (olen == 0)
2882 ooff = 0;
2883
2884 if (IS_SOCKET(tep)) {
2885 if ((alen != TL_SOUX_ADDRLEN) ||
2886 (aoff < 0) ||
2887 (aoff + alen > msz) ||
2888 (alen > msz - sizeof (struct T_conn_req))) {
2889 (void) (STRLOG(TL_ID, tep->te_minor,
2890 1, SL_TRACE | SL_ERROR,
2891 "tl_conn_req: invalid socket addr"));
2892 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2893 freemsg(mp);
2894 return;
2895 }
2896 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2897 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2898 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2899 (void) (STRLOG(TL_ID, tep->te_minor,
2900 1, SL_TRACE | SL_ERROR,
2901 "tl_conn_req: invalid socket magic"));
2902 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2903 freemsg(mp);
2904 return;
2905 }
2906 } else {
2907 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2908 (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2909 ooff + olen < 0)) ||
2910 olen < 0 || ooff < 0) {
2911 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2912 SL_TRACE | SL_ERROR,
2913 "tl_conn_req:invalid message"));
2914 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2915 freemsg(mp);
2916 return;
2917 }
2918
2919 if (alen <= 0 || aoff < 0 ||
2920 (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2921 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2922 SL_TRACE | SL_ERROR,
2923 "tl_conn_req:bad addr in message, "
2924 "alen=%d, msz=%ld",
2925 alen, msz));
2926 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2927 freemsg(mp);
2928 return;
2929 }
2930 #ifdef DEBUG
2931 /*
2932 * Mild form of ASSERT()ion to detect broken TPI apps.
2933 * if (!assertion)
2934 * log warning;
2935 */
2936 if (!(aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2937 (void) (STRLOG(TL_ID, tep->te_minor, 3,
2938 SL_TRACE | SL_ERROR,
2939 "tl_conn_req: addr overlaps TPI message"));
2940 }
2941 #endif
2942 if (olen) {
2943 /*
2944 * no opts in connect req
2945 * supported in this provider except for sockets.
2946 */
2947 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2948 SL_TRACE | SL_ERROR,
2949 "tl_conn_req:options not supported "
2950 "in message"));
2951 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2952 freemsg(mp);
2953 return;
2954 }
2955 }
2956
2957 /*
2958 * Prevent tep from closing on us.
2959 */
2960 if (!tl_noclose(tep)) {
2961 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2962 "tl_conn_req:endpoint is closing"));
2963 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2964 freemsg(mp);
2965 return;
2966 }
2967
2968 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2969 /*
2970 * get endpoint to connect to
2971 * check that peer with DEST addr is bound to addr
2972 * and has CONIND_number > 0
2973 */
2974 dst.ta_alen = alen;
2975 dst.ta_abuf = mp->b_rptr + aoff;
2976 dst.ta_zoneid = tep->te_zoneid;
2977
2978 /*
2979 * Verify if remote addr is in use
2980 */
2981 peer_tep = (IS_SOCKET(tep) ?
2982 tl_sock_find_peer(tep, &ux_addr) :
2983 tl_find_peer(tep, &dst));
2984
2985 if (peer_tep == NULL) {
2986 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
2987 "tl_conn_req:no one at connect address"));
2988 err = ECONNREFUSED;
2989 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
2990 /*
2991 * validate that number of incoming connection is
2992 * not to capacity on destination endpoint
2993 */
2994 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2995 "tl_conn_req: qlen overflow connection refused"));
2996 err = ECONNREFUSED;
2997 }
2998
2999 /*
3000 * Send T_DISCON_IND in case of error
3001 */
3002 if (err != 0) {
3003 if (peer_tep != NULL)
3004 tl_refrele(peer_tep);
3005 /* We are still expected to send T_OK_ACK */
3006 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3007 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
3008 tl_closeok(tep);
3009 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
3010 M_PROTO, T_DISCON_IND);
3011 if (dimp == NULL) {
3012 tl_merror(wq, NULL, ENOSR);
3013 return;
3014 }
3015 di = (struct T_discon_ind *)dimp->b_rptr;
3016 di->DISCON_reason = err;
3017 di->SEQ_number = BADSEQNUM;
3018
3019 tep->te_state = TS_IDLE;
3020 /*
3021 * send T_DISCON_IND message
3022 */
3023 putnext(tep->te_rq, dimp);
3024 return;
3025 }
3026
3027 ASSERT(IS_COTS(peer_tep));
3028
3029 /*
3030 * Found the listener. At this point processing will continue on
3031 * listener serializer. Close of the endpoint should be blocked while we
3032 * switch serializers.
3033 */
3034 tl_serializer_refhold(peer_tep->te_ser);
3035 tl_serializer_refrele(tep->te_ser);
3036 tep->te_ser = peer_tep->te_ser;
3037 ASSERT(tep->te_oconp == NULL);
3038 tep->te_oconp = peer_tep;
3039
3040 /*
3041 * It is safe to close now. Close may continue on listener serializer.
3042 */
3043 tl_closeok(tep);
3044
3045 /*
3046 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3047 * data, so we link mp to ackmp.
3048 */
3049 ackmp->b_cont = mp;
3050 mp = ackmp;
3051
3052 tl_refhold(tep);
3053 tl_serializer_enter(tep, tl_conn_req_ser, mp);
3054 }
3055
3056 /*
3057 * Finish T_CONN_REQ processing on listener serializer.
3058 */
3059 static void
3060 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3061 {
3062 queue_t *wq;
3063 tl_endpt_t *peer_tep = tep->te_oconp;
3064 mblk_t *confmp, *cimp, *indmp;
3065 void *opts = NULL;
3066 mblk_t *ackmp = mp;
3067 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3068 struct T_conn_ind *ci;
3069 tl_icon_t *tip;
3070 void *addr_startp;
3071 t_scalar_t olen = creq->OPT_length;
3072 t_scalar_t ooff = creq->OPT_offset;
3073 size_t ci_msz;
3074 size_t size;
3075 cred_t *cr = NULL;
3076 pid_t cpid;
3077
3078 if (tep->te_closing) {
3079 TL_UNCONNECT(tep->te_oconp);
3080 tl_serializer_exit(tep);
3081 tl_refrele(tep);
3082 freemsg(mp);
3083 return;
3084 }
3085
3086 wq = tep->te_wq;
3087 tep->te_flag |= TL_EAGER;
3088
3089 /*
3090 * Extract preallocated ackmp from mp.
3091 */
3092 mp = mp->b_cont;
3093 ackmp->b_cont = NULL;
3094
3095 if (olen == 0)
3096 ooff = 0;
3097
3098 if (peer_tep->te_closing ||
3099 !((peer_tep->te_state == TS_IDLE) ||
3100 (peer_tep->te_state == TS_WRES_CIND))) {
3101 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3102 "tl_conn_req:peer in bad state (%d)",
3103 peer_tep->te_state));
3104 TL_UNCONNECT(tep->te_oconp);
3105 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3106 freemsg(ackmp);
3107 tl_serializer_exit(tep);
3108 tl_refrele(tep);
3109 return;
3110 }
3111
3112 /*
3113 * preallocate now for T_DISCON_IND or T_CONN_IND
3114 */
3115 /*
3116 * calculate length of T_CONN_IND message
3117 */
3118 if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
3119 cr = msg_getcred(mp, &cpid);
3120 ASSERT(cr != NULL);
3121 if (peer_tep->te_flag & TL_SETCRED) {
3122 ooff = 0;
3123 olen = (t_scalar_t) sizeof (struct opthdr) +
3124 OPTLEN(sizeof (tl_credopt_t));
3125 /* 1 option only */
3126 } else {
3127 ooff = 0;
3128 olen = (t_scalar_t)sizeof (struct opthdr) +
3129 OPTLEN(ucredminsize(cr));
3130 /* 1 option only */
3131 }
3132 }
3133 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3134 ci_msz = T_ALIGN(ci_msz) + olen;
3135 size = max(ci_msz, sizeof (struct T_discon_ind));
3136
3137 /*
3138 * Save options from mp - we'll need them for T_CONN_IND.
3139 */
3140 if (ooff != 0) {
3141 opts = kmem_alloc(olen, KM_NOSLEEP);
3142 if (opts == NULL) {
3143 /*
3144 * roll back state changes
3145 */
3146 tep->te_state = TS_IDLE;
3147 tl_memrecover(wq, mp, size);
3148 freemsg(ackmp);
3149 TL_UNCONNECT(tep->te_oconp);
3150 tl_serializer_exit(tep);
3151 tl_refrele(tep);
3152 return;
3153 }
3154 /* Copy options to a temp buffer */
3155 bcopy(mp->b_rptr + ooff, opts, olen);
3156 }
3157
3158 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3159 /*
3160 * Generate a T_CONN_CON that has the identical address
3161 * (and options) as the T_CONN_REQ.
3162 * NOTE: assumes that the T_conn_req and T_conn_con structures
3163 * are isomorphic.
3164 */
3165 confmp = copyb(mp);
3166 if (confmp == NULL) {
3167 /*
3168 * roll back state changes
3169 */
3170 tep->te_state = TS_IDLE;
3171 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3172 freemsg(ackmp);
3173 if (opts != NULL)
3174 kmem_free(opts, olen);
3175 TL_UNCONNECT(tep->te_oconp);
3176 tl_serializer_exit(tep);
3177 tl_refrele(tep);
3178 return;
3179 }
3180 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3181 T_CONN_CON;
3182 } else {
3183 confmp = NULL;
3184 }
3185 if ((indmp = reallocb(mp, size, 0)) == NULL) {
3186 /*
3187 * roll back state changes
3188 */
3189 tep->te_state = TS_IDLE;
3190 tl_memrecover(wq, mp, size);
3191 freemsg(ackmp);
3192 if (opts != NULL)
3193 kmem_free(opts, olen);
3194 freemsg(confmp);
3195 TL_UNCONNECT(tep->te_oconp);
3196 tl_serializer_exit(tep);
3197 tl_refrele(tep);
3198 return;
3199 }
3200
3201 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3202 if (tip == NULL) {
3203 /*
3204 * roll back state changes
3205 */
3206 tep->te_state = TS_IDLE;
3207 tl_memrecover(wq, indmp, sizeof (*tip));
3208 freemsg(ackmp);
3209 if (opts != NULL)
3210 kmem_free(opts, olen);
3211 freemsg(confmp);
3212 TL_UNCONNECT(tep->te_oconp);
3213 tl_serializer_exit(tep);
3214 tl_refrele(tep);
3215 return;
3216 }
3217 tip->ti_mp = NULL;
3218
3219 /*
3220 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3221 * and tl_icon_t cell.
3222 */
3223
3224 /*
3225 * ack validity of request and send the peer credential in the ACK.
3226 */
3227 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3228
3229 if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3230 confmp != NULL) {
3231 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3232 }
3233
3234 tl_ok_ack(wq, ackmp, T_CONN_REQ);
3235
3236 /*
3237 * prepare message to send T_CONN_IND
3238 */
3239 /*
3240 * allocate the message - original data blocks retained
3241 * in the returned mblk
3242 */
3243 cimp = tl_resizemp(indmp, size);
3244 if (cimp == NULL) {
3245 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
3246 "tl_conn_req:con_ind:allocb failure"));
3247 tl_merror(wq, indmp, ENOMEM);
3248 TL_UNCONNECT(tep->te_oconp);
3249 tl_serializer_exit(tep);
3250 tl_refrele(tep);
3251 if (opts != NULL)
3252 kmem_free(opts, olen);
3253 freemsg(confmp);
3254 ASSERT(tip->ti_mp == NULL);
3255 kmem_free(tip, sizeof (*tip));
3256 return;
3257 }
3258
3259 DB_TYPE(cimp) = M_PROTO;
3260 ci = (struct T_conn_ind *)cimp->b_rptr;
3261 ci->PRIM_type = T_CONN_IND;
3262 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3263 ci->SRC_length = tep->te_alen;
3264 ci->SEQ_number = tep->te_seqno;
3265
3266 addr_startp = cimp->b_rptr + ci->SRC_offset;
3267 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3268 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3269
3270 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3271 ci->SRC_length);
3272 ci->OPT_length = olen; /* because only 1 option */
3273 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3274 cr, cpid,
3275 peer_tep->te_flag, peer_tep->te_credp);
3276 } else if (ooff != 0) {
3277 /* Copy option from T_CONN_REQ */
3278 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3279 ci->SRC_length);
3280 ci->OPT_length = olen;
3281 ASSERT(opts != NULL);
3282 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3283 } else {
3284 ci->OPT_offset = 0;
3285 ci->OPT_length = 0;
3286 }
3287 if (opts != NULL)
3288 kmem_free(opts, olen);
3289
3290 /*
3291 * register connection request with server peer
3292 * append to list of incoming connections
3293 * increment references for both peer_tep and tep: peer_tep is placed on
3294 * te_oconp and tep is placed on listeners queue.
3295 */
3296 tip->ti_tep = tep;
3297 tip->ti_seqno = tep->te_seqno;
3298 list_insert_tail(&peer_tep->te_iconp, tip);
3299 peer_tep->te_nicon++;
3300
3301 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3302 /*
3303 * send the T_CONN_IND message
3304 */
3305 putnext(peer_tep->te_rq, cimp);
3306
3307 /*
3308 * Send a T_CONN_CON message for sockets.
3309 * Disable the queues until we have reached the correct state!
3310 */
3311 if (confmp != NULL) {
3312 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3313 noenable(wq);
3314 putnext(tep->te_rq, confmp);
3315 }
3316 /*
3317 * Now we need to increment tep reference because tep is referenced by
3318 * server list of pending connections. We also need to decrement
3319 * reference before exiting serializer. Two operations void each other
3320 * so we don't modify reference at all.
3321 */
3322 ASSERT(tep->te_refcnt >= 2);
3323 ASSERT(peer_tep->te_refcnt >= 2);
3324 tl_serializer_exit(tep);
3325 }
3326
3327
3328
/*
 * Handle T_CONN_RES (or O_T_CONN_RES) on the listener stream.
 * Called on the listener's serializer.
 * No one accesses acceptor at this point, so it is safe to modify acceptor.
 * Switch eager serializer to acceptor's.
 *
 * If TL_SET[U]CRED generate the credentials options.
 * For sockets tl_conn_req has already generated the T_CONN_CON.
 *
 * Arguments:
 *	mp  - the T_CONN_RES/O_T_CONN_RES message; consumed on every path
 *	      (freed, handed to tl_memrecover(), or reused via reallocb()
 *	      as the T_DISCON_IND/T_CONN_CON sent to the other side).
 *	tep - the listening endpoint the response arrived on.
 *
 * Outline:
 *	1. Preallocate the T_OK_ACK/T_ERROR_ACK message.
 *	2. Validate listener state, message length, options and SEQ_number.
 *	3. Look up the acceptor by ACCEPTOR_id (the lookup takes a reference)
 *	   and block it from closing with tl_noclose().
 *	4. Find the pending connection (tl_icon_t) for SEQ_number and, if the
 *	   client is still present, try to block it from closing too.
 *	5. Preallocate the message for the client, ack the request, then
 *	   either send T_DISCON_IND (client in a bad state) or link client
 *	   and acceptor together and send T_CONN_CON where required.
 *
 * Every exit path taken after the acceptor lookup succeeded drops the
 * lookup reference with tl_refrele(acc_ep); paths taken after tl_noclose()
 * succeeded also call tl_closeok() on the endpoints that were blocked.
 */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t	*wq;
	struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t	msz = MBLKL(mp);
	t_scalar_t olen, ooff, err = 0;
	t_scalar_t prim = cres->PRIM_type;
	uchar_t	*addr_startp;
	tl_endpt_t *acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t *tip;
	size_t size;
	mblk_t *ackmp, *respmp;
	mblk_t *dimp, *ccmp = NULL;
	struct T_discon_ind *di;
	struct T_conn_con *cc;
	/* B_TRUE once tl_noclose(cl_ep) succeeded; pairs with tl_closeok() */
	boolean_t client_noclose_set = B_FALSE;
	/* B_FALSE means the acceptor adopts the client's serializer instead */
	boolean_t switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	/* Nothing to do for a listener that is already closing. */
	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (ackmp == NULL) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */


	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE | SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	/* The option area, if any, must lie entirely inside the message. */
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * The request is syntactically valid: move the listener to
	 * TS_WACK_CRES. Every error path below steps the state machine
	 * with TE_ERROR_ACK before sending the T_ERROR_ACK.
	 */
	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
	ASSERT(tep->te_state == TS_WACK_CRES);

	/*
	 * NOTE(review): this rejects only sequence numbers in the range
	 * [BADSEQNUM, TL_MINOR_START); confirm against the definitions of
	 * those constants that this is the intended range.
	 */
	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing.
	 */
	if (!tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		/* Only the lookup reference is held here; no tl_closeok(). */
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Mark for deletion, the entry corresponding to client
	 * on list of pending connections made by the listener
	 * search list to see if client is one of the
	 * recorded as a listener.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			/*
			 * From here on cl_ep == NULL means "treat the client
			 * as already gone", even though tip->ti_tep may still
			 * be set.
			 */
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, the test can only fail for
		 * sockets (IS_SOCKET() is and-ed with the TS_DATA_XFER test),
		 * so a non-socket client is never rejected here; confirm
		 * that this matches the intent stated above.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err) {
			size = sizeof (struct T_discon_ind);
		} else {
			/*
			 * calculate length of T_CONN_CON message
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredminsize(acc_ep->te_credp));
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		/* The request now lives on as respmp; do not free it twice. */
		mp = NULL;
	}

	/*
	 * Now ack validity of request
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else {
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
	}

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 * (err is only ever set while cl_ep != NULL, so respmp and size
	 * are valid here)
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (dimp == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE | SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		/* mp was never reallocb()'ed on this path; free it here. */
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 */
	if (!IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE | SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		/*
		 * Build the T_CONN_CON: header, then the acceptor's address,
		 * then (optionally) a single credential option at the next
		 * aligned offset.
		 */
		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type = T_CONN_CON;
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED | TL_SETUCRED)) {
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		/*
		 * Socket with early connect: the T_CONN_CON was already sent
		 * by tl_conn_req, so the preallocated message is not needed.
		 */
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor it should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			/*
			 * te_ser_count is non-zero, so the client's serializer
			 * is left alone and the acceptor is moved below.
			 */
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 * (ti_tep is cleared first, so tl_freetip() sees no client on the
	 * entry)
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (!IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In cases any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		/* Undo the noenable() done by tl_conn_req for early connect. */
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark the both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}
3868
3869
3870
3871
3872 static void
3873 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3874 {
3875 queue_t *wq;
3876 struct T_discon_req *dr;
3877 ssize_t msz;
3878 tl_endpt_t *peer_tep = tep->te_conp;
3879 tl_endpt_t *srv_tep = tep->te_oconp;
3880 tl_icon_t *tip;
3881 size_t size;
3882 mblk_t *ackmp, *dimp, *respmp;
3883 struct T_discon_ind *di;
3884 t_scalar_t save_state, new_state;
3885
3886 if (tep->te_closing) {
3887 freemsg(mp);
3888 return;
3889 }
3890
3891 if ((peer_tep != NULL) && peer_tep->te_closing) {
3892 TL_UNCONNECT(tep->te_conp);
3893 peer_tep = NULL;
3894 }
3895 if ((srv_tep != NULL) && srv_tep->te_closing) {
3896 TL_UNCONNECT(tep->te_oconp);
3897 srv_tep = NULL;
3898 }
3899
3900 wq = tep->te_wq;
3901
3902 /*
3903 * preallocate memory for:
3904 * 1. max of T_ERROR_ACK and T_OK_ACK
3905 * ==> known max T_ERROR_ACK
3906 * 2. for T_DISCON_IND
3907 */
3908 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3909 if (ackmp == NULL) {
3910 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3911 return;
3912 }
3913 /*
3914 * memory committed for T_OK_ACK/T_ERROR_ACK now
3915 * will be committed for T_DISCON_IND later
3916 */
3917
3918 dr = (struct T_discon_req *)mp->b_rptr;
3919 msz = MBLKL(mp);
3920
3921 /*
3922 * validate the state
3923 */
3924 save_state = new_state = tep->te_state;
3925 if (!(save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3926 !(save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3927 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3928 SL_TRACE | SL_ERROR,
3929 "tl_wput:T_DISCON_REQ:out of state, state=%d",
3930 tep->te_state));
3931 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3932 freemsg(mp);
3933 return;
3934 }
3935 /*
3936 * Defer committing the state change until it is determined if
3937 * the message will be queued with the tl_icon or not.
3938 */
3939 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3940
3941 /* validate the message */
3942 if (msz < sizeof (struct T_discon_req)) {
3943 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
3944 "tl_discon_req:invalid message"));
3945 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3946 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3947 freemsg(mp);
3948 return;
3949 }
3950
3951 /*
3952 * if server, then validate that client exists
3953 * by connection sequence number etc.
3954 */
3955 if (tep->te_nicon > 0) { /* server */
3956
3957 /*
3958 * search server list for disconnect client
3959 */
3960 tip = tl_icon_find(tep, dr->SEQ_number);
3961 if (tip == NULL) {
3962 (void) (STRLOG(TL_ID, tep->te_minor, 2,
3963 SL_TRACE | SL_ERROR,
3964 "tl_discon_req:no disconnect endpoint"));
3965 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3966 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3967 freemsg(mp);
3968 return;
3969 }
3970 /*
3971 * If ti_tep is NULL the client has already closed. In this case
3972 * the code below will avoid any action on the client side.
3973 */
3974
3975 IMPLY(tip->ti_tep != NULL,
3976 tip->ti_tep->te_seqno == dr->SEQ_number);
3977 peer_tep = tip->ti_tep;
3978 }
3979
3980 /*
3981 * preallocate now for T_DISCON_IND
3982 * ack validity of request (T_OK_ACK) after memory committed
3983 */
3984 size = sizeof (struct T_discon_ind);
3985 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3986 tl_memrecover(wq, mp, size);
3987 freemsg(ackmp);
3988 return;
3989 }
3990
3991 /*
3992 * prepare message to ack validity of request
3993 */
3994 if (tep->te_nicon == 0) {
3995 new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3996 } else {
3997 if (tep->te_nicon == 1)
3998 new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3999 else
4000 new_state = NEXTSTATE(TE_OK_ACK4, new_state);
4001 }
4002
4003 /*
4004 * Flushing queues according to TPI. Using the old state.
4005 */
4006 if ((tep->te_nicon <= 1) &&
4007 ((save_state == TS_DATA_XFER) ||
4008 (save_state == TS_WIND_ORDREL) ||
4009 (save_state == TS_WREQ_ORDREL)))
4010 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
4011
4012 /* send T_OK_ACK up */
4013 tl_ok_ack(wq, ackmp, T_DISCON_REQ);
4014
4015 /*
4016 * now do disconnect business
4017 */
4018 if (tep->te_nicon > 0) { /* listener */
4019 if (peer_tep != NULL && !peer_tep->te_closing) {
4020 /*
4021 * disconnect incoming connect request pending to tep
4022 */
4023 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4024 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4025 SL_TRACE | SL_ERROR,
4026 "tl_discon_req: reallocb failed"));
4027 tep->te_state = new_state;
4028 tl_merror(wq, respmp, ENOMEM);
4029 return;
4030 }
4031 di = (struct T_discon_ind *)dimp->b_rptr;
4032 di->SEQ_number = BADSEQNUM;
4033 save_state = peer_tep->te_state;
4034 peer_tep->te_state = TS_IDLE;
4035
4036 TL_REMOVE_PEER(peer_tep->te_oconp);
4037 enableok(peer_tep->te_wq);
4038 TL_QENABLE(peer_tep);
4039 } else {
4040 freemsg(respmp);
4041 dimp = NULL;
4042 }
4043
4044 /*
4045 * remove endpoint from incoming connection list
4046 * - remove disconnect client from list on server
4047 */
4048 tl_freetip(tep, tip);
4049 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4050 /*
4051 * disconnect an outgoing request pending from tep
4052 */
4053
4054 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4055 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4056 SL_TRACE | SL_ERROR,
4057 "tl_discon_req: reallocb failed"));
4058 tep->te_state = new_state;
4059 tl_merror(wq, respmp, ENOMEM);
4060 return;
4061 }
4062 di = (struct T_discon_ind *)dimp->b_rptr;
4063 DB_TYPE(dimp) = M_PROTO;
4064 di->PRIM_type = T_DISCON_IND;
4065 di->DISCON_reason = ECONNRESET;
4066 di->SEQ_number = tep->te_seqno;
4067
4068 /*
4069 * If this is a socket the T_DISCON_IND is queued with
4070 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4071 * from the list of pending connections.
4072 * Note that when te_oconp is set the peer better have
4073 * a t_connind_t for the client.
4074 */
4075 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4076 /*
4077 * No need to check that
4078 * ti_tep == NULL since the T_DISCON_IND
4079 * takes precedence over other queued
4080 * messages.
4081 */
4082 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4083 peer_tep = NULL;
4084 dimp = NULL;
4085 /*
4086 * Can't clear te_oconp since tl_co_unconnect needs
4087 * it as a hint not to free the tep.
4088 * Keep the state unchanged since tl_conn_res inspects
4089 * it.
4090 */
4091 new_state = tep->te_state;
4092 } else {
4093 /* Found - delete it */
4094 tip = tl_icon_find(peer_tep, tep->te_seqno);
4095 if (tip != NULL) {
4096 ASSERT(tep == tip->ti_tep);
4097 save_state = peer_tep->te_state;
4098 if (peer_tep->te_nicon == 1)
4099 peer_tep->te_state =
4100 NEXTSTATE(TE_DISCON_IND2,
4101 peer_tep->te_state);
4102 else
4103 peer_tep->te_state =
4104 NEXTSTATE(TE_DISCON_IND3,
4105 peer_tep->te_state);
4106 tl_freetip(peer_tep, tip);
4107 }
4108 ASSERT(tep->te_oconp != NULL);
4109 TL_UNCONNECT(tep->te_oconp);
4110 }
4111 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4112 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4113 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4114 SL_TRACE | SL_ERROR,
4115 "tl_discon_req: reallocb failed"));
4116 tep->te_state = new_state;
4117 tl_merror(wq, respmp, ENOMEM);
4118 return;
4119 }
4120 di = (struct T_discon_ind *)dimp->b_rptr;
4121 di->SEQ_number = BADSEQNUM;
4122
4123 save_state = peer_tep->te_state;
4124 peer_tep->te_state = TS_IDLE;
4125 } else {
4126 /* Not connected */
4127 tep->te_state = new_state;
4128 freemsg(respmp);
4129 return;
4130 }
4131
4132 /* Commit state changes */
4133 tep->te_state = new_state;
4134
4135 if (peer_tep == NULL) {
4136 ASSERT(dimp == NULL);
4137 goto done;
4138 }
4139 /*
4140 * Flush queues on peer before sending up
4141 * T_DISCON_IND according to TPI
4142 */
4143
4144 if ((save_state == TS_DATA_XFER) ||
4145 (save_state == TS_WIND_ORDREL) ||
4146 (save_state == TS_WREQ_ORDREL))
4147 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4148
4149 DB_TYPE(dimp) = M_PROTO;
4150 di->PRIM_type = T_DISCON_IND;
4151 di->DISCON_reason = ECONNRESET;
4152
4153 /*
4154 * data blocks already linked into dimp by reallocb()
4155 */
4156 /*
4157 * send indication message to peer user module
4158 */
4159 ASSERT(dimp != NULL);
4160 putnext(peer_tep->te_rq, dimp);
4161 done:
4162 if (tep->te_conp) { /* disconnect pointers if connected */
4163 ASSERT(!peer_tep->te_closing);
4164
4165 /*
4166 * Messages may be queued on peer's write queue
4167 * waiting to be processed by its write service
4168 * procedure. Before the pointer to the peer transport
4169 * structure is set to NULL, qenable the peer's write
4170 * queue so that the queued up messages are processed.
4171 */
4172 if ((save_state == TS_DATA_XFER) ||
4173 (save_state == TS_WIND_ORDREL) ||
4174 (save_state == TS_WREQ_ORDREL))
4175 TL_QENABLE(peer_tep);
4176 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4177 TL_UNCONNECT(peer_tep->te_conp);
4178 if (!IS_SOCKET(tep)) {
4179 /*
4180 * unlink the streams
4181 */
4182 tep->te_wq->q_next = NULL;
4183 peer_tep->te_wq->q_next = NULL;
4184 }
4185 TL_UNCONNECT(tep->te_conp);
4186 }
4187 }
4188
4189 static void
4190 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4191 {
4192 if (!tep->te_closing)
4193 tl_addr_req(mp, tep);
4194 else
4195 freemsg(mp);
4196
4197 tl_serializer_exit(tep);
4198 tl_refrele(tep);
4199 }
4200
4201 static void
4202 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4203 {
4204 queue_t *wq;
4205 size_t ack_sz;
4206 mblk_t *ackmp;
4207 struct T_addr_ack *taa;
4208
4209 if (tep->te_closing) {
4210 freemsg(mp);
4211 return;
4212 }
4213
4214 wq = tep->te_wq;
4215
4216 /*
4217 * Note: T_ADDR_REQ message has only PRIM_type field
4218 * so it is already validated earlier.
4219 */
4220
4221 if (IS_CLTS(tep) ||
4222 (tep->te_state > TS_WREQ_ORDREL) ||
4223 (tep->te_state < TS_DATA_XFER)) {
4224 /*
4225 * Either connectionless or connection oriented but not
4226 * in connected data transfer state or half-closed states.
4227 */
4228 ack_sz = sizeof (struct T_addr_ack);
4229 if (tep->te_state >= TS_IDLE)
4230 /* is bound */
4231 ack_sz += tep->te_alen;
4232 ackmp = reallocb(mp, ack_sz, 0);
4233 if (ackmp == NULL) {
4234 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4235 SL_TRACE | SL_ERROR,
4236 "tl_addr_req: reallocb failed"));
4237 tl_memrecover(wq, mp, ack_sz);
4238 return;
4239 }
4240
4241 taa = (struct T_addr_ack *)ackmp->b_rptr;
4242
4243 bzero(taa, sizeof (struct T_addr_ack));
4244
4245 taa->PRIM_type = T_ADDR_ACK;
4246 ackmp->b_datap->db_type = M_PCPROTO;
4247 ackmp->b_wptr = (uchar_t *)&taa[1];
4248
4249 if (tep->te_state >= TS_IDLE) {
4250 /* endpoint is bound */
4251 taa->LOCADDR_length = tep->te_alen;
4252 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4253
4254 bcopy(tep->te_abuf, ackmp->b_wptr,
4255 tep->te_alen);
4256 ackmp->b_wptr += tep->te_alen;
4257 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4258 }
4259
4260 (void) qreply(wq, ackmp);
4261 } else {
4262 ASSERT(tep->te_state == TS_DATA_XFER ||
4263 tep->te_state == TS_WIND_ORDREL ||
4264 tep->te_state == TS_WREQ_ORDREL);
4265 /* connection oriented in data transfer */
4266 tl_connected_cots_addr_req(mp, tep);
4267 }
4268 }
4269
4270
4271 static void
4272 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4273 {
4274 tl_endpt_t *peer_tep = tep->te_conp;
4275 size_t ack_sz;
4276 mblk_t *ackmp;
4277 struct T_addr_ack *taa;
4278 uchar_t *addr_startp;
4279
4280 if (tep->te_closing) {
4281 freemsg(mp);
4282 return;
4283 }
4284
4285 if (peer_tep == NULL || peer_tep->te_closing) {
4286 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4287 return;
4288 }
4289
4290 ASSERT(tep->te_state >= TS_IDLE);
4291
4292 ack_sz = sizeof (struct T_addr_ack);
4293 ack_sz += T_ALIGN(tep->te_alen);
4294 ack_sz += peer_tep->te_alen;
4295
4296 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4297 if (ackmp == NULL) {
4298 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4299 "tl_connected_cots_addr_req: reallocb failed"));
4300 tl_memrecover(tep->te_wq, mp, ack_sz);
4301 return;
4302 }
4303
4304 taa = (struct T_addr_ack *)ackmp->b_rptr;
4305
4306 /* endpoint is bound */
4307 taa->LOCADDR_length = tep->te_alen;
4308 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4309
4310 addr_startp = (uchar_t *)&taa[1];
4311
4312 bcopy(tep->te_abuf, addr_startp,
4313 tep->te_alen);
4314
4315 taa->REMADDR_length = peer_tep->te_alen;
4316 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4317 taa->LOCADDR_length);
4318 addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4319 bcopy(peer_tep->te_abuf, addr_startp,
4320 peer_tep->te_alen);
4321 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4322 taa->REMADDR_offset + peer_tep->te_alen;
4323 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4324
4325 putnext(tep->te_rq, ackmp);
4326 }
4327
4328 static void
4329 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4330 {
4331 if (IS_CLTS(tep)) {
4332 *ia = tl_clts_info_ack;
4333 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4334 } else {
4335 *ia = tl_cots_info_ack;
4336 if (IS_COTSORD(tep))
4337 ia->SERV_type = T_COTS_ORD;
4338 }
4339 ia->TIDU_size = tl_tidusz;
4340 ia->CURRENT_state = tep->te_state;
4341 }
4342
4343 /*
4344 * This routine responds to T_CAPABILITY_REQ messages. It is called by
4345 * tl_wput.
4346 */
4347 static void
4348 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4349 {
4350 mblk_t *ackmp;
4351 t_uscalar_t cap_bits1;
4352 struct T_capability_ack *tcap;
4353
4354 if (tep->te_closing) {
4355 freemsg(mp);
4356 return;
4357 }
4358
4359 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4360
4361 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4362 M_PCPROTO, T_CAPABILITY_ACK);
4363 if (ackmp == NULL) {
4364 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4365 "tl_capability_req: reallocb failed"));
4366 tl_memrecover(tep->te_wq, mp,
4367 sizeof (struct T_capability_ack));
4368 return;
4369 }
4370
4371 tcap = (struct T_capability_ack *)ackmp->b_rptr;
4372 tcap->CAP_bits1 = 0;
4373
4374 if (cap_bits1 & TC1_INFO) {
4375 tl_copy_info(&tcap->INFO_ack, tep);
4376 tcap->CAP_bits1 |= TC1_INFO;
4377 }
4378
4379 if (cap_bits1 & TC1_ACCEPTOR_ID) {
4380 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4381 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4382 }
4383
4384 putnext(tep->te_rq, ackmp);
4385 }
4386
4387 static void
4388 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4389 {
4390 if (!tep->te_closing)
4391 tl_info_req(mp, tep);
4392 else
4393 freemsg(mp);
4394
4395 tl_serializer_exit(tep);
4396 tl_refrele(tep);
4397 }
4398
4399 static void
4400 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4401 {
4402 mblk_t *ackmp;
4403
4404 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4405 M_PCPROTO, T_INFO_ACK);
4406 if (ackmp == NULL) {
4407 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4408 "tl_info_req: reallocb failed"));
4409 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4410 return;
4411 }
4412
4413 /*
4414 * fill in T_INFO_ACK contents
4415 */
4416 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4417
4418 /*
4419 * send ack message
4420 */
4421 putnext(tep->te_rq, ackmp);
4422 }
4423
4424 /*
4425 * Handle M_DATA, T_data_req and T_optdata_req.
4426 * If this is a socket pass through T_optdata_req options unmodified.
4427 */
4428 static void
4429 tl_data(mblk_t *mp, tl_endpt_t *tep)
4430 {
4431 queue_t *wq = tep->te_wq;
4432 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4433 ssize_t msz = MBLKL(mp);
4434 tl_endpt_t *peer_tep;
4435 queue_t *peer_rq;
4436 boolean_t closing = tep->te_closing;
4437
4438 if (IS_CLTS(tep)) {
4439 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4440 SL_TRACE | SL_ERROR,
4441 "tl_wput:clts:unattached M_DATA"));
4442 if (!closing) {
4443 tl_merror(wq, mp, EPROTO);
4444 } else {
4445 freemsg(mp);
4446 }
4447 return;
4448 }
4449
4450 /*
4451 * If the endpoint is closing it should still forward any data to the
4452 * peer (if it has one). If it is not allowed to forward it can just
4453 * free the message.
4454 */
4455 if (closing &&
4456 (tep->te_state != TS_DATA_XFER) &&
4457 (tep->te_state != TS_WREQ_ORDREL)) {
4458 freemsg(mp);
4459 return;
4460 }
4461
4462 if (DB_TYPE(mp) == M_PROTO) {
4463 if (prim->type == T_DATA_REQ &&
4464 msz < sizeof (struct T_data_req)) {
4465 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4466 SL_TRACE | SL_ERROR,
4467 "tl_data:T_DATA_REQ:invalid message"));
4468 if (!closing) {
4469 tl_merror(wq, mp, EPROTO);
4470 } else {
4471 freemsg(mp);
4472 }
4473 return;
4474 } else if (prim->type == T_OPTDATA_REQ &&
4475 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4476 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4477 SL_TRACE | SL_ERROR,
4478 "tl_data:T_OPTDATA_REQ:invalid message"));
4479 if (!closing) {
4480 tl_merror(wq, mp, EPROTO);
4481 } else {
4482 freemsg(mp);
4483 }
4484 return;
4485 }
4486 }
4487
4488 /*
4489 * connection oriented provider
4490 */
4491 switch (tep->te_state) {
4492 case TS_IDLE:
4493 /*
4494 * Other end not here - do nothing.
4495 */
4496 freemsg(mp);
4497 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4498 "tl_data:cots with endpoint idle"));
4499 return;
4500
4501 case TS_DATA_XFER:
4502 /* valid states */
4503 if (tep->te_conp != NULL)
4504 break;
4505
4506 if (tep->te_oconp == NULL) {
4507 if (!closing) {
4508 tl_merror(wq, mp, EPROTO);
4509 } else {
4510 freemsg(mp);
4511 }
4512 return;
4513 }
4514 /*
4515 * For a socket the T_CONN_CON is sent early thus
4516 * the peer might not yet have accepted the connection.
4517 * If we are closing queue the packet with the T_CONN_IND.
4518 * Otherwise defer processing the packet until the peer
4519 * accepts the connection.
4520 * Note that the queue is noenabled when we go into this
4521 * state.
4522 */
4523 if (!closing) {
4524 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4525 SL_TRACE | SL_ERROR,
4526 "tl_data: ocon"));
4527 TL_PUTBQ(tep, mp);
4528 return;
4529 }
4530 if (DB_TYPE(mp) == M_PROTO) {
4531 if (msz < sizeof (t_scalar_t)) {
4532 freemsg(mp);
4533 return;
4534 }
4535 /* reuse message block - just change REQ to IND */
4536 if (prim->type == T_DATA_REQ)
4537 prim->type = T_DATA_IND;
4538 else
4539 prim->type = T_OPTDATA_IND;
4540 }
4541 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4542 return;
4543
4544 case TS_WREQ_ORDREL:
4545 if (tep->te_conp == NULL) {
4546 /*
4547 * Other end closed - generate discon_ind
4548 * with reason 0 to cause an EPIPE but no
4549 * read side error on AF_UNIX sockets.
4550 */
4551 freemsg(mp);
4552 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4553 SL_TRACE | SL_ERROR,
4554 "tl_data: WREQ_ORDREL and no peer"));
4555 tl_discon_ind(tep, 0);
4556 return;
4557 }
4558 break;
4559
4560 default:
4561 /* invalid state for event TE_DATA_REQ */
4562 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4563 "tl_data:cots:out of state"));
4564 tl_merror(wq, mp, EPROTO);
4565 return;
4566 }
4567 /*
4568 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4569 * (State stays same on this event)
4570 */
4571
4572 /*
4573 * get connected endpoint
4574 */
4575 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4576 freemsg(mp);
4577 /* Peer closed */
4578 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4579 "tl_data: peer gone"));
4580 return;
4581 }
4582
4583 ASSERT(tep->te_serializer == peer_tep->te_serializer);
4584 peer_rq = peer_tep->te_rq;
4585
4586 /*
4587 * Put it back if flow controlled
4588 * Note: Messages already on queue when we are closing is bounded
4589 * so we can ignore flow control.
4590 */
4591 if (!canputnext(peer_rq) && !closing) {
4592 TL_PUTBQ(tep, mp);
4593 return;
4594 }
4595
4596 /*
4597 * validate peer state
4598 */
4599 switch (peer_tep->te_state) {
4600 case TS_DATA_XFER:
4601 case TS_WIND_ORDREL:
4602 /* valid states */
4603 break;
4604 default:
4605 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4606 "tl_data:rx side:invalid state"));
4607 tl_merror(peer_tep->te_wq, mp, EPROTO);
4608 return;
4609 }
4610 if (DB_TYPE(mp) == M_PROTO) {
4611 /* reuse message block - just change REQ to IND */
4612 if (prim->type == T_DATA_REQ)
4613 prim->type = T_DATA_IND;
4614 else
4615 prim->type = T_OPTDATA_IND;
4616 }
4617 /*
4618 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4619 * (peer state stays same on this event)
4620 */
4621 /*
4622 * send data to connected peer
4623 */
4624 putnext(peer_rq, mp);
4625 }
4626
4627
4628
4629 static void
4630 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4631 {
4632 queue_t *wq = tep->te_wq;
4633 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4634 ssize_t msz = MBLKL(mp);
4635 tl_endpt_t *peer_tep;
4636 queue_t *peer_rq;
4637 boolean_t closing = tep->te_closing;
4638
4639 if (msz < sizeof (struct T_exdata_req)) {
4640 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4641 "tl_exdata:invalid message"));
4642 if (!closing) {
4643 tl_merror(wq, mp, EPROTO);
4644 } else {
4645 freemsg(mp);
4646 }
4647 return;
4648 }
4649
4650 /*
4651 * If the endpoint is closing it should still forward any data to the
4652 * peer (if it has one). If it is not allowed to forward it can just
4653 * free the message.
4654 */
4655 if (closing &&
4656 (tep->te_state != TS_DATA_XFER) &&
4657 (tep->te_state != TS_WREQ_ORDREL)) {
4658 freemsg(mp);
4659 return;
4660 }
4661
4662 /*
4663 * validate state
4664 */
4665 switch (tep->te_state) {
4666 case TS_IDLE:
4667 /*
4668 * Other end not here - do nothing.
4669 */
4670 freemsg(mp);
4671 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4672 "tl_exdata:cots with endpoint idle"));
4673 return;
4674
4675 case TS_DATA_XFER:
4676 /* valid states */
4677 if (tep->te_conp != NULL)
4678 break;
4679
4680 if (tep->te_oconp == NULL) {
4681 if (!closing) {
4682 tl_merror(wq, mp, EPROTO);
4683 } else {
4684 freemsg(mp);
4685 }
4686 return;
4687 }
4688 /*
4689 * For a socket the T_CONN_CON is sent early thus
4690 * the peer might not yet have accepted the connection.
4691 * If we are closing queue the packet with the T_CONN_IND.
4692 * Otherwise defer processing the packet until the peer
4693 * accepts the connection.
4694 * Note that the queue is noenabled when we go into this
4695 * state.
4696 */
4697 if (!closing) {
4698 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4699 SL_TRACE | SL_ERROR,
4700 "tl_exdata: ocon"));
4701 TL_PUTBQ(tep, mp);
4702 return;
4703 }
4704 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4705 "tl_exdata: closing socket ocon"));
4706 prim->type = T_EXDATA_IND;
4707 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4708 return;
4709
4710 case TS_WREQ_ORDREL:
4711 if (tep->te_conp == NULL) {
4712 /*
4713 * Other end closed - generate discon_ind
4714 * with reason 0 to cause an EPIPE but no
4715 * read side error on AF_UNIX sockets.
4716 */
4717 freemsg(mp);
4718 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4719 SL_TRACE | SL_ERROR,
4720 "tl_exdata: WREQ_ORDREL and no peer"));
4721 tl_discon_ind(tep, 0);
4722 return;
4723 }
4724 break;
4725
4726 default:
4727 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4728 SL_TRACE | SL_ERROR,
4729 "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4730 tep->te_state));
4731 tl_merror(wq, mp, EPROTO);
4732 return;
4733 }
4734 /*
4735 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4736 * (state stays same on this event)
4737 */
4738
4739 /*
4740 * get connected endpoint
4741 */
4742 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4743 freemsg(mp);
4744 /* Peer closed */
4745 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4746 "tl_exdata: peer gone"));
4747 return;
4748 }
4749
4750 peer_rq = peer_tep->te_rq;
4751
4752 /*
4753 * Put it back if flow controlled
4754 * Note: Messages already on queue when we are closing is bounded
4755 * so we can ignore flow control.
4756 */
4757 if (!canputnext(peer_rq) && !closing) {
4758 TL_PUTBQ(tep, mp);
4759 return;
4760 }
4761
4762 /*
4763 * validate state on peer
4764 */
4765 switch (peer_tep->te_state) {
4766 case TS_DATA_XFER:
4767 case TS_WIND_ORDREL:
4768 /* valid states */
4769 break;
4770 default:
4771 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4772 "tl_exdata:rx side:invalid state"));
4773 tl_merror(peer_tep->te_wq, mp, EPROTO);
4774 return;
4775 }
4776 /*
4777 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4778 * (peer state stays same on this event)
4779 */
4780 /*
4781 * reuse message block
4782 */
4783 prim->type = T_EXDATA_IND;
4784
4785 /*
4786 * send data to connected peer
4787 */
4788 putnext(peer_rq, mp);
4789 }
4790
4791
4792
4793 static void
4794 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4795 {
4796 queue_t *wq = tep->te_wq;
4797 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4798 ssize_t msz = MBLKL(mp);
4799 tl_endpt_t *peer_tep;
4800 queue_t *peer_rq;
4801 boolean_t closing = tep->te_closing;
4802
4803 if (msz < sizeof (struct T_ordrel_req)) {
4804 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4805 "tl_ordrel:invalid message"));
4806 if (!closing) {
4807 tl_merror(wq, mp, EPROTO);
4808 } else {
4809 freemsg(mp);
4810 }
4811 return;
4812 }
4813
4814 /*
4815 * validate state
4816 */
4817 switch (tep->te_state) {
4818 case TS_DATA_XFER:
4819 case TS_WREQ_ORDREL:
4820 /* valid states */
4821 if (tep->te_conp != NULL)
4822 break;
4823
4824 if (tep->te_oconp == NULL)
4825 break;
4826
4827 /*
4828 * For a socket the T_CONN_CON is sent early thus
4829 * the peer might not yet have accepted the connection.
4830 * If we are closing queue the packet with the T_CONN_IND.
4831 * Otherwise defer processing the packet until the peer
4832 * accepts the connection.
4833 * Note that the queue is noenabled when we go into this
4834 * state.
4835 */
4836 if (!closing) {
4837 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4838 SL_TRACE | SL_ERROR,
4839 "tl_ordlrel: ocon"));
4840 TL_PUTBQ(tep, mp);
4841 return;
4842 }
4843 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4844 "tl_ordlrel: closing socket ocon"));
4845 prim->type = T_ORDREL_IND;
4846 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4847 return;
4848
4849 default:
4850 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4851 SL_TRACE | SL_ERROR,
4852 "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4853 tep->te_state));
4854 if (!closing) {
4855 tl_merror(wq, mp, EPROTO);
4856 } else {
4857 freemsg(mp);
4858 }
4859 return;
4860 }
4861 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4862
4863 /*
4864 * get connected endpoint
4865 */
4866 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4867 /* Peer closed */
4868 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4869 "tl_ordrel: peer gone"));
4870 freemsg(mp);
4871 return;
4872 }
4873
4874 peer_rq = peer_tep->te_rq;
4875
4876 /*
4877 * Put it back if flow controlled except when we are closing.
4878 * Note: Messages already on queue when we are closing is bounded
4879 * so we can ignore flow control.
4880 */
4881 if (!canputnext(peer_rq) && !closing) {
4882 TL_PUTBQ(tep, mp);
4883 return;
4884 }
4885
4886 /*
4887 * validate state on peer
4888 */
4889 switch (peer_tep->te_state) {
4890 case TS_DATA_XFER:
4891 case TS_WIND_ORDREL:
4892 /* valid states */
4893 break;
4894 default:
4895 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
4896 "tl_ordrel:rx side:invalid state"));
4897 tl_merror(peer_tep->te_wq, mp, EPROTO);
4898 return;
4899 }
4900 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4901
4902 /*
4903 * reuse message block
4904 */
4905 prim->type = T_ORDREL_IND;
4906 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4907 "tl_ordrel: send ordrel_ind"));
4908
4909 /*
4910 * send data to connected peer
4911 */
4912 putnext(peer_rq, mp);
4913 }
4914
4915
4916 /*
4917 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4918 */
4919 static void
4920 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4921 {
4922 size_t err_sz;
4923 tl_endpt_t *tep;
4924 struct T_unitdata_req *udreq;
4925 mblk_t *err_mp;
4926 t_scalar_t alen;
4927 t_scalar_t olen;
4928 struct T_uderror_ind *uderr;
4929 uchar_t *addr_startp;
4930
4931 err_sz = sizeof (struct T_uderror_ind);
4932 tep = (tl_endpt_t *)wq->q_ptr;
4933 udreq = (struct T_unitdata_req *)mp->b_rptr;
4934 alen = udreq->DEST_length;
4935 olen = udreq->OPT_length;
4936
4937 if (alen > 0)
4938 err_sz = T_ALIGN(err_sz + alen);
4939 if (olen > 0)
4940 err_sz += olen;
4941
4942 err_mp = allocb(err_sz, BPRI_MED);
4943 if (err_mp == NULL) {
4944 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
4945 "tl_uderr:allocb failure"));
4946 /*
4947 * Note: no rollback of state needed as it does
4948 * not change in connectionless transport
4949 */
4950 tl_memrecover(wq, mp, err_sz);
4951 return;
4952 }
4953
4954 DB_TYPE(err_mp) = M_PROTO;
4955 err_mp->b_wptr = err_mp->b_rptr + err_sz;
4956 uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4957 uderr->PRIM_type = T_UDERROR_IND;
4958 uderr->ERROR_type = err;
4959 uderr->DEST_length = alen;
4960 uderr->OPT_length = olen;
4961 if (alen <= 0) {
4962 uderr->DEST_offset = 0;
4963 } else {
4964 uderr->DEST_offset =
4965 (t_scalar_t)sizeof (struct T_uderror_ind);
4966 addr_startp = mp->b_rptr + udreq->DEST_offset;
4967 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4968 (size_t)alen);
4969 }
4970 if (olen <= 0) {
4971 uderr->OPT_offset = 0;
4972 } else {
4973 uderr->OPT_offset =
4974 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4975 uderr->DEST_length);
4976 addr_startp = mp->b_rptr + udreq->OPT_offset;
4977 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4978 (size_t)olen);
4979 }
4980 freemsg(mp);
4981
4982 /*
4983 * send indication message
4984 */
4985 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4986
4987 qreply(wq, err_mp);
4988 }
4989
4990 static void
4991 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4992 {
4993 queue_t *wq = tep->te_wq;
4994
4995 if (!tep->te_closing && (wq->q_first != NULL)) {
4996 TL_PUTQ(tep, mp);
4997 } else {
4998 if (tep->te_rq != NULL)
4999 tl_unitdata(mp, tep);
5000 else
5001 freemsg(mp);
5002 }
5003
5004 tl_serializer_exit(tep);
5005 tl_refrele(tep);
5006 }
5007
5008 /*
5009 * Handle T_unitdata_req.
5010 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
5011 * If this is a socket pass through options unmodified.
5012 */
5013 static void
5014 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
5015 {
5016 queue_t *wq = tep->te_wq;
5017 soux_addr_t ux_addr;
5018 tl_addr_t destaddr;
5019 uchar_t *addr_startp;
5020 tl_endpt_t *peer_tep;
5021 struct T_unitdata_ind *udind;
5022 struct T_unitdata_req *udreq;
5023 ssize_t msz, ui_sz, reuse_mb_sz;
5024 t_scalar_t alen, aoff, olen, ooff;
5025 t_scalar_t oldolen = 0;
5026 cred_t *cr = NULL;
5027 pid_t cpid;
5028
5029 udreq = (struct T_unitdata_req *)mp->b_rptr;
5030 msz = MBLKL(mp);
5031
5032 /*
5033 * validate the state
5034 */
5035 if (tep->te_state != TS_IDLE) {
5036 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5037 SL_TRACE | SL_ERROR,
5038 "tl_wput:T_CONN_REQ:out of state"));
5039 tl_merror(wq, mp, EPROTO);
5040 return;
5041 }
5042 /*
5043 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5044 * (state does not change on this event)
5045 */
5046
5047 /*
5048 * validate the message
5049 * Note: dereference fields in struct inside message only
5050 * after validating the message length.
5051 */
5052 if (msz < sizeof (struct T_unitdata_req)) {
5053 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5054 "tl_unitdata:invalid message length"));
5055 tl_merror(wq, mp, EINVAL);
5056 return;
5057 }
5058 alen = udreq->DEST_length;
5059 aoff = udreq->DEST_offset;
5060 oldolen = olen = udreq->OPT_length;
5061 ooff = udreq->OPT_offset;
5062 if (olen == 0)
5063 ooff = 0;
5064
5065 if (IS_SOCKET(tep)) {
5066 if ((alen != TL_SOUX_ADDRLEN) ||
5067 (aoff < 0) ||
5068 (aoff + alen > msz) ||
5069 (olen < 0) || (ooff < 0) ||
5070 ((olen > 0) && ((ooff + olen) > msz))) {
5071 (void) (STRLOG(TL_ID, tep->te_minor,
5072 1, SL_TRACE | SL_ERROR,
5073 "tl_unitdata_req: invalid socket addr "
5074 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5075 (int)msz, alen, aoff, olen, ooff));
5076 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5077 return;
5078 }
5079 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5080
5081 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5082 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5083 (void) (STRLOG(TL_ID, tep->te_minor,
5084 1, SL_TRACE | SL_ERROR,
5085 "tl_conn_req: invalid socket magic"));
5086 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5087 return;
5088 }
5089 } else {
5090 if ((alen < 0) ||
5091 (aoff < 0) ||
5092 ((alen > 0) && ((aoff + alen) > msz)) ||
5093 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5094 ((aoff + alen) < 0) ||
5095 ((olen > 0) && ((ooff + olen) > msz)) ||
5096 (olen < 0) ||
5097 (ooff < 0) ||
5098 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5099 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5100 SL_TRACE | SL_ERROR,
5101 "tl_unitdata:invalid unit data message"));
5102 tl_merror(wq, mp, EINVAL);
5103 return;
5104 }
5105 }
5106
5107 /* Options not supported unless it's a socket */
5108 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5109 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5110 "tl_unitdata:option use(unsupported) or zero len addr"));
5111 tl_uderr(wq, mp, EPROTO);
5112 return;
5113 }
5114 #ifdef DEBUG
5115 /*
5116 * Mild form of ASSERT()ion to detect broken TPI apps.
5117 * if (!assertion)
5118 * log warning;
5119 */
5120 if (!(aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5121 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5122 "tl_unitdata:addr overlaps TPI message"));
5123 }
5124 #endif
5125 /*
5126 * get destination endpoint
5127 */
5128 destaddr.ta_alen = alen;
5129 destaddr.ta_abuf = mp->b_rptr + aoff;
5130 destaddr.ta_zoneid = tep->te_zoneid;
5131
5132 /*
5133 * Check whether the destination is the same that was used previously
5134 * and the destination endpoint is in the right state. If something is
5135 * wrong, find destination again and cache it.
5136 */
5137 peer_tep = tep->te_lastep;
5138
5139 if ((peer_tep == NULL) || peer_tep->te_closing ||
5140 (peer_tep->te_state != TS_IDLE) ||
5141 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5142 /*
5143 * Not the same as cached destination , need to find the right
5144 * destination.
5145 */
5146 peer_tep = (IS_SOCKET(tep) ?
5147 tl_sock_find_peer(tep, &ux_addr) :
5148 tl_find_peer(tep, &destaddr));
5149
5150 if (peer_tep == NULL) {
5151 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5152 SL_TRACE | SL_ERROR,
5153 "tl_unitdata:no one at destination address"));
5154 tl_uderr(wq, mp, ECONNRESET);
5155 return;
5156 }
5157
5158 /*
5159 * Cache the new peer.
5160 */
5161 if (tep->te_lastep != NULL)
5162 tl_refrele(tep->te_lastep);
5163
5164 tep->te_lastep = peer_tep;
5165 }
5166
5167 if (peer_tep->te_state != TS_IDLE) {
5168 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
5169 "tl_unitdata:provider in invalid state"));
5170 tl_uderr(wq, mp, EPROTO);
5171 return;
5172 }
5173
5174 ASSERT(peer_tep->te_rq != NULL);
5175
5176 /*
5177 * Put it back if flow controlled except when we are closing.
5178 * Note: Messages already on queue when we are closing is bounded
5179 * so we can ignore flow control.
5180 */
5181 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5182 /* record what we are flow controlled on */
5183 if (tep->te_flowq != NULL) {
5184 list_remove(&tep->te_flowq->te_flowlist, tep);
5185 }
5186 list_insert_head(&peer_tep->te_flowlist, tep);
5187 tep->te_flowq = peer_tep;
5188 TL_PUTBQ(tep, mp);
5189 return;
5190 }
5191 /*
5192 * prepare indication message
5193 */
5194
5195 /*
5196 * calculate length of message
5197 */
5198 if (peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5199 cr = msg_getcred(mp, &cpid);
5200 ASSERT(cr != NULL);
5201
5202 if (peer_tep->te_flag & TL_SETCRED) {
5203 ASSERT(olen == 0);
5204 olen = (t_scalar_t)sizeof (struct opthdr) +
5205 OPTLEN(sizeof (tl_credopt_t));
5206 /* 1 option only */
5207 } else if (peer_tep->te_flag & TL_SETUCRED) {
5208 ASSERT(olen == 0);
5209 olen = (t_scalar_t)sizeof (struct opthdr) +
5210 OPTLEN(ucredminsize(cr));
5211 /* 1 option only */
5212 } else {
5213 /* Possibly more than one option */
5214 olen += (t_scalar_t)sizeof (struct T_opthdr) +
5215 OPTLEN(ucredminsize(cr));
5216 }
5217 }
5218
5219 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5220 reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5221
5222 /*
5223 * If the unitdata_ind fits and we are not adding options
5224 * reuse the udreq mblk.
5225 *
5226 * Otherwise, it is possible we need to append an option if one of the
5227 * te_flag bits is set. This requires extra space in the data block for
5228 * the additional option but the traditional technique used below to
5229 * allocate a new block and copy into it will not work when there is a
5230 * message block with a free pointer (since we don't know anything
5231 * about the layout of the data, pointers referencing or within the
5232 * data, etc.). To handle this possibility the upper layers may have
5233 * preallocated some space to use for appending an option. We check the
5234 * overall mblock size against the size we need ('reuse_mb_sz' with the
5235 * original address length [alen] to ensure we won't overrun the
5236 * current mblk data size) to see if there is free space and thus
5237 * avoid allocating a new message block.
5238 */
5239 if (msz >= ui_sz && alen >= tep->te_alen &&
5240 !(peer_tep->te_flag & (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED))) {
5241 /*
5242 * Reuse the original mblk. Leave options in place.
5243 */
5244 udind = (struct T_unitdata_ind *)mp->b_rptr;
5245 udind->PRIM_type = T_UNITDATA_IND;
5246 udind->SRC_length = tep->te_alen;
5247 addr_startp = mp->b_rptr + udind->SRC_offset;
5248 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5249
5250 } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5251 mp->b_datap->db_frtnp != NULL) {
5252 /*
5253 * We have a message block with a free pointer, but extra space
5254 * has been pre-allocated for us in case we need to append an
5255 * option. Reuse the original mblk, leaving existing options in
5256 * place.
5257 */
5258 udind = (struct T_unitdata_ind *)mp->b_rptr;
5259 udind->PRIM_type = T_UNITDATA_IND;
5260 udind->SRC_length = tep->te_alen;
5261 addr_startp = mp->b_rptr + udind->SRC_offset;
5262 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5263
5264 if (peer_tep->te_flag &
5265 (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5266 ASSERT(cr != NULL);
5267 /*
5268 * We're appending one new option here after the
5269 * original ones.
5270 */
5271 tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5272 cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5273 }
5274
5275 } else if (mp->b_datap->db_frtnp != NULL) {
5276 /*
5277 * The next block creates a new mp and tries to copy the data
5278 * block into it, but that cannot handle a message with a free
5279 * pointer (for more details see the comment in kstrputmsg()
5280 * where dupmsg() is called). Since we can never properly
5281 * duplicate the mp while also extending the data, just error
5282 * out now.
5283 */
5284 tl_uderr(wq, mp, EPROTO);
5285 return;
5286 } else {
5287 /* Allocate a new T_unitdata_ind message */
5288 mblk_t *ui_mp;
5289
5290 ui_mp = allocb(ui_sz, BPRI_MED);
5291 if (ui_mp == NULL) {
5292 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5293 "tl_unitdata:allocb failure:message queued"));
5294 tl_memrecover(wq, mp, ui_sz);
5295 return;
5296 }
5297
5298 /*
5299 * fill in T_UNITDATA_IND contents
5300 */
5301 DB_TYPE(ui_mp) = M_PROTO;
5302 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5303 udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5304 udind->PRIM_type = T_UNITDATA_IND;
5305 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5306 udind->SRC_length = tep->te_alen;
5307 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5308 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5309 udind->OPT_offset =
5310 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5311 udind->OPT_length = olen;
5312 if (peer_tep->te_flag &
5313 (TL_SETCRED | TL_SETUCRED | TL_SOCKUCRED)) {
5314
5315 if (oldolen != 0) {
5316 bcopy((void *)((uintptr_t)udreq + ooff),
5317 (void *)((uintptr_t)udind +
5318 udind->OPT_offset),
5319 oldolen);
5320 }
5321 ASSERT(cr != NULL);
5322
5323 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5324 oldolen, cr, cpid,
5325 peer_tep->te_flag, peer_tep->te_credp);
5326 } else {
5327 bcopy((void *)((uintptr_t)udreq + ooff),
5328 (void *)((uintptr_t)udind + udind->OPT_offset),
5329 olen);
5330 }
5331
5332 /*
5333 * relink data blocks from mp to ui_mp
5334 */
5335 ui_mp->b_cont = mp->b_cont;
5336 freeb(mp);
5337 mp = ui_mp;
5338 }
5339 /*
5340 * send indication message
5341 */
5342 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5343 putnext(peer_tep->te_rq, mp);
5344 }
5345
5346
5347
5348 /*
5349 * Check if a given addr is in use.
5350 * Endpoint ptr returned or NULL if not found.
5351 * The name space is separate for each mode. This implies that
5352 * sockets get their own name space.
5353 */
5354 static tl_endpt_t *
5355 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5356 {
5357 tl_endpt_t *peer_tep = NULL;
5358 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5359 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5360
5361 ASSERT(!IS_SOCKET(tep));
5362
5363 ASSERT(ap != NULL && ap->ta_alen > 0);
5364 ASSERT(ap->ta_zoneid == tep->te_zoneid);
5365 ASSERT(ap->ta_abuf != NULL);
5366 EQUIV(rc == 0, peer_tep != NULL);
5367 IMPLY(rc == 0,
5368 (tep->te_zoneid == peer_tep->te_zoneid) &&
5369 (tep->te_transport == peer_tep->te_transport));
5370
5371 if ((rc == 0) && (peer_tep->te_closing)) {
5372 tl_refrele(peer_tep);
5373 peer_tep = NULL;
5374 }
5375
5376 return (peer_tep);
5377 }
5378
5379 /*
5380 * Find peer for a socket based on unix domain address.
5381 * For implicit addresses our peer can be found by minor number in ai hash. For
5382 * explicit binds we look vnode address at addr_hash.
5383 */
5384 static tl_endpt_t *
5385 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5386 {
5387 tl_endpt_t *peer_tep = NULL;
5388 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5389 tep->te_aihash : tep->te_addrhash;
5390 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5391 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5392
5393 ASSERT(IS_SOCKET(tep));
5394 EQUIV(rc == 0, peer_tep != NULL);
5395 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5396
5397 if (peer_tep != NULL) {
5398 /* Don't attempt to use closing peer. */
5399 if (peer_tep->te_closing)
5400 goto errout;
5401
5402 /*
5403 * Cross-zone unix sockets are permitted, but for Trusted
5404 * Extensions only, the "server" for these must be in the
5405 * global zone.
5406 */
5407 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5408 is_system_labeled() &&
5409 (peer_tep->te_zoneid != GLOBAL_ZONEID))
5410 goto errout;
5411 }
5412
5413 return (peer_tep);
5414
5415 errout:
5416 tl_refrele(peer_tep);
5417 return (NULL);
5418 }
5419
5420 /*
5421 * Generate a free addr and return it in struct pointed by ap
5422 * but allocating space for address buffer.
5423 * The generated address will be at least 4 bytes long and, if req->ta_alen
5424 * exceeds 4 bytes, be req->ta_alen bytes long.
5425 *
5426 * If address is found it will be inserted in the hash.
5427 *
5428 * If req->ta_alen is larger than the default alen (4 bytes) the last
5429 * alen-4 bytes will always be the same as in req.
5430 *
5431 * Return 0 for failure.
5432 * Return non-zero for success.
5433 */
5434 static boolean_t
5435 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5436 {
5437 t_scalar_t alen;
5438 uint32_t loopcnt; /* Limit loop to 2^32 */
5439
5440 ASSERT(tep->te_hash_hndl != NULL);
5441 ASSERT(!IS_SOCKET(tep));
5442
5443 if (tep->te_hash_hndl == NULL)
5444 return (B_FALSE);
5445
5446 /*
5447 * check if default addr is in use
5448 * if it is - bump it and try again
5449 */
5450 if (req == NULL) {
5451 alen = sizeof (uint32_t);
5452 } else {
5453 alen = max(req->ta_alen, sizeof (uint32_t));
5454 ASSERT(tep->te_zoneid == req->ta_zoneid);
5455 }
5456
5457 if (tep->te_alen < alen) {
5458 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5459
5460 /*
5461 * Not enough space in tep->ta_ap to hold the address,
5462 * allocate a bigger space.
5463 */
5464 if (abuf == NULL)
5465 return (B_FALSE);
5466
5467 if (tep->te_alen > 0)
5468 kmem_free(tep->te_abuf, tep->te_alen);
5469
5470 tep->te_alen = alen;
5471 tep->te_abuf = abuf;
5472 }
5473
5474 /* Copy in the address in req */
5475 if (req != NULL) {
5476 ASSERT(alen >= req->ta_alen);
5477 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5478 }
5479
5480 /*
5481 * First try minor number then try default addresses.
5482 */
5483 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5484
5485 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5486 if (mod_hash_insert_reserve(tep->te_addrhash,
5487 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5488 tep->te_hash_hndl) == 0) {
5489 /*
5490 * found free address
5491 */
5492 tep->te_flag |= TL_ADDRHASHED;
5493 tep->te_hash_hndl = NULL;
5494
5495 return (B_TRUE); /* successful return */
5496 }
5497 /*
5498 * Use default address.
5499 */
5500 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5501 atomic_inc_32(&tep->te_defaddr);
5502 }
5503
5504 /*
5505 * Failed to find anything.
5506 */
5507 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5508 "tl_get_any_addr:looped 2^32 times"));
5509 return (B_FALSE);
5510 }
5511
5512 /*
5513 * reallocb + set r/w ptrs to reflect size.
5514 */
5515 static mblk_t *
5516 tl_resizemp(mblk_t *mp, ssize_t new_size)
5517 {
5518 if ((mp = reallocb(mp, new_size, 0)) == NULL)
5519 return (NULL);
5520
5521 mp->b_rptr = DB_BASE(mp);
5522 mp->b_wptr = mp->b_rptr + new_size;
5523 return (mp);
5524 }
5525
5526 static void
5527 tl_cl_backenable(tl_endpt_t *tep)
5528 {
5529 list_t *l = &tep->te_flowlist;
5530 tl_endpt_t *elp;
5531
5532 ASSERT(IS_CLTS(tep));
5533
5534 for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5535 ASSERT(tep->te_ser == elp->te_ser);
5536 ASSERT(elp->te_flowq == tep);
5537 if (!elp->te_closing)
5538 TL_QENABLE(elp);
5539 elp->te_flowq = NULL;
5540 list_remove(l, elp);
5541 }
5542 }
5543
5544 /*
5545 * Unconnect endpoints.
5546 */
5547 static void
5548 tl_co_unconnect(tl_endpt_t *tep)
5549 {
5550 tl_endpt_t *peer_tep = tep->te_conp;
5551 tl_endpt_t *srv_tep = tep->te_oconp;
5552 list_t *l;
5553 tl_icon_t *tip;
5554 tl_endpt_t *cl_tep;
5555 mblk_t *d_mp;
5556
5557 ASSERT(IS_COTS(tep));
5558 /*
5559 * If our peer is closing, don't use it.
5560 */
5561 if ((peer_tep != NULL) && peer_tep->te_closing) {
5562 TL_UNCONNECT(tep->te_conp);
5563 peer_tep = NULL;
5564 }
5565 if ((srv_tep != NULL) && srv_tep->te_closing) {
5566 TL_UNCONNECT(tep->te_oconp);
5567 srv_tep = NULL;
5568 }
5569
5570 if (tep->te_nicon > 0) {
5571 l = &tep->te_iconp;
5572 /*
5573 * If incoming requests pending, change state
5574 * of clients on disconnect ind event and send
5575 * discon_ind pdu to modules above them
5576 * for server: all clients get disconnect
5577 */
5578
5579 while (tep->te_nicon > 0) {
5580 tip = list_head(l);
5581 cl_tep = tip->ti_tep;
5582
5583 if (cl_tep == NULL) {
5584 tl_freetip(tep, tip);
5585 continue;
5586 }
5587
5588 if (cl_tep->te_oconp != NULL) {
5589 ASSERT(cl_tep != cl_tep->te_oconp);
5590 TL_UNCONNECT(cl_tep->te_oconp);
5591 }
5592
5593 if (cl_tep->te_closing) {
5594 tl_freetip(tep, tip);
5595 continue;
5596 }
5597
5598 enableok(cl_tep->te_wq);
5599 TL_QENABLE(cl_tep);
5600 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5601 if (d_mp != NULL) {
5602 cl_tep->te_state = TS_IDLE;
5603 putnext(cl_tep->te_rq, d_mp);
5604 } else {
5605 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5606 SL_TRACE | SL_ERROR,
5607 "tl_co_unconnect:icmng: "
5608 "allocb failure"));
5609 }
5610 tl_freetip(tep, tip);
5611 }
5612 } else if (srv_tep != NULL) {
5613 /*
5614 * If outgoing request pending, change state
5615 * of server on discon ind event
5616 */
5617
5618 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5619 IS_COTSORD(srv_tep) &&
5620 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5621 /*
5622 * Queue ordrel_ind for server to be picked up
5623 * when the connection is accepted.
5624 */
5625 d_mp = tl_ordrel_ind_alloc();
5626 } else {
5627 /*
5628 * send discon_ind to server
5629 */
5630 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5631 }
5632 if (d_mp == NULL) {
5633 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5634 SL_TRACE | SL_ERROR,
5635 "tl_co_unconnect:outgoing:allocb failure"));
5636 TL_UNCONNECT(tep->te_oconp);
5637 goto discon_peer;
5638 }
5639
5640 /*
5641 * If this is a socket the T_DISCON_IND is queued with
5642 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5643 * from the list of pending connections.
5644 * Note that when te_oconp is set the peer better have
5645 * a t_connind_t for the client.
5646 */
5647 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5648 /*
5649 * Queue the disconnection message.
5650 */
5651 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5652 } else {
5653 tip = tl_icon_find(srv_tep, tep->te_seqno);
5654 if (tip == NULL) {
5655 freemsg(d_mp);
5656 } else {
5657 ASSERT(tep == tip->ti_tep);
5658 ASSERT(tep->te_ser == srv_tep->te_ser);
5659 /*
5660 * Delete tip from the server list.
5661 */
5662 if (srv_tep->te_nicon == 1) {
5663 srv_tep->te_state =
5664 NEXTSTATE(TE_DISCON_IND2,
5665 srv_tep->te_state);
5666 } else {
5667 srv_tep->te_state =
5668 NEXTSTATE(TE_DISCON_IND3,
5669 srv_tep->te_state);
5670 }
5671 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5672 T_DISCON_IND);
5673 putnext(srv_tep->te_rq, d_mp);
5674 tl_freetip(srv_tep, tip);
5675 }
5676 TL_UNCONNECT(tep->te_oconp);
5677 srv_tep = NULL;
5678 }
5679 } else if (peer_tep != NULL) {
5680 /*
5681 * unconnect existing connection
5682 * If connected, change state of peer on
5683 * discon ind event and send discon ind pdu
5684 * to module above it
5685 */
5686
5687 ASSERT(tep->te_ser == peer_tep->te_ser);
5688 if (IS_COTSORD(peer_tep) &&
5689 (peer_tep->te_state == TS_WIND_ORDREL ||
5690 peer_tep->te_state == TS_DATA_XFER)) {
5691 /*
5692 * send ordrel ind
5693 */
5694 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5695 "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5696 peer_tep->te_state,
5697 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5698 d_mp = tl_ordrel_ind_alloc();
5699 if (d_mp == NULL) {
5700 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5701 SL_TRACE | SL_ERROR,
5702 "tl_co_unconnect:connected:"
5703 "allocb failure"));
5704 /*
5705 * Continue with cleaning up peer as
5706 * this side may go away with the close
5707 */
5708 TL_QENABLE(peer_tep);
5709 goto discon_peer;
5710 }
5711 peer_tep->te_state =
5712 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5713
5714 putnext(peer_tep->te_rq, d_mp);
5715 /*
5716 * Handle flow control case. This will generate
5717 * a t_discon_ind message with reason 0 if there
5718 * is data queued on the write side.
5719 */
5720 TL_QENABLE(peer_tep);
5721 } else if (IS_COTSORD(peer_tep) &&
5722 peer_tep->te_state == TS_WREQ_ORDREL) {
5723 /*
5724 * Sent an ordrel_ind. We send a discon with
5725 * with error 0 to inform that the peer is gone.
5726 */
5727 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5728 SL_TRACE | SL_ERROR,
5729 "tl_co_unconnect: discon in state %d",
5730 tep->te_state));
5731 tl_discon_ind(peer_tep, 0);
5732 } else {
5733 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5734 SL_TRACE | SL_ERROR,
5735 "tl_co_unconnect: state %d", tep->te_state));
5736 tl_discon_ind(peer_tep, ECONNRESET);
5737 }
5738
5739 discon_peer:
5740 /*
5741 * Disconnect cross-pointers only for close
5742 */
5743 if (tep->te_closing) {
5744 peer_tep = tep->te_conp;
5745 TL_REMOVE_PEER(peer_tep->te_conp);
5746 TL_REMOVE_PEER(tep->te_conp);
5747 }
5748 }
5749 }
5750
5751 /*
5752 * Note: The following routine does not recover from allocb()
5753 * failures
5754 * The reason should be from the <sys/errno.h> space.
5755 */
5756 static void
5757 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5758 {
5759 mblk_t *d_mp;
5760
5761 if (tep->te_closing)
5762 return;
5763
5764 /*
5765 * flush the queues.
5766 */
5767 flushq(tep->te_rq, FLUSHDATA);
5768 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5769
5770 /*
5771 * send discon ind
5772 */
5773 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5774 if (d_mp == NULL) {
5775 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE | SL_ERROR,
5776 "tl_discon_ind:allocb failure"));
5777 return;
5778 }
5779 tep->te_state = TS_IDLE;
5780 putnext(tep->te_rq, d_mp);
5781 }
5782
5783 /*
5784 * Note: The following routine does not recover from allocb()
5785 * failures
5786 * The reason should be from the <sys/errno.h> space.
5787 */
5788 static mblk_t *
5789 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5790 {
5791 mblk_t *mp;
5792 struct T_discon_ind *tdi;
5793
5794 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5795 DB_TYPE(mp) = M_PROTO;
5796 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5797 tdi = (struct T_discon_ind *)mp->b_rptr;
5798 tdi->PRIM_type = T_DISCON_IND;
5799 tdi->DISCON_reason = reason;
5800 tdi->SEQ_number = seqnum;
5801 }
5802 return (mp);
5803 }
5804
5805
5806 /*
5807 * Note: The following routine does not recover from allocb()
5808 * failures
5809 */
5810 static mblk_t *
5811 tl_ordrel_ind_alloc(void)
5812 {
5813 mblk_t *mp;
5814 struct T_ordrel_ind *toi;
5815
5816 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5817 DB_TYPE(mp) = M_PROTO;
5818 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5819 toi = (struct T_ordrel_ind *)mp->b_rptr;
5820 toi->PRIM_type = T_ORDREL_IND;
5821 }
5822 return (mp);
5823 }
5824
5825
5826 /*
5827 * Lookup the seqno in the list of queued connections.
5828 */
5829 static tl_icon_t *
5830 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5831 {
5832 list_t *l = &tep->te_iconp;
5833 tl_icon_t *tip = list_head(l);
5834
5835 ASSERT(seqno != 0);
5836
5837 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5838 ;
5839
5840 return (tip);
5841 }
5842
5843 /*
5844 * Queue data for a given T_CONN_IND while verifying that redundant
5845 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5846 * Used when the originator of the connection closes.
5847 */
5848 static void
5849 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5850 {
5851 tl_icon_t *tip;
5852 mblk_t **mpp, *mp;
5853 int prim, nprim;
5854
5855 if (nmp->b_datap->db_type == M_PROTO)
5856 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5857 else
5858 nprim = -1; /* M_DATA */
5859
5860 tip = tl_icon_find(tep, seqno);
5861 if (tip == NULL) {
5862 freemsg(nmp);
5863 return;
5864 }
5865
5866 ASSERT(tip->ti_seqno != 0);
5867 mpp = &tip->ti_mp;
5868 while (*mpp != NULL) {
5869 mp = *mpp;
5870
5871 if (mp->b_datap->db_type == M_PROTO)
5872 prim = ((union T_primitives *)mp->b_rptr)->type;
5873 else
5874 prim = -1; /* M_DATA */
5875
5876 /*
5877 * Allow nothing after a T_DISCON_IND
5878 */
5879 if (prim == T_DISCON_IND) {
5880 freemsg(nmp);
5881 return;
5882 }
5883 /*
5884 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5885 */
5886 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5887 freemsg(nmp);
5888 return;
5889 }
5890 mpp = &(mp->b_next);
5891 }
5892 *mpp = nmp;
5893 }
5894
5895 /*
5896 * Verify if a certain TPI primitive exists on the connind queue.
5897 * Use prim -1 for M_DATA.
5898 * Return non-zero if found.
5899 */
5900 static boolean_t
5901 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5902 {
5903 tl_icon_t *tip = tl_icon_find(tep, seqno);
5904 boolean_t found = B_FALSE;
5905
5906 if (tip != NULL) {
5907 mblk_t *mp;
5908 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5909 found = (DB_TYPE(mp) == M_PROTO &&
5910 ((union T_primitives *)mp->b_rptr)->type == prim);
5911 }
5912 }
5913 return (found);
5914 }
5915
5916 /*
5917 * Send the b_next mblk chain that has accumulated before the connection
5918 * was accepted. Perform the necessary state transitions.
5919 */
5920 static void
5921 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5922 {
5923 mblk_t *mp;
5924 union T_primitives *primp;
5925
5926 if (tep->te_closing) {
5927 tl_icon_freemsgs(mpp);
5928 return;
5929 }
5930
5931 ASSERT(tep->te_state == TS_DATA_XFER);
5932 ASSERT(tep->te_rq->q_first == NULL);
5933
5934 while ((mp = *mpp) != NULL) {
5935 *mpp = mp->b_next;
5936 mp->b_next = NULL;
5937
5938 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5939 switch (DB_TYPE(mp)) {
5940 default:
5941 freemsg(mp);
5942 break;
5943 case M_DATA:
5944 putnext(tep->te_rq, mp);
5945 break;
5946 case M_PROTO:
5947 primp = (union T_primitives *)mp->b_rptr;
5948 switch (primp->type) {
5949 case T_UNITDATA_IND:
5950 case T_DATA_IND:
5951 case T_OPTDATA_IND:
5952 case T_EXDATA_IND:
5953 putnext(tep->te_rq, mp);
5954 break;
5955 case T_ORDREL_IND:
5956 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5957 tep->te_state);
5958 putnext(tep->te_rq, mp);
5959 break;
5960 case T_DISCON_IND:
5961 tep->te_state = TS_IDLE;
5962 putnext(tep->te_rq, mp);
5963 break;
5964 default:
5965 #ifdef DEBUG
5966 cmn_err(CE_PANIC,
5967 "tl_icon_sendmsgs: unknown primitive");
5968 #endif /* DEBUG */
5969 freemsg(mp);
5970 break;
5971 }
5972 break;
5973 }
5974 }
5975 }
5976
5977 /*
5978 * Free the b_next mblk chain that has accumulated before the connection
5979 * was accepted.
5980 */
5981 static void
5982 tl_icon_freemsgs(mblk_t **mpp)
5983 {
5984 mblk_t *mp;
5985
5986 while ((mp = *mpp) != NULL) {
5987 *mpp = mp->b_next;
5988 mp->b_next = NULL;
5989 freemsg(mp);
5990 }
5991 }
5992
5993 /*
5994 * Send M_ERROR
5995 * Note: assumes caller ensured enough space in mp or enough
5996 * memory available. Does not attempt recovery from allocb()
5997 * failures
5998 */
5999
6000 static void
6001 tl_merror(queue_t *wq, mblk_t *mp, int error)
6002 {
6003 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6004
6005 if (tep->te_closing) {
6006 freemsg(mp);
6007 return;
6008 }
6009
6010 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6011 SL_TRACE | SL_ERROR,
6012 "tl_merror: tep=%p, err=%d", (void *)tep, error));
6013
6014 /*
6015 * flush all messages on queue. we are shutting
6016 * the stream down on fatal error
6017 */
6018 flushq(wq, FLUSHALL);
6019 if (IS_COTS(tep)) {
6020 /* connection oriented - unconnect endpoints */
6021 tl_co_unconnect(tep);
6022 }
6023 if (mp->b_cont) {
6024 freemsg(mp->b_cont);
6025 mp->b_cont = NULL;
6026 }
6027
6028 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6029 freemsg(mp);
6030 mp = allocb(1, BPRI_HI);
6031 if (mp == NULL) {
6032 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6033 SL_TRACE | SL_ERROR,
6034 "tl_merror:M_PROTO: out of memory"));
6035 return;
6036 }
6037 }
6038 if (mp) {
6039 DB_TYPE(mp) = M_ERROR;
6040 mp->b_rptr = DB_BASE(mp);
6041 *mp->b_rptr = (char)error;
6042 mp->b_wptr = mp->b_rptr + sizeof (char);
6043 qreply(wq, mp);
6044 } else {
6045 (void) putnextctl1(tep->te_rq, M_ERROR, error);
6046 }
6047 }
6048
6049 static void
6050 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
6051 {
6052 ASSERT(cr != NULL);
6053
6054 if (flag & TL_SETCRED) {
6055 struct opthdr *opt = (struct opthdr *)buf;
6056 tl_credopt_t *tlcred;
6057
6058 opt->level = TL_PROT_LEVEL;
6059 opt->name = TL_OPT_PEER_CRED;
6060 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
6061
6062 tlcred = (tl_credopt_t *)(opt + 1);
6063 tlcred->tc_uid = crgetuid(cr);
6064 tlcred->tc_gid = crgetgid(cr);
6065 tlcred->tc_ruid = crgetruid(cr);
6066 tlcred->tc_rgid = crgetrgid(cr);
6067 tlcred->tc_suid = crgetsuid(cr);
6068 tlcred->tc_sgid = crgetsgid(cr);
6069 tlcred->tc_ngroups = crgetngroups(cr);
6070 } else if (flag & TL_SETUCRED) {
6071 struct opthdr *opt = (struct opthdr *)buf;
6072
6073 opt->level = TL_PROT_LEVEL;
6074 opt->name = TL_OPT_PEER_UCRED;
6075 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6076
6077 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6078 } else {
6079 struct T_opthdr *topt = (struct T_opthdr *)buf;
6080 ASSERT(flag & TL_SOCKUCRED);
6081
6082 topt->level = SOL_SOCKET;
6083 topt->name = SCM_UCRED;
6084 topt->len = ucredminsize(cr) + sizeof (*topt);
6085 topt->status = 0;
6086 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6087 }
6088 }
6089
6090 /* ARGSUSED */
6091 static int
6092 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6093 {
6094 /* no default value processed in protocol specific code currently */
6095 return (-1);
6096 }
6097
6098 /* ARGSUSED */
6099 static int
6100 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6101 {
6102 int len;
6103 tl_endpt_t *tep;
6104 int *valp;
6105
6106 tep = (tl_endpt_t *)wq->q_ptr;
6107
6108 len = 0;
6109
6110 /*
6111 * Assumes: option level and name sanity check done elsewhere
6112 */
6113
6114 switch (level) {
6115 case SOL_SOCKET:
6116 if (!IS_SOCKET(tep))
6117 break;
6118 switch (name) {
6119 case SO_RECVUCRED:
6120 len = sizeof (int);
6121 valp = (int *)ptr;
6122 *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6123 break;
6124 default:
6125 break;
6126 }
6127 break;
6128 case TL_PROT_LEVEL:
6129 switch (name) {
6130 case TL_OPT_PEER_CRED:
6131 case TL_OPT_PEER_UCRED:
6132 /*
6133 * option not supposed to retrieved directly
6134 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6135 * when some internal flags set by other options
6136 * Direct retrieval always designed to fail(ignored)
6137 * for this option.
6138 */
6139 break;
6140 }
6141 }
6142 return (len);
6143 }
6144
6145 /* ARGSUSED */
6146 static int
6147 tl_set_opt(queue_t *wq, uint_t mgmt_flags, int level, int name, uint_t inlen,
6148 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, void *thisdg_attrs,
6149 cred_t *cr)
6150 {
6151 int error;
6152 tl_endpt_t *tep;
6153
6154 tep = (tl_endpt_t *)wq->q_ptr;
6155
6156 error = 0; /* NOERROR */
6157
6158 /*
6159 * Assumes: option level and name sanity checks done elsewhere
6160 */
6161
6162 switch (level) {
6163 case SOL_SOCKET:
6164 if (!IS_SOCKET(tep)) {
6165 error = EINVAL;
6166 break;
6167 }
6168 /*
6169 * TBD: fill in other AF_UNIX socket options and then stop
6170 * returning error.
6171 */
6172 switch (name) {
6173 case SO_RECVUCRED:
6174 /*
6175 * We only support this for datagram sockets;
6176 * getpeerucred handles the connection oriented
6177 * transports.
6178 */
6179 if (!IS_CLTS(tep)) {
6180 error = EINVAL;
6181 break;
6182 }
6183 if (*(int *)invalp == 0)
6184 tep->te_flag &= ~TL_SOCKUCRED;
6185 else
6186 tep->te_flag |= TL_SOCKUCRED;
6187 break;
6188 default:
6189 error = EINVAL;
6190 break;
6191 }
6192 break;
6193 case TL_PROT_LEVEL:
6194 switch (name) {
6195 case TL_OPT_PEER_CRED:
6196 case TL_OPT_PEER_UCRED:
6197 /*
6198 * option not supposed to be set directly
6199 * Its value in initialized for each endpoint at
6200 * driver open time.
6201 * Direct setting always designed to fail for this
6202 * option.
6203 */
6204 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6205 SL_TRACE | SL_ERROR,
6206 "tl_set_opt: option is not supported"));
6207 error = EPROTO;
6208 break;
6209 }
6210 }
6211 return (error);
6212 }
6213
6214
6215 static void
6216 tl_timer(void *arg)
6217 {
6218 queue_t *wq = arg;
6219 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6220
6221 ASSERT(tep);
6222
6223 tep->te_timoutid = 0;
6224
6225 enableok(wq);
6226 /*
6227 * Note: can call wsrv directly here and save context switch
6228 * Consider change when qtimeout (not timeout) is active
6229 */
6230 qenable(wq);
6231 }
6232
6233 static void
6234 tl_buffer(void *arg)
6235 {
6236 queue_t *wq = arg;
6237 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6238
6239 ASSERT(tep);
6240
6241 tep->te_bufcid = 0;
6242 tep->te_nowsrv = B_FALSE;
6243
6244 enableok(wq);
6245 /*
6246 * Note: can call wsrv directly here and save context switch
6247 * Consider change when qbufcall (not bufcall) is active
6248 */
6249 qenable(wq);
6250 }
6251
6252 static void
6253 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6254 {
6255 tl_endpt_t *tep;
6256
6257 tep = (tl_endpt_t *)wq->q_ptr;
6258
6259 if (tep->te_closing) {
6260 freemsg(mp);
6261 return;
6262 }
6263 noenable(wq);
6264
6265 (void) insq(wq, wq->q_first, mp);
6266
6267 if (tep->te_bufcid || tep->te_timoutid) {
6268 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE | SL_ERROR,
6269 "tl_memrecover:recover %p pending", (void *)wq));
6270 return;
6271 }
6272
6273 tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq);
6274 if (tep->te_bufcid == NULL) {
6275 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6276 drv_usectohz(TL_BUFWAIT));
6277 }
6278 }
6279
6280 static void
6281 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6282 {
6283 ASSERT(tip->ti_seqno != 0);
6284
6285 if (tip->ti_mp != NULL) {
6286 tl_icon_freemsgs(&tip->ti_mp);
6287 tip->ti_mp = NULL;
6288 }
6289 if (tip->ti_tep != NULL) {
6290 tl_refrele(tip->ti_tep);
6291 tip->ti_tep = NULL;
6292 }
6293 list_remove(&tep->te_iconp, tip);
6294 kmem_free(tip, sizeof (tl_icon_t));
6295 tep->te_nicon--;
6296 }
6297
6298 /*
6299 * Remove address from address hash.
6300 */
6301 static void
6302 tl_addr_unbind(tl_endpt_t *tep)
6303 {
6304 tl_endpt_t *elp;
6305
6306 if (tep->te_flag & TL_ADDRHASHED) {
6307 if (IS_SOCKET(tep)) {
6308 (void) mod_hash_remove(tep->te_addrhash,
6309 (mod_hash_key_t)tep->te_vp,
6310 (mod_hash_val_t *)&elp);
6311 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6312 tep->te_magic = SOU_MAGIC_IMPLICIT;
6313 } else {
6314 (void) mod_hash_remove(tep->te_addrhash,
6315 (mod_hash_key_t)&tep->te_ap,
6316 (mod_hash_val_t *)&elp);
6317 (void) kmem_free(tep->te_abuf, tep->te_alen);
6318 tep->te_alen = -1;
6319 tep->te_abuf = NULL;
6320 }
6321 tep->te_flag &= ~TL_ADDRHASHED;
6322 }
6323 }