1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 */
28
29 /*
30 * Multithreaded STREAMS Local Transport Provider.
31 *
32 * OVERVIEW
33 * ========
34 *
35 * This driver provides TLI as well as socket semantics. It provides
36 * connectionless, connection oriented, and connection oriented with orderly
37 * release transports for TLI and sockets. Each transport type has separate name
38 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
39 * this removes any name space conflicts when binding to socket style transport
40 * addresses.
41 *
42 * NOTE: There is one exception: Socket ticots and ticotsord transports share
43 * the same namespace. In fact, sockets always use ticotsord type transport.
44 *
45 * The driver mode is specified during open() by the minor number used for
46 * open.
47 *
48 * The sockets in addition have the following semantic differences:
49 * No support for passing up credentials (TL_SET[U]CRED).
50 *
51 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
52 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
53 * T_OPTDATA_IND.
54 *
55 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
56 * a T_CONN_RES is received from the acceptor. This means that a socket
57 * connect will complete before the peer has called accept.
58 *
59 *
60 * MULTITHREADING
61 * ==============
62 *
63 * The driver does not use STREAMS protection mechanisms. Instead it uses a
64 * generic "serializer" abstraction. Most of the operations are executed behind
65 * the serializer and are, essentially single-threaded. All functions executed
66 * behind the same serializer are strictly serialized. So if one thread calls
67 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
68 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
69 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
70 * or bar(mp2, arg1); foo(mp1, arg1); But foo() and bar() will never run at
71 * the same time.
72 *
73 * Connectionless transports use a single serializer per transport type (one for
74 * TLI and one for sockets). Connection-oriented transports use finer-grained
75 * serializers.
76 *
77 * All COTS-type endpoints start their life with private serializers. During
78 * connection request processing the endpoint serializer is switched to the
79 * listener's serializer and the rest of T_CONN_REQ processing is done on the
80 * listener serializer. During T_CONN_RES processing the eager serializer is
81 * switched from listener to acceptor serializer and after that point all
82 * processing for eager and acceptor happens on this serializer. To avoid races
83 * with endpoint closes while its serializer may be changing closes are blocked
84 * while serializers are manipulated.
85 *
86 * References accounting
87 * ---------------------
88 *
89 * Endpoints are reference counted and freed when the last reference is
90 * dropped. Functions within the serializer may access an endpoint state even
91 * after an endpoint closed. The te_closing being set on the endpoint indicates
92 * that the endpoint entered its close routine.
93 *
94 * One reference is held for each opened endpoint instance. The reference
95 * counter is incremented when the endpoint is linked to another endpoint and
96 * decremented when the link disappears. It is also incremented when the
97 * endpoint is found by the hash table lookup. This increment is atomic with the
98 * lookup itself and happens while the hash table read lock is held.
99 *
100 * Close synchronization
101 * ---------------------
102 *
103 * During close the endpoint is marked as closing using te_closing flag. It is
104 * usually enough to check for te_closing flag since all other state changes
105 * happen after this flag is set and the close entered serializer. Immediately
106 * after setting te_closing flag tl_close() enters serializer and waits until
107 * the callback finishes. This allows all functions called within serializer to
108 * simply check te_closing without any locks.
109 *
110 * Serializer management.
111 * ---------------------
112 *
113 * For COTS transports serializers are created when the endpoint is constructed
114 * and destroyed when the endpoint is destructed. CLTS transports use global
115 * serializers - one for sockets and one for TLI.
116 *
117 * COTS serializers have separate reference counts to deal with several
118 * endpoints sharing the same serializer. There is a subtle problem related to
119 * the serializer destruction. The serializer should never be destroyed by any
120 * function executed inside serializer. This means that close has to wait till
121 * all serializer activity for this endpoint is finished before it can drop the
122 * last reference on the endpoint (which may as well free the serializer). This
123 * is only relevant for COTS transports which manage serializers
124 * dynamically. For CLTS transports close may complete without waiting for all
125 * serializer activity to finish since serializer is only destroyed at driver
126 * detach time.
127 *
128 * COTS endpoints keep track of the number of outstanding requests on the
129 * serializer for the endpoint. The code handling accept() avoids changing
130 * client serializer if it has any pending messages on the serializer and
131 * instead moves acceptor to listener's serializer.
132 *
133 *
134 * Use of hash tables
135 * ------------------
136 *
137 * The driver uses modhash hash table implementation. Each transport uses two
138 * hash tables - one for finding endpoints by acceptor ID and another one for
139 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
140 * pair of hash tables since sockets only use TICOTSORD.
141 *
142 * All hash tables lookups increment a reference count for returned endpoints,
143 * so we may safely check the endpoint state even when the endpoint is removed
144 * from the hash by another thread immediately after it is found.
145 *
146 *
147 * CLOSE processing
148 * ================
149 *
150 * The driver enters serializer twice on close(). The close sequence is the
151 * following:
152 *
153 * 1) Wait until closing is safe (te_closewait becomes zero)
154 * This step is needed to prevent close during serializer switches. In most
155 * cases (close happening after connection establishment) te_closewait is
156 * zero.
157 * 2) Set te_closing.
158 * 3) Call tl_close_ser() within serializer and wait for it to complete.
159 *
160 * tl_close_ser() simply marks endpoint and wakes up waiting tl_close().
161 * It also needs to clear write-side q_next pointers - this should be done
162 * before qprocsoff().
163 *
164 * This synchronous serializer entry during close is needed to ensure that
165 * the queue is valid everywhere inside the serializer.
166 *
167 * Note that in many cases close will execute tl_close_ser() synchronously,
168 * so it will not wait at all.
169 *
170 * 4) Calls qprocsoff().
171 * 5) Calls tl_close_finish_ser() within the serializer and waits for it to
172 * complete (for COTS transports). For CLTS transport there is no wait.
173 *
174 * tl_close_finish_ser() Finishes the close process and wakes up waiting
175 * close if there is any.
176 *
177 * Note that in most cases close will enter tl_close_finish_ser()
178 * synchronously and will not wait at all.
179 *
180 *
181 * Flow Control
182 * ============
183 *
184 * The driver implements both read and write side service routines. No one calls
185 * putq() on the read queue. The read side service routine tl_rsrv() is called
186 * when the read side stream is back-enabled. It enters serializer synchronously
187 * (waits till serializer processing is complete). Within serializer it
188 * back-enables all endpoints blocked by the queue for connection-less
189 * transports and enables write side service processing for the peer for
190 * connection-oriented transports.
191 *
192 * Read and write side service routines use special mblk_sized space in the
193 * endpoint structure to enter perimeter.
194 *
195 * Write-side flow control
196 * -----------------------
197 *
198 * Write side flow control is a bit tricky. The driver needs to deal with two
199 * message queues - the explicit STREAMS message queue maintained by
200 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
201 * queues should be synchronized to preserve message ordering and should
202 * maintain a single order determined by the order in which messages enter
203 * tl_wput(). In order to maintain the ordering between these two queues the
204 * STREAMS queue is only manipulated within the serializer, so the ordering is
205 * provided by the serializer.
206 *
207 * Functions called from the tl_wsrv() sometimes may call putbq(). To
208 * immediately stop any further processing of the STREAMS message queues the
209 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
210 * side service processing stops when the flag is set.
211 *
212 * The tl_wsrv() function enters serializer synchronously and waits for it to
213 * complete. The serializer call-back tl_wsrv_ser() either drains all messages
214 * on the STREAMS queue or terminates when it notices the te_nowsrv flag
215 * set. Note that the maximum amount of messages processed by tl_wput_ser() is
216 * always bounded by the amount of messages on the STREAMS queue at the time
217 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
218 * queue from another serialized entry which can't happen in parallel. This
219 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
220 * of it draining forever while writer places new messages on the STREAMS
221 * queue).
222 *
223 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
224 *
225 *
226 * Unix Domain Sockets
227 * ===================
228 *
229 * The driver knows the structure of Unix Domain sockets addresses and treats
230 * them differently from generic TLI addresses. For sockets implicit binds are
231 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
232 * instead of using address length of zero. Explicit binds specify
233 * SOU_MAGIC_EXPLICIT as magic.
234 *
235 * For implicit binds we always use minor number as soua_vp part of the address
236 * and avoid any hash table lookups. This saves two hash tables lookups per
237 * anonymous bind.
238 *
239 * For explicit address we hash the vnode pointer instead of hashing the
240 * full-scale address+zone+length. Hashing by pointer is more efficient than
241 * hashing by the full address.
242 *
243 * For unix domain sockets the te_abuf is always pointing to te_uxaddr part of
244 * the tep structure, so it should never be freed.
245 *
246 * Also for sockets the driver always uses minor number as acceptor id.
247 *
248 * TPI VIOLATIONS
249 * --------------
250 *
251 * This driver violates TPI in several respects for Unix Domain Sockets:
252 *
253 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
254 * is requested and the endpoint is already in use. There is no point in
255 * generating an unused address since this address will be rejected by
256 * sockfs anyway. For implicit binds it always generates a new address
257 * (sets soua_vp to its minor number).
258 *
259 * 2) It always uses minor number as acceptor ID and never uses queue
260 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
261 * message and they do not use the queue pointer.
262 *
263 * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog
264 * followed by listen(). The listen() should be issued with non-zero
265 * backlog, so sotpi_listen() issues unbind request followed by bind
266 * request to the same address but with a non-zero qlen value. Both
267 * tl_bind() and tl_unbind() require write lock on the hash table to
268 * insert/remove the address. The driver does not remove the address from
269 * the hash for endpoints that are bound to the explicit address and have
270 * backlog of zero. During T_BIND_REQ processing if the address requested
271 * is equal to the address the endpoint already has it updates the backlog
272 * without reinserting the address in the hash table. This optimization
273 * avoids two hash table updates for each listener created. It also
274 * avoids the problem of a "stolen" address when another listener may use
275 * the same address between the unbind and bind and suddenly listen() fails
276 * because address is in use even though the bind() succeeded.
277 *
278 *
279 * CONNECTIONLESS TRANSPORTS
280 * =========================
281 *
282 * Connectionless transports all share the same serializer (one for TLI and one
283 * for Sockets). Functions executing behind serializer can check or modify state
284 * of any endpoint.
285 *
286 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
287 * te_lastep field. The next time X talks to some address A it checks whether A
288 * is the same as Y's address and if it is there is no need to lookup Y. If the
289 * address is different or the state of Y is not appropriate (e.g. closed or not
290 * idle) X does a lookup using tl_find_peer() and caches the new address.
291 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
292 * on the endpoint found.
293 *
294 * During close of endpoint Y it doesn't try to remove itself from other
295 * endpoints caches. They will detect that Y is gone and will search the peer
296 * endpoint again.
297 *
298 * Flow Control Handling.
299 * ----------------------
300 *
301 * Each connectionless endpoint keeps a list of endpoints which are
302 * flow-controlled by its queue. It also keeps a pointer to the queue which
303 * flow-controls itself. Whenever flow control releases for endpoint X it
304 * enables all queues from the list. During close it also back-enables everyone
305 * in the list. If X is flow-controlled when it is closing it removes it from
306 * the peers list.
307 *
308 * DATA STRUCTURES
309 * ===============
310 *
311 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
312 * endpoint state. For connection-oriented transports it keeps a list
313 * of pending connections (tl_icon_t). For connectionless transports it keeps a
314 * list of endpoints flow controlled by this one.
315 *
316 * Each transport type is represented by a per-transport data structure
317 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
318 * endpoint address hash tables for each transport. It also contains pointer to
319 * transport serializer for connectionless transports.
320 *
321 * Each endpoint keeps a link to its transport structure, so the code can find
322 * all per-transport information quickly.
323 */
324
325 #include <sys/types.h>
326 #include <sys/inttypes.h>
327 #include <sys/stream.h>
328 #include <sys/stropts.h>
329 #define _SUN_TPI_VERSION 2
330 #include <sys/tihdr.h>
331 #include <sys/strlog.h>
332 #include <sys/debug.h>
333 #include <sys/cred.h>
334 #include <sys/errno.h>
335 #include <sys/kmem.h>
336 #include <sys/id_space.h>
337 #include <sys/modhash.h>
338 #include <sys/mkdev.h>
339 #include <sys/tl.h>
340 #include <sys/stat.h>
341 #include <sys/conf.h>
342 #include <sys/modctl.h>
343 #include <sys/strsun.h>
344 #include <sys/socket.h>
345 #include <sys/socketvar.h>
346 #include <sys/sysmacros.h>
347 #include <sys/xti_xtiopt.h>
348 #include <sys/ddi.h>
349 #include <sys/sunddi.h>
350 #include <sys/zone.h>
351 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */
352 #include <inet/optcom.h>
353 #include <sys/strsubr.h>
354 #include <sys/ucred.h>
355 #include <sys/suntpi.h>
356 #include <sys/list.h>
357 #include <sys/serializer.h>
358
359 /*
360 * TBD List
361 * 14 Eliminate state changes through table
362 * 16. AF_UNIX socket options
363 * 17. connect() for ticlts
364 * 18. support for "netstat" to show AF_UNIX plus TLI local
365 * transport connections
366 * 21. sanity check to flushing on sending M_ERROR
367 */
368
369 /*
370 * CONSTANT DECLARATIONS
371 * --------------------
372 */
373
374 /*
375 * Local declarations
376 */
377 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST] /* ti_statetbl entry for event EV, state ST */
378
379 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */
380 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */
381 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */
382 /*
383 * Hash tables size; used as the initial value of tl_hash_size.
384 */
385 #define TL_HASH_SIZE 311
386
387 /*
388 * Definitions for module_info; these values populate tl_minfo.
389 */
390 #define TL_ID (104) /* module ID number */
391 #define TL_NAME "tl" /* module name */
392 #define TL_MINPSZ (0) /* min packet size */
393 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */
394 #define TL_HIWAT (16*1024) /* hi water mark */
395 #define TL_LOWAT (256) /* lo water mark */
396 /*
397 * Definition of minor numbers/modes for new transport provider modes.
398 * We view the socket use as a separate mode to get a separate name space.
399 */
400 #define TL_TICOTS 0 /* connection oriented transport */
401 #define TL_TICOTSORD 1 /* COTS w/ orderly release */
402 #define TL_TICLTS 2 /* connectionless transport */
403 #define TL_UNUSED 3 /* no transport; the "undefined" entry in tl_transports */
404 #define TL_SOCKET 4 /* Socket */
405 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) /* socket flavor of TL_TICOTS */
406 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) /* socket flavor of TL_TICOTSORD */
407 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) /* socket flavor of TL_TICLTS */
408
409 #define TL_MINOR_MASK 0x7 /* covers TL_SOCKET plus the transport type bits */
410 #define TL_MINOR_START (TL_TICLTS + 1) /* NOTE(review): presumably where dynamically assigned minors begin - confirm */
411
412 /*
413 * LOCAL MACROS
414 */
415 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t)) /* round p up to a multiple of sizeof (t_scalar_t) */
416
417 /*
418 * EXTERNAL VARIABLE DECLARATIONS
419 * -----------------------------
420 */
421 /*
422 * state table defined in the OS space.c; accessed through NEXTSTATE()
423 */
424 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES]; /* indexed [event][state] */
425
426 /*
427 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
428 */
429 static int tl_open(queue_t *, dev_t *, int, int, cred_t *); /* qi_qopen in tl_rinit */
430 static int tl_close(queue_t *, int, cred_t *); /* qi_qclose in tl_rinit */
431 static void tl_wput(queue_t *, mblk_t *); /* write-side put, qi_putp in tl_winit */
432 static void tl_wsrv(queue_t *); /* write-side service, qi_srvp in tl_winit */
433 static void tl_rsrv(queue_t *); /* read-side service, qi_srvp in tl_rinit */
434
435 static int tl_attach(dev_info_t *, ddi_attach_cmd_t); /* passed to DDI_DEFINE_STREAM_OPS */
436 static int tl_detach(dev_info_t *, ddi_detach_cmd_t); /* passed to DDI_DEFINE_STREAM_OPS */
437 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **); /* passed to DDI_DEFINE_STREAM_OPS */
438
439
440 /*
441 * GLOBAL DATA STRUCTURES AND VARIABLES
442 * -----------------------------------
443 */
444
445 /*
446 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
447 * For now, we only manage the SO_RECVUCRED option but we also have
448 * harmless dummy options to make things work with some common code we access.
449 */
450 opdes_t tl_opt_arr[] = { /* entry count is available as TL_OPT_ARR_CNT */
451 /* The SO_TYPE is needed for the hack below */
452 { /* NOTE(review): field order presumably follows opdes_t in <inet/optcom.h> - confirm */
453 SO_TYPE,
454 SOL_SOCKET,
455 OA_R,
456 OA_R,
457 OP_NP,
458 0,
459 sizeof (t_scalar_t),
460 0
461 },
462 { /* the one option the comment above says is really managed */
463 SO_RECVUCRED,
464 SOL_SOCKET,
465 OA_RW,
466 OA_RW,
467 OP_NP,
468 0,
469 sizeof (int),
470 0
471 }
472 };
473
474 /*
475 * Table of all supported levels
476 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
477 * any supported options so we need this info separately.
478 *
479 * This is needed only for topmost tpi providers.
480 */
481 optlevel_t tl_valid_levels_arr[] = {
482 XTI_GENERIC,
483 SOL_SOCKET,
484 TL_PROT_LEVEL
485 };
486
487 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) /* number of entries in tl_valid_levels_arr */
488 /*
489 * Upper bound on the space needed to return all options: per tl_opt_arr
490 * entry 4 bytes of data (the "<< 2") plus a struct opthdr, and in addition
491 * 64 spare bytes and a struct T_optmgmt_ack. Larger options need hand-tuning.
492 */
493 #define TL_MAX_OPT_BUF_LEN \
494 ((A_CNT(tl_opt_arr) << 2) + \
495 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \
496 64 + sizeof (struct T_optmgmt_ack))
497
498 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) /* number of entries in tl_opt_arr */
499
500 /*
501 * transport addr structure: zone scope plus a length-counted address buffer
502 */
503 typedef struct tl_addr {
504 zoneid_t ta_zoneid; /* Zone scope of address */
505 t_scalar_t ta_alen; /* length of abuf */
506 void *ta_abuf; /* the addr itself */
507 } tl_addr_t;
508
509 /*
510 * Refcounted version of serializer.
511 */
512 typedef struct tl_serializer {
513 uint_t ts_refcnt; /* reference count for the serializer below */
514 serializer_t *ts_serializer; /* the underlying serializer; reached via te_serializer */
515 } tl_serializer_t;
516
517 /*
518 * Each transport type has a separate state.
519 * Per-transport state.
520 */
521 typedef struct tl_transport_state {
522 char *tr_name; /* transport name, e.g. "ticots" */
523 minor_t tr_minor; /* TL_* mode value of this transport */
524 uint32_t tr_defaddr; /* set to TL_DFADDR in tl_transports */
525 mod_hash_t *tr_ai_hash; /* acceptor ID hash (te_aihash) */
526 mod_hash_t *tr_addr_hash; /* endpoint address hash (te_addrhash) */
527 tl_serializer_t *tr_serializer; /* transport serializer for connectionless transports */
528 } tl_transport_state_t;
529
530 #define TL_DFADDR 0x1000 /* tr_defaddr value given to every transport below */
531 /* Per-transport state table: one fully initialized row per TL_* mode. */
532 static tl_transport_state_t tl_transports[] = { /* row i has tr_minor == i */
533 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
534 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
535 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
536 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, /* placeholder for TL_UNUSED */
537 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
538 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL }, /* tr_serializer now explicit, like the other rows */
539 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
540 };
541
542 #define TL_MAXTRANSPORT A_CNT(tl_transports) /* number of rows in tl_transports */
543
544 struct tl_endpt; /* forward declaration; the structure is defined further down */
545 typedef struct tl_endpt tl_endpt_t;
546
547 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *); /* function type taking a message and an endpoint */
548
549 /*
550 * Data structure used to represent pending connects.
551 * Records enough information so that the connecting peer can close
552 * before the connection gets accepted.
553 */
554 typedef struct tl_icon {
555 list_node_t ti_node; /* list linkage; presumably for an endpoint's te_iconp list - confirm */
556 struct tl_endpt *ti_tep; /* NULL if peer has already closed */
557 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */
558 t_scalar_t ti_seqno; /* Sequence number */
559 } tl_icon_t;
560
561 typedef struct so_ux_addr soux_addr_t; /* short name for struct so_ux_addr */
562 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t) /* size of a soux_addr_t in bytes */
563
564 /*
565 * Maximum number of unaccepted connection indications allowed per listener.
566 */
567 #define TL_MAXQLEN 4096
568 int tl_maxqlen = TL_MAXQLEN; /* non-static variable, initialized to TL_MAXQLEN */
569
570 /*
571 * transport endpoint structure
572 */
573 struct tl_endpt {
574 queue_t *te_rq; /* stream read queue */
575 queue_t *te_wq; /* stream write queue */
576 uint32_t te_refcnt; /* reference count on this endpoint */
577 int32_t te_state; /* TPI state of endpoint */
578 minor_t te_minor; /* minor number */
579 #define te_seqno te_minor /* te_seqno is another name for te_minor */
580 uint_t te_flag; /* flag field */
581 boolean_t te_nowsrv; /* set by TL_PUTBQ, cleared by TL_QENABLE/TL_PUTQ */
582 tl_serializer_t *te_ser; /* Serializer to use */
583 #define te_serializer te_ser->ts_serializer
584
585 soux_addr_t te_uxaddr; /* Socket address */
586 #define te_magic te_uxaddr.soua_magic
587 #define te_vp te_uxaddr.soua_vp
588 tl_addr_t te_ap; /* addr bound to this endpt */
589 #define te_zoneid te_ap.ta_zoneid
590 #define te_alen te_ap.ta_alen
591 #define te_abuf te_ap.ta_abuf
592
593 tl_transport_state_t *te_transport; /* per-transport state this endpoint belongs to */
594 #define te_addrhash te_transport->tr_addr_hash
595 #define te_aihash te_transport->tr_ai_hash
596 #define te_defaddr te_transport->tr_defaddr
597 cred_t *te_credp; /* endpoint user credentials */
598 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */
599
600 /*
601 * State specific for connection-oriented and connectionless transports.
602 * The two variants share storage, so only one set of fields is meaningful.
603 */
604 union {
605 /* Connection-oriented state. */
606 struct {
607 t_uscalar_t _te_nicon; /* count of conn requests */
608 t_uscalar_t _te_qlen; /* max conn requests */
609 tl_endpt_t *_te_oconp; /* conn request pending */
610 tl_endpt_t *_te_conp; /* connected endpt */
611 #ifndef _ILP32
612 void *_te_pad;
613 #endif
614 list_t _te_iconp; /* list of conn ind. pending */
615 } _te_cots_state;
616 /* Connection-less state. */
617 struct {
618 tl_endpt_t *_te_lastep; /* last dest. endpoint */
619 tl_endpt_t *_te_flowq; /* flow controlled on whom */
620 list_node_t _te_flows; /* lists of connections */
621 list_t _te_flowlist; /* Who flowcontrols on me */
622 } _te_clts_state;
623 } _te_transport_state;
624 #define te_nicon _te_transport_state._te_cots_state._te_nicon
625 #define te_qlen _te_transport_state._te_cots_state._te_qlen
626 #define te_oconp _te_transport_state._te_cots_state._te_oconp
627 #define te_conp _te_transport_state._te_cots_state._te_conp
628 #define te_iconp _te_transport_state._te_cots_state._te_iconp
629 #define te_lastep _te_transport_state._te_clts_state._te_lastep
630 #define te_flowq _te_transport_state._te_clts_state._te_flowq
631 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist
632 #define te_flows _te_transport_state._te_clts_state._te_flows
633
634 bufcall_id_t te_bufcid; /* outstanding bufcall id */
635 timeout_id_t te_timoutid; /* outstanding timeout id */
636 pid_t te_cpid; /* cached pid of endpoint */
637 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */
638 /*
639 * Pieces of the endpoint state needed for closing.
640 */
641 kmutex_t te_closelock; /* NOTE(review): presumably guards the close fields below - confirm */
642 kcondvar_t te_closecv; /* NOTE(review): presumably signalled for a waiting tl_close() - confirm */
643 uint8_t te_closing; /* The endpoint started closing */
644 uint8_t te_closewait; /* Wait in close until zero */
645 mblk_t te_closemp; /* for entering serializer on close */
646 mblk_t te_rsrvmp; /* for entering serializer on rsrv */
647 mblk_t te_wsrvmp; /* for entering serializer on wsrv */
648 kmutex_t te_srv_lock; /* NOTE(review): presumably guards the *_active flags below - confirm */
649 kcondvar_t te_srv_cv; /* NOTE(review): presumably paired with te_srv_lock - confirm */
650 uint8_t te_rsrv_active; /* Running in tl_rsrv() */
651 uint8_t te_wsrv_active; /* Running in tl_wsrv() */
652 /*
653 * Pieces of the endpoint state needed for serializer transitions.
654 */
655 kmutex_t te_ser_lock; /* Protects the count below */
656 uint_t te_ser_count; /* Number of messages on serializer */
657 };
657
658 /*
659 * Flag values. Lower 4 bits specify the transport used.
660 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
661 * they allow to identify the endpoint more easily.
662 */
663 #define TL_LISTENER 0x00010 /* the listener endpoint */
664 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */
665 #define TL_EAGER 0x00040 /* connecting endpoint */
666 #define TL_ACCEPTED 0x00080 /* accepted connection */
667 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */
668 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */
669 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */
670 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */
671 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */
672 /*
673 * Boolean checks for the endpoint type, based on the mode bits in te_flag.
674 */
675 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) /* TL_TICLTS bit set */
676 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) /* TL_TICLTS bit clear */
677 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) /* TL_TICOTSORD bit set */
678 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) /* TL_SOCKET bit set */
679
680 /*
681 * Certain operations are always used together. These macros reduce the chance
682 * of missing a part of a combination.
683 */
684 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } /* release the reference, then clear x; x must be an lvalue */
685 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } /* TL_UNCONNECT only when x is non-NULL */
686
687 #define TL_PUTBQ(x, mp) { /* put mp back on the write queue and set te_nowsrv */ \
688 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \
689 (x)->te_nowsrv = B_TRUE; \
690 (void) putbq((x)->te_wq, mp); \
691 }
692
693 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } /* clear te_nowsrv and enable the write queue */
694 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } /* clear te_nowsrv and queue mp on the write queue */
695
696 /*
697 * STREAMS driver glue data structures.
698 */
699 static struct module_info tl_minfo = { /* shared by tl_rinit and tl_winit */
700 TL_ID, /* mi_idnum */
701 TL_NAME, /* mi_idname */
702 TL_MINPSZ, /* mi_minpsz */
703 TL_MAXPSZ, /* mi_maxpsz */
704 TL_HIWAT, /* mi_hiwat */
705 TL_LOWAT /* mi_lowat */
706 };
707
708 static struct qinit tl_rinit = { /* read side: no put procedure, service is tl_rsrv */
709 NULL, /* qi_putp */
710 (int (*)())tl_rsrv, /* qi_srvp */
711 tl_open, /* qi_qopen */
712 tl_close, /* qi_qclose */
713 NULL, /* qi_qadmin */
714 &tl_minfo, /* qi_minfo */
715 NULL /* qi_mstat */
716 };
717
718 static struct qinit tl_winit = { /* write side: put is tl_wput, service is tl_wsrv */
719 (int (*)())tl_wput, /* qi_putp */
720 (int (*)())tl_wsrv, /* qi_srvp */
721 NULL, /* qi_qopen */
722 NULL, /* qi_qclose */
723 NULL, /* qi_qadmin */
724 &tl_minfo, /* qi_minfo */
725 NULL /* qi_mstat */
726 };
727
728 static struct streamtab tlinfo = { /* handed to DDI_DEFINE_STREAM_OPS */
729 &tl_rinit, /* st_rdinit */
730 &tl_winit, /* st_wrinit */
731 NULL, /* st_muxrinit */
732 NULL /* st_muxwrinit */
733 };
734
735 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
736 nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported); /* defines tl_devops, used by modldrv */
737
738 static struct modldrv modldrv = {
739 &mod_driverops, /* Type of module -- pseudo driver here */
740 "TPI Local Transport (tl)", /* description string */
741 &tl_devops, /* driver ops */
742 };
743
744 /*
745 * Module linkage information for the kernel.
746 */
747 static struct modlinkage modlinkage = {
748 MODREV_1,
749 &modldrv, /* the single linkage element, followed by a NULL terminator */
750 NULL
751 };
752
753 /*
754 * Templates for response to info request
755 * Check sanity of unlimited connect data etc.
756 */
757
758 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO) /* PROVIDER_flag of tl_clts_info_ack */
759 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO) /* PROVIDER_flag of tl_cots_info_ack */
760
761 static struct T_info_ack tl_cots_info_ack = /* template with SERV_type T_COTS */
762 {
763 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */
764 T_INFINITE, /* TSDU size */
765 T_INFINITE, /* ETSDU size */
766 T_INFINITE, /* CDATA_size */
767 T_INFINITE, /* DDATA_size */
768 T_INFINITE, /* ADDR_size */
769 T_INFINITE, /* OPT_size */
770 0, /* TIDU_size - fill at run time */
771 T_COTS, /* SERV_type */
772 -1, /* CURRENT_state */
773 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */
774 };
775
776 static struct T_info_ack tl_clts_info_ack = /* template with SERV_type T_CLTS */
777 {
778 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */
779 0, /* TSDU_size - fill at run time */
780 -2, /* ETSDU_size -2 => not supported */
781 -2, /* CDATA_size -2 => not supported */
782 -2, /* DDATA_size -2 => not supported */
783 -1, /* ADDR_size -1 => infinite */
784 -1, /* OPT_size */
785 0, /* TIDU_size - fill at run time */
786 T_CLTS, /* SERV_type */
787 -1, /* CURRENT_state */
788 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
789 };
790
791 /*
792 * private copy of devinfo pointer used in tl_info
793 */
794 static dev_info_t *tl_dip;
795
796 /*
797 * Endpoints cache.
798 */
799 static kmem_cache_t *tl_cache;
800 /*
801 * Minor number space.
802 */
803 static id_space_t *tl_minors;
804
805 /*
806 * Default Data Unit size.
807 */
808 static t_scalar_t tl_tidusz;
809
810 /*
811 * Size of hash tables.
812 */
813 static size_t tl_hash_size = TL_HASH_SIZE; /* starts at the compile-time default */
814
815 /*
816 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
817 * for sockets.
818 */
819 static int tl_disable_early_connect = 0;
820 static int tl_client_closing_when_accepting; /* NOTE(review): use is not visible in this part of the file */
821
822 static int tl_serializer_noswitch; /* NOTE(review): use is not visible in this part of the file */
823
824 /*
825 * LOCAL FUNCTION PROTOTYPES
826 * -------------------------
827 */
828 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
829 static void tl_do_proto(mblk_t *, tl_endpt_t *);
830 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
831 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *); /* NOTE(review): _ser variants presumably run behind the serializer - confirm */
832 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
833 t_scalar_t);
834 static void tl_bind(mblk_t *, tl_endpt_t *);
835 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
836 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
837 static void tl_unbind(mblk_t *, tl_endpt_t *);
838 static void tl_optmgmt(queue_t *, mblk_t *);
839 static void tl_conn_req(queue_t *, mblk_t *);
840 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
841 static void tl_conn_res(mblk_t *, tl_endpt_t *);
842 static void tl_discon_req(mblk_t *, tl_endpt_t *);
843 static void tl_capability_req(mblk_t *, tl_endpt_t *);
844 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
845 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
846 static void tl_info_req(mblk_t *, tl_endpt_t *);
847 static void tl_addr_req(mblk_t *, tl_endpt_t *);
848 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
849 static void tl_data(mblk_t *, tl_endpt_t *);
850 static void tl_exdata(mblk_t *, tl_endpt_t *);
851 static void tl_ordrel(mblk_t *, tl_endpt_t *);
852 static void tl_unitdata(mblk_t *, tl_endpt_t *);
853 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
854 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
855 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *); /* per the overview: never returns a closing endpoint, refholds the result */
856 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
857 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
858 static void tl_cl_backenable(tl_endpt_t *);
859 static void tl_co_unconnect(tl_endpt_t *);
860 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
861 static void tl_discon_ind(tl_endpt_t *, uint32_t);
862 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
863 static mblk_t *tl_ordrel_ind_alloc(void);
864 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
865 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
866 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
867 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
868 static void tl_icon_freemsgs(mblk_t **);
869 static void tl_merror(queue_t *, mblk_t *, int);
870 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
871 static int tl_default_opt(queue_t *, int, int, uchar_t *);
872 static int tl_get_opt(queue_t *, int, int, uchar_t *);
873 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
874 uchar_t *, void *, cred_t *);
875 static void tl_memrecover(queue_t *, mblk_t *, size_t);
876 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
877 static void tl_free(tl_endpt_t *);
878 static int tl_constructor(void *, void *, int);
879 static void tl_destructor(void *, void *);
880 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
881 static tl_serializer_t *tl_serializer_alloc(int);
882 static void tl_serializer_refhold(tl_serializer_t *);
883 static void tl_serializer_refrele(tl_serializer_t *);
884 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
885 static void tl_serializer_exit(tl_endpt_t *);
886 static boolean_t tl_noclose(tl_endpt_t *);
887 static void tl_closeok(tl_endpt_t *);
888 static void tl_refhold(tl_endpt_t *);
889 static void tl_refrele(tl_endpt_t *);
890 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
891 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
892 static void tl_close_ser(mblk_t *, tl_endpt_t *);
893 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
894 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
895 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
896 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
897 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
898 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
899 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
900 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
901 static void tl_addr_unbind(tl_endpt_t *);
902
903 /*
904 * Intialize option database object for TL
905 */
906
907 optdb_obj_t tl_opt_obj = {
908 tl_default_opt, /* TL default value function pointer */
909 tl_get_opt, /* TL get function pointer */
910 tl_set_opt, /* TL set function pointer */
911 TL_OPT_ARR_CNT, /* TL option database count of entries */
912 tl_opt_arr, /* TL option database */
913 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */
914 tl_valid_levels_arr /* TL valid level array */
915 };
916
917 /*
918 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
919 * ---------------------------------------
920 */
921
922 /*
923 * Loadable module routines
924 */
925 int
926 _init(void)
927 {
928 return (mod_install(&modlinkage));
929 }
930
931 int
932 _fini(void)
933 {
934 return (mod_remove(&modlinkage));
935 }
936
937 int
938 _info(struct modinfo *modinfop)
939 {
940 return (mod_info(&modlinkage, modinfop));
941 }
942
943 /*
944 * Driver Entry Points and Other routines
945 */
946 static int
947 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
948 {
949 int i;
950 char name[32];
951
952 /*
953 * Resume from a checkpoint state.
954 */
955 if (cmd == DDI_RESUME)
956 return (DDI_SUCCESS);
957
958 if (cmd != DDI_ATTACH)
959 return (DDI_FAILURE);
960
961 /*
962 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that
963 * streams message sizes can be unlimited. We use a defined constant
964 * instead.
965 */
966 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
967
968 /*
969 * Create subdevices for each transport.
970 */
971 for (i = 0; i < TL_UNUSED; i++) {
972 if (ddi_create_minor_node(devi,
973 tl_transports[i].tr_name,
974 S_IFCHR, tl_transports[i].tr_minor,
975 DDI_PSEUDO, NULL) == DDI_FAILURE) {
976 ddi_remove_minor_node(devi, NULL);
977 return (DDI_FAILURE);
978 }
979 }
980
981 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
982 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
983
984 if (tl_cache == NULL) {
985 ddi_remove_minor_node(devi, NULL);
986 return (DDI_FAILURE);
987 }
988
989 tl_minors = id_space_create("tl_minor_space",
990 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
991
992 /*
993 * Create ID space for minor numbers
994 */
995 for (i = 0; i < TL_MAXTRANSPORT; i++) {
996 tl_transport_state_t *t = &tl_transports[i];
997
998 if (i == TL_UNUSED)
999 continue;
1000
1001 /* Socket COTSORD shares namespace with COTS */
1002 if (i == TL_SOCK_COTSORD) {
1003 t->tr_ai_hash =
1004 tl_transports[TL_SOCK_COTS].tr_ai_hash;
1005 ASSERT(t->tr_ai_hash != NULL);
1006 t->tr_addr_hash =
1007 tl_transports[TL_SOCK_COTS].tr_addr_hash;
1008 ASSERT(t->tr_addr_hash != NULL);
1009 continue;
1010 }
1011
1012 /*
1013 * Create hash tables.
1014 */
1015 (void) snprintf(name, sizeof (name), "%s_ai_hash",
1016 t->tr_name);
1017 #ifdef _ILP32
1018 if (i & TL_SOCKET)
1019 t->tr_ai_hash =
1020 mod_hash_create_idhash(name, tl_hash_size - 1,
1021 mod_hash_null_valdtor);
1022 else
1023 t->tr_ai_hash =
1024 mod_hash_create_ptrhash(name, tl_hash_size,
1025 mod_hash_null_valdtor, sizeof (queue_t));
1026 #else
1027 t->tr_ai_hash =
1028 mod_hash_create_idhash(name, tl_hash_size - 1,
1029 mod_hash_null_valdtor);
1030 #endif /* _ILP32 */
1031
1032 if (i & TL_SOCKET) {
1033 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1034 t->tr_name);
1035 t->tr_addr_hash = mod_hash_create_ptrhash(name,
1036 tl_hash_size, mod_hash_null_valdtor,
1037 sizeof (uintptr_t));
1038 } else {
1039 (void) snprintf(name, sizeof (name), "%s_addr_hash",
1040 t->tr_name);
1041 t->tr_addr_hash = mod_hash_create_extended(name,
1042 tl_hash_size, mod_hash_null_keydtor,
1043 mod_hash_null_valdtor,
1044 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1045 }
1046
1047 /* Create serializer for connectionless transports. */
1048 if (i & TL_TICLTS)
1049 t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1050 }
1051
1052 tl_dip = devi;
1053
1054 return (DDI_SUCCESS);
1055 }
1056
1057 static int
1058 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1059 {
1060 int i;
1061
1062 if (cmd == DDI_SUSPEND)
1063 return (DDI_SUCCESS);
1064
1065 if (cmd != DDI_DETACH)
1066 return (DDI_FAILURE);
1067
1068 /*
1069 * Destroy arenas and hash tables.
1070 */
1071 for (i = 0; i < TL_MAXTRANSPORT; i++) {
1072 tl_transport_state_t *t = &tl_transports[i];
1073
1074 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1075 continue;
1076
1077 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1078 if (t->tr_serializer != NULL) {
1079 tl_serializer_refrele(t->tr_serializer);
1080 t->tr_serializer = NULL;
1081 }
1082
1083 #ifdef _ILP32
1084 if (i & TL_SOCKET)
1085 mod_hash_destroy_idhash(t->tr_ai_hash);
1086 else
1087 mod_hash_destroy_ptrhash(t->tr_ai_hash);
1088 #else
1089 mod_hash_destroy_idhash(t->tr_ai_hash);
1090 #endif /* _ILP32 */
1091 t->tr_ai_hash = NULL;
1092 if (i & TL_SOCKET)
1093 mod_hash_destroy_ptrhash(t->tr_addr_hash);
1094 else
1095 mod_hash_destroy_hash(t->tr_addr_hash);
1096 t->tr_addr_hash = NULL;
1097 }
1098
1099 kmem_cache_destroy(tl_cache);
1100 tl_cache = NULL;
1101 id_space_destroy(tl_minors);
1102 tl_minors = NULL;
1103 ddi_remove_minor_node(devi, NULL);
1104 return (DDI_SUCCESS);
1105 }
1106
1107 /* ARGSUSED */
1108 static int
1109 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1110 {
1111
1112 int retcode = DDI_FAILURE;
1113
1114 switch (infocmd) {
1115
1116 case DDI_INFO_DEVT2DEVINFO:
1117 if (tl_dip != NULL) {
1118 *result = (void *)tl_dip;
1119 retcode = DDI_SUCCESS;
1120 }
1121 break;
1122
1123 case DDI_INFO_DEVT2INSTANCE:
1124 *result = (void *)0;
1125 retcode = DDI_SUCCESS;
1126 break;
1127
1128 default:
1129 break;
1130 }
1131 return (retcode);
1132 }
1133
1134 /*
1135 * Endpoint reference management.
1136 */
1137 static void
1138 tl_refhold(tl_endpt_t *tep)
1139 {
1140 atomic_add_32(&tep->te_refcnt, 1);
1141 }
1142
1143 static void
1144 tl_refrele(tl_endpt_t *tep)
1145 {
1146 ASSERT(tep->te_refcnt != 0);
1147
1148 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1149 tl_free(tep);
1150 }
1151
1152 /*ARGSUSED*/
1153 static int
1154 tl_constructor(void *buf, void *cdrarg, int kmflags)
1155 {
1156 tl_endpt_t *tep = buf;
1157
1158 bzero(tep, sizeof (tl_endpt_t));
1159 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1160 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1161 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1162 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1163 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1164
1165 return (0);
1166 }
1167
1168 /*ARGSUSED*/
1169 static void
1170 tl_destructor(void *buf, void *cdrarg)
1171 {
1172 tl_endpt_t *tep = buf;
1173
1174 mutex_destroy(&tep->te_closelock);
1175 cv_destroy(&tep->te_closecv);
1176 mutex_destroy(&tep->te_srv_lock);
1177 cv_destroy(&tep->te_srv_cv);
1178 mutex_destroy(&tep->te_ser_lock);
1179 }
1180
1181 static void
1182 tl_free(tl_endpt_t *tep)
1183 {
1184 ASSERT(tep->te_refcnt == 0);
1185 ASSERT(tep->te_transport != NULL);
1186 ASSERT(tep->te_rq == NULL);
1187 ASSERT(tep->te_wq == NULL);
1188 ASSERT(tep->te_ser != NULL);
1189 ASSERT(tep->te_ser_count == 0);
1190 ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1191
1192 if (IS_SOCKET(tep)) {
1193 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1194 ASSERT(tep->te_abuf == &tep->te_uxaddr);
1195 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1196 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1197 } else if (tep->te_abuf != NULL) {
1198 kmem_free(tep->te_abuf, tep->te_alen);
1199 tep->te_alen = -1; /* uninitialized */
1200 tep->te_abuf = NULL;
1201 } else {
1202 ASSERT(tep->te_alen == -1);
1203 }
1204
1205 id_free(tl_minors, tep->te_minor);
1206 ASSERT(tep->te_credp == NULL);
1207
1208 if (tep->te_hash_hndl != NULL)
1209 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1210
1211 if (IS_COTS(tep)) {
1212 TL_REMOVE_PEER(tep->te_conp);
1213 TL_REMOVE_PEER(tep->te_oconp);
1214 tl_serializer_refrele(tep->te_ser);
1215 tep->te_ser = NULL;
1216 ASSERT(tep->te_nicon == 0);
1217 ASSERT(list_head(&tep->te_iconp) == NULL);
1218 } else {
1219 ASSERT(tep->te_lastep == NULL);
1220 ASSERT(list_head(&tep->te_flowlist) == NULL);
1221 ASSERT(tep->te_flowq == NULL);
1222 }
1223
1224 ASSERT(tep->te_bufcid == 0);
1225 ASSERT(tep->te_timoutid == 0);
1226 bzero(&tep->te_ap, sizeof (tep->te_ap));
1227 tep->te_acceptor_id = 0;
1228
1229 ASSERT(tep->te_closewait == 0);
1230 ASSERT(!tep->te_rsrv_active);
1231 ASSERT(!tep->te_wsrv_active);
1232 tep->te_closing = 0;
1233 tep->te_nowsrv = B_FALSE;
1234 tep->te_flag = 0;
1235
1236 kmem_cache_free(tl_cache, tep);
1237 }
1238
1239 /*
1240 * Allocate/free reference-counted wrappers for serializers.
1241 */
1242 static tl_serializer_t *
1243 tl_serializer_alloc(int flags)
1244 {
1245 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1246 serializer_t *ser;
1247
1248 if (s == NULL)
1249 return (NULL);
1250
1251 ser = serializer_create(flags);
1252
1253 if (ser == NULL) {
1254 kmem_free(s, sizeof (tl_serializer_t));
1255 return (NULL);
1256 }
1257
1258 s->ts_refcnt = 1;
1259 s->ts_serializer = ser;
1260 return (s);
1261 }
1262
1263 static void
1264 tl_serializer_refhold(tl_serializer_t *s)
1265 {
1266 atomic_add_32(&s->ts_refcnt, 1);
1267 }
1268
1269 static void
1270 tl_serializer_refrele(tl_serializer_t *s)
1271 {
1272 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1273 serializer_destroy(s->ts_serializer);
1274 kmem_free(s, sizeof (tl_serializer_t));
1275 }
1276 }
1277
1278 /*
1279 * Post a request on the endpoint serializer. For COTS transports keep track of
1280 * the number of pending requests.
1281 */
1282 static void
1283 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1284 {
1285 if (IS_COTS(tep)) {
1286 mutex_enter(&tep->te_ser_lock);
1287 tep->te_ser_count++;
1288 mutex_exit(&tep->te_ser_lock);
1289 }
1290 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1291 }
1292
1293 /*
1294 * Complete processing the request on the serializer. Decrement the counter for
1295 * pending requests for COTS transports.
1296 */
1297 static void
1298 tl_serializer_exit(tl_endpt_t *tep)
1299 {
1300 if (IS_COTS(tep)) {
1301 mutex_enter(&tep->te_ser_lock);
1302 ASSERT(tep->te_ser_count != 0);
1303 tep->te_ser_count--;
1304 mutex_exit(&tep->te_ser_lock);
1305 }
1306 }
1307
1308 /*
1309 * Hash management functions.
1310 */
1311
1312 /*
1313 * Return TRUE if two addresses are equal, false otherwise.
1314 */
1315 static boolean_t
1316 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1317 {
1318 return ((ap1->ta_alen > 0) &&
1319 (ap1->ta_alen == ap2->ta_alen) &&
1320 (ap1->ta_zoneid == ap2->ta_zoneid) &&
1321 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1322 }
1323
1324 /*
1325 * This function is called whenever an endpoint is found in the hash table.
1326 */
1327 /* ARGSUSED0 */
1328 static void
1329 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1330 {
1331 tl_refhold((tl_endpt_t *)val);
1332 }
1333
1334 /*
1335 * Address hash function.
1336 */
1337 /* ARGSUSED */
1338 static uint_t
1339 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1340 {
1341 tl_addr_t *ap = (tl_addr_t *)key;
1342 size_t len = ap->ta_alen;
1343 uchar_t *p = ap->ta_abuf;
1344 uint_t i, g;
1345
1346 ASSERT((len > 0) && (p != NULL));
1347
1348 for (i = ap->ta_zoneid; len -- != 0; p++) {
1349 i = (i << 4) + (*p);
1350 if ((g = (i & 0xf0000000U)) != 0) {
1351 i ^= (g >> 24);
1352 i ^= g;
1353 }
1354 }
1355 return (i);
1356 }
1357
1358 /*
1359 * This function is used by hash lookups. It compares two generic addresses.
1360 */
1361 static int
1362 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1363 {
1364 #ifdef DEBUG
1365 tl_addr_t *ap1 = (tl_addr_t *)key1;
1366 tl_addr_t *ap2 = (tl_addr_t *)key2;
1367
1368 ASSERT(key1 != NULL);
1369 ASSERT(key2 != NULL);
1370
1371 ASSERT(ap1->ta_abuf != NULL);
1372 ASSERT(ap2->ta_abuf != NULL);
1373 ASSERT(ap1->ta_alen > 0);
1374 ASSERT(ap2->ta_alen > 0);
1375 #endif
1376
1377 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1378 }
1379
1380 /*
1381 * Prevent endpoint from closing if possible.
1382 * Return B_TRUE on success, B_FALSE on failure.
1383 */
1384 static boolean_t
1385 tl_noclose(tl_endpt_t *tep)
1386 {
1387 boolean_t rc = B_FALSE;
1388
1389 mutex_enter(&tep->te_closelock);
1390 if (! tep->te_closing) {
1391 ASSERT(tep->te_closewait == 0);
1392 tep->te_closewait++;
1393 rc = B_TRUE;
1394 }
1395 mutex_exit(&tep->te_closelock);
1396 return (rc);
1397 }
1398
1399 /*
1400 * Allow endpoint to close if needed.
1401 */
1402 static void
1403 tl_closeok(tl_endpt_t *tep)
1404 {
1405 ASSERT(tep->te_closewait > 0);
1406 mutex_enter(&tep->te_closelock);
1407 ASSERT(tep->te_closewait == 1);
1408 tep->te_closewait--;
1409 cv_signal(&tep->te_closecv);
1410 mutex_exit(&tep->te_closelock);
1411 }
1412
1413 /*
1414 * STREAMS open entry point.
1415 */
1416 /* ARGSUSED */
1417 static int
1418 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
1419 {
1420 tl_endpt_t *tep;
1421 minor_t minor = getminor(*devp);
1422
1423 /*
1424 * Driver is called directly. Both CLONEOPEN and MODOPEN
1425 * are illegal
1426 */
1427 if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1428 return (ENXIO);
1429
1430 if (rq->q_ptr != NULL)
1431 return (0);
1432
1433 /* Minor number should specify the mode used for the driver. */
1434 if ((minor >= TL_UNUSED))
1435 return (ENXIO);
1436
1437 if (oflag & SO_SOCKSTR) {
1438 minor |= TL_SOCKET;
1439 }
1440
1441 tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1442 tep->te_refcnt = 1;
1443 tep->te_cpid = curproc->p_pid;
1444 rq->q_ptr = WR(rq)->q_ptr = tep;
1445 tep->te_state = TS_UNBND;
1446 tep->te_credp = credp;
1447 crhold(credp);
1448 tep->te_zoneid = getzoneid();
1449
1450 tep->te_flag = minor & TL_MINOR_MASK;
1451 tep->te_transport = &tl_transports[minor];
1452
1453 /* Allocate a unique minor number for this instance. */
1454 tep->te_minor = (minor_t)id_alloc(tl_minors);
1455
1456 /* Reserve hash handle for bind(). */
1457 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1458
1459 /* Transport-specific initialization */
1460 if (IS_COTS(tep)) {
1461 /* Use private serializer */
1462 tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1463
1464 /* Create list for pending connections */
1465 list_create(&tep->te_iconp, sizeof (tl_icon_t),
1466 offsetof(tl_icon_t, ti_node));
1467 tep->te_qlen = 0;
1468 tep->te_nicon = 0;
1469 tep->te_oconp = NULL;
1470 tep->te_conp = NULL;
1471 } else {
1472 /* Use shared serializer */
1473 tep->te_ser = tep->te_transport->tr_serializer;
1474 bzero(&tep->te_flows, sizeof (list_node_t));
1475 /* Create list for flow control */
1476 list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1477 offsetof(tl_endpt_t, te_flows));
1478 tep->te_flowq = NULL;
1479 tep->te_lastep = NULL;
1480
1481 }
1482
1483 /* Initialize endpoint address */
1484 if (IS_SOCKET(tep)) {
1485 /* Socket-specific address handling. */
1486 tep->te_alen = TL_SOUX_ADDRLEN;
1487 tep->te_abuf = &tep->te_uxaddr;
1488 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1489 tep->te_magic = SOU_MAGIC_IMPLICIT;
1490 } else {
1491 tep->te_alen = -1;
1492 tep->te_abuf = NULL;
1493 }
1494
1495 /* clone the driver */
1496 *devp = makedevice(getmajor(*devp), tep->te_minor);
1497
1498 tep->te_rq = rq;
1499 tep->te_wq = WR(rq);
1500
1501 #ifdef _ILP32
1502 if (IS_SOCKET(tep))
1503 tep->te_acceptor_id = tep->te_minor;
1504 else
1505 tep->te_acceptor_id = (t_uscalar_t)rq;
1506 #else
1507 tep->te_acceptor_id = tep->te_minor;
1508 #endif /* _ILP32 */
1509
1510
1511 qprocson(rq);
1512
1513 /*
1514 * Insert acceptor ID in the hash. The AI hash always sleeps on
1515 * insertion so insertion can't fail.
1516 */
1517 (void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1518 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1519 (mod_hash_val_t)tep);
1520
1521 return (0);
1522 }
1523
1524 /* ARGSUSED1 */
1525 static int
1526 tl_close(queue_t *rq, int flag, cred_t *credp)
1527 {
1528 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1529 tl_endpt_t *elp = NULL;
1530 queue_t *wq = tep->te_wq;
1531 int rc;
1532
1533 ASSERT(wq == WR(rq));
1534
1535 /*
1536 * Remove the endpoint from acceptor hash.
1537 */
1538 rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1539 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1540 (mod_hash_val_t *)&elp);
1541 ASSERT(rc == 0 && tep == elp);
1542 if ((rc != 0) || (tep != elp)) {
1543 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1544 SL_TRACE|SL_ERROR,
1545 "tl_close:inconsistency in AI hash"));
1546 }
1547
1548 /*
1549 * Wait till close is safe, then mark endpoint as closing.
1550 */
1551 mutex_enter(&tep->te_closelock);
1552 while (tep->te_closewait)
1553 cv_wait(&tep->te_closecv, &tep->te_closelock);
1554 tep->te_closing = B_TRUE;
1555 /*
1556 * Will wait for the serializer part of the close to finish, so set
1557 * te_closewait now.
1558 */
1559 tep->te_closewait = 1;
1560 tep->te_nowsrv = B_FALSE;
1561 mutex_exit(&tep->te_closelock);
1562
1563 /*
1564 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1565 * It is safe because close will wait for tl_close_ser to finish.
1566 */
1567 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1568
1569 /*
1570 * Wait for the first phase of close to complete before qprocsoff().
1571 */
1572 mutex_enter(&tep->te_closelock);
1573 while (tep->te_closewait)
1574 cv_wait(&tep->te_closecv, &tep->te_closelock);
1575 mutex_exit(&tep->te_closelock);
1576
1577 qprocsoff(rq);
1578
1579 if (tep->te_bufcid) {
1580 qunbufcall(rq, tep->te_bufcid);
1581 tep->te_bufcid = 0;
1582 }
1583 if (tep->te_timoutid) {
1584 (void) quntimeout(rq, tep->te_timoutid);
1585 tep->te_timoutid = 0;
1586 }
1587
1588 /*
1589 * Finish close behind serializer.
1590 *
1591 * For a CLTS endpoint increase a refcount and continue close processing
1592 * with serializer protection. This processing may happen asynchronously
1593 * with the completion of tl_close().
1594 *
1595 * Fot a COTS endpoint wait before destroying tep since the serializer
1596 * may go away together with tep and we need to destroy serializer
1597 * outside of serializer context.
1598 */
1599 ASSERT(tep->te_closewait == 0);
1600 if (IS_COTS(tep))
1601 tep->te_closewait = 1;
1602 else
1603 tl_refhold(tep);
1604
1605 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1606
1607 /*
1608 * For connection-oriented transports wait for all serializer activity
1609 * to settle down.
1610 */
1611 if (IS_COTS(tep)) {
1612 mutex_enter(&tep->te_closelock);
1613 while (tep->te_closewait)
1614 cv_wait(&tep->te_closecv, &tep->te_closelock);
1615 mutex_exit(&tep->te_closelock);
1616 }
1617
1618 crfree(tep->te_credp);
1619 tep->te_credp = NULL;
1620 tep->te_wq = NULL;
1621 tl_refrele(tep);
1622 /*
1623 * tep is likely to be destroyed now, so can't reference it any more.
1624 */
1625
1626 rq->q_ptr = wq->q_ptr = NULL;
1627 return (0);
1628 }
1629
1630 /*
1631 * First phase of close processing done behind the serializer.
1632 *
1633 * Do not drop the reference in the end - tl_close() wants this reference to
1634 * stay.
1635 */
1636 /* ARGSUSED0 */
1637 static void
1638 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1639 {
1640 ASSERT(tep->te_closing);
1641 ASSERT(tep->te_closewait == 1);
1642 ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1643
1644 tep->te_flag |= TL_CLOSE_SER;
1645
1646 /*
1647 * Drain out all messages on queue except for TL_TICOTS where the
1648 * abortive release semantics permit discarding of data on close
1649 */
1650 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1651 tl_wsrv_ser(NULL, tep);
1652 }
1653
1654 /* Remove address from hash table. */
1655 tl_addr_unbind(tep);
1656 /*
1657 * qprocsoff() gets confused when q->q_next is not NULL on the write
1658 * queue of the driver, so clear these before qprocsoff() is called.
1659 * Also clear q_next for the peer since this queue is going away.
1660 */
1661 if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1662 tl_endpt_t *peer_tep = tep->te_conp;
1663
1664 tep->te_wq->q_next = NULL;
1665 if ((peer_tep != NULL) && !peer_tep->te_closing)
1666 peer_tep->te_wq->q_next = NULL;
1667 }
1668
1669 tep->te_rq = NULL;
1670
1671 /* wake up tl_close() */
1672 tl_closeok(tep);
1673 tl_serializer_exit(tep);
1674 }
1675
1676 /*
1677 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1678 * the reference for CLTS.
1679 *
1680 * Called from serializer. Should drop reference count for CLTS only.
1681 */
1682 /* ARGSUSED0 */
1683 static void
1684 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1685 {
1686 ASSERT(tep->te_closing);
1687 IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1688 IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1689
1690 tep->te_state = -1; /* Uninitialized */
1691 if (IS_COTS(tep)) {
1692 tl_co_unconnect(tep);
1693 } else {
1694 /* Connectionless specific cleanup */
1695 TL_REMOVE_PEER(tep->te_lastep);
1696 /*
1697 * Backenable anybody that is flow controlled waiting for
1698 * this endpoint.
1699 */
1700 tl_cl_backenable(tep);
1701 if (tep->te_flowq != NULL) {
1702 list_remove(&(tep->te_flowq->te_flowlist), tep);
1703 tep->te_flowq = NULL;
1704 }
1705 }
1706
1707 tl_serializer_exit(tep);
1708 if (IS_COTS(tep))
1709 tl_closeok(tep);
1710 else
1711 tl_refrele(tep);
1712 }
1713
1714 /*
1715 * STREAMS write-side put procedure.
1716 * Enter serializer for most of the processing.
1717 *
1718 * The T_CONN_REQ is processed outside of serializer.
1719 */
1720 static void
1721 tl_wput(queue_t *wq, mblk_t *mp)
1722 {
1723 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1724 ssize_t msz = MBLKL(mp);
1725 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
1726 tlproc_t *tl_proc = NULL;
1727
1728 switch (DB_TYPE(mp)) {
1729 case M_DATA:
1730 /* Only valid for connection-oriented transports */
1731 if (IS_CLTS(tep)) {
1732 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1733 SL_TRACE|SL_ERROR,
1734 "tl_wput:M_DATA invalid for ticlts driver"));
1735 tl_merror(wq, mp, EPROTO);
1736 return;
1737 }
1738 tl_proc = tl_wput_data_ser;
1739 break;
1740
1741 case M_IOCTL:
1742 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1743 case TL_IOC_CREDOPT:
1744 /* FALLTHROUGH */
1745 case TL_IOC_UCREDOPT:
1746 /*
1747 * Serialize endpoint state change.
1748 */
1749 tl_proc = tl_do_ioctl_ser;
1750 break;
1751
1752 default:
1753 miocnak(wq, mp, 0, EINVAL);
1754 return;
1755 }
1756 break;
1757
1758 case M_FLUSH:
1759 /*
1760 * do canonical M_FLUSH processing
1761 */
1762 if (*mp->b_rptr & FLUSHW) {
1763 flushq(wq, FLUSHALL);
1764 *mp->b_rptr &= ~FLUSHW;
1765 }
1766 if (*mp->b_rptr & FLUSHR) {
1767 flushq(RD(wq), FLUSHALL);
1768 qreply(wq, mp);
1769 } else {
1770 freemsg(mp);
1771 }
1772 return;
1773
1774 case M_PROTO:
1775 if (msz < sizeof (prim->type)) {
1776 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1777 SL_TRACE|SL_ERROR,
1778 "tl_wput:M_PROTO data too short"));
1779 tl_merror(wq, mp, EPROTO);
1780 return;
1781 }
1782 switch (prim->type) {
1783 case T_OPTMGMT_REQ:
1784 case T_SVR4_OPTMGMT_REQ:
1785 /*
1786 * Process TPI option management requests immediately
1787 * in put procedure regardless of in-order processing
1788 * of already queued messages.
1789 * (Note: This driver supports AF_UNIX socket
1790 * implementation. Unless we implement this processing,
1791 * setsockopt() on socket endpoint will block on flow
1792 * controlled endpoints which it should not. That is
1793 * required for successful execution of VSU socket tests
1794 * and is consistent with BSD socket behavior).
1795 */
1796 tl_optmgmt(wq, mp);
1797 return;
1798 case O_T_BIND_REQ:
1799 case T_BIND_REQ:
1800 tl_proc = tl_bind_ser;
1801 break;
1802 case T_CONN_REQ:
1803 if (IS_CLTS(tep)) {
1804 tl_merror(wq, mp, EPROTO);
1805 return;
1806 }
1807 tl_conn_req(wq, mp);
1808 return;
1809 case T_DATA_REQ:
1810 case T_OPTDATA_REQ:
1811 case T_EXDATA_REQ:
1812 case T_ORDREL_REQ:
1813 tl_proc = tl_putq_ser;
1814 break;
1815 case T_UNITDATA_REQ:
1816 if (IS_COTS(tep) ||
1817 (msz < sizeof (struct T_unitdata_req))) {
1818 tl_merror(wq, mp, EPROTO);
1819 return;
1820 }
1821 if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1822 tl_proc = tl_unitdata_ser;
1823 } else {
1824 tl_proc = tl_putq_ser;
1825 }
1826 break;
1827 default:
1828 /*
1829 * process in service procedure if message already
1830 * queued (maintain in-order processing)
1831 */
1832 if (wq->q_first != NULL) {
1833 tl_proc = tl_putq_ser;
1834 } else {
1835 tl_proc = tl_wput_ser;
1836 }
1837 break;
1838 }
1839 break;
1840
1841 case M_PCPROTO:
1842 /*
1843 * Check that the message has enough data to figure out TPI
1844 * primitive.
1845 */
1846 if (msz < sizeof (prim->type)) {
1847 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1848 SL_TRACE|SL_ERROR,
1849 "tl_wput:M_PCROTO data too short"));
1850 tl_merror(wq, mp, EPROTO);
1851 return;
1852 }
1853 switch (prim->type) {
1854 case T_CAPABILITY_REQ:
1855 tl_capability_req(mp, tep);
1856 return;
1857 case T_INFO_REQ:
1858 tl_proc = tl_info_req_ser;
1859 break;
1860 case T_ADDR_REQ:
1861 tl_proc = tl_addr_req_ser;
1862 break;
1863
1864 default:
1865 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1866 SL_TRACE|SL_ERROR,
1867 "tl_wput:unknown TPI msg primitive"));
1868 tl_merror(wq, mp, EPROTO);
1869 return;
1870 }
1871 break;
1872 default:
1873 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1874 "tl_wput:default:unexpected Streams message"));
1875 freemsg(mp);
1876 return;
1877 }
1878
1879 /*
1880 * Continue processing via serializer.
1881 */
1882 ASSERT(tl_proc != NULL);
1883 tl_refhold(tep);
1884 tl_serializer_enter(tep, tl_proc, mp);
1885 }
1886
1887 /*
1888 * Place message on the queue while preserving order.
1889 */
1890 static void
1891 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1892 {
1893 if (tep->te_closing) {
1894 tl_wput_ser(mp, tep);
1895 } else {
1896 TL_PUTQ(tep, mp);
1897 tl_serializer_exit(tep);
1898 tl_refrele(tep);
1899 }
1900
1901 }
1902
1903 static void
1904 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1905 {
1906 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1907
1908 switch (DB_TYPE(mp)) {
1909 case M_DATA:
1910 tl_data(mp, tep);
1911 break;
1912 case M_PROTO:
1913 tl_do_proto(mp, tep);
1914 break;
1915 default:
1916 freemsg(mp);
1917 break;
1918 }
1919 }
1920
1921 /*
1922 * Write side put procedure called from serializer.
1923 */
1924 static void
1925 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1926 {
1927 tl_wput_common_ser(mp, tep);
1928 tl_serializer_exit(tep);
1929 tl_refrele(tep);
1930 }
1931
1932 /*
1933 * M_DATA processing. Called from serializer.
1934 */
1935 static void
1936 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1937 {
1938 tl_endpt_t *peer_tep = tep->te_conp;
1939 queue_t *peer_rq;
1940
1941 ASSERT(DB_TYPE(mp) == M_DATA);
1942 ASSERT(IS_COTS(tep));
1943
1944 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1945
1946 /*
1947 * fastpath for data. Ignore flow control if tep is closing.
1948 */
1949 if ((peer_tep != NULL) &&
1950 !peer_tep->te_closing &&
1951 ((tep->te_state == TS_DATA_XFER) ||
1952 (tep->te_state == TS_WREQ_ORDREL)) &&
1953 (tep->te_wq != NULL) &&
1954 (tep->te_wq->q_first == NULL) &&
1955 ((peer_tep->te_state == TS_DATA_XFER) ||
1956 (peer_tep->te_state == TS_WREQ_ORDREL)) &&
1957 ((peer_rq = peer_tep->te_rq) != NULL) &&
1958 (canputnext(peer_rq) || tep->te_closing)) {
1959 putnext(peer_rq, mp);
1960 } else if (tep->te_closing) {
1961 /*
1962 * It is possible that by the time we got here tep started to
1963 * close. If the write queue is not empty, and the state is
1964 * TS_DATA_XFER the data should be delivered in order, so we
1965 * call putq() instead of freeing the data.
1966 */
1967 if ((tep->te_wq != NULL) &&
1968 ((tep->te_state == TS_DATA_XFER) ||
1969 (tep->te_state == TS_WREQ_ORDREL))) {
1970 TL_PUTQ(tep, mp);
1971 } else {
1972 freemsg(mp);
1973 }
1974 } else {
1975 TL_PUTQ(tep, mp);
1976 }
1977
1978 tl_serializer_exit(tep);
1979 tl_refrele(tep);
1980 }
1981
1982 /*
1983 * Write side service routine.
1984 *
1985 * All actual processing happens within serializer which is entered
1986 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1987 * messages that need processing may have arrived, so tl_wsrv repeats until
1988 * queue is empty or te_nowsrv is set.
1989 */
1990 static void
1991 tl_wsrv(queue_t *wq)
1992 {
1993 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1994
1995 while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1996 mutex_enter(&tep->te_srv_lock);
1997 ASSERT(tep->te_wsrv_active == B_FALSE);
1998 tep->te_wsrv_active = B_TRUE;
1999 mutex_exit(&tep->te_srv_lock);
2000
2001 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2002
2003 /*
2004 * Wait for serializer job to complete.
2005 */
2006 mutex_enter(&tep->te_srv_lock);
2007 while (tep->te_wsrv_active) {
2008 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2009 }
2010 cv_signal(&tep->te_srv_cv);
2011 mutex_exit(&tep->te_srv_lock);
2012 }
2013 }
2014
2015 /*
2016 * Serialized write side processing of the STREAMS queue.
2017 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2018 * is NULL.
2019 */
2020 static void
2021 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2022 {
2023 mblk_t *mp;
2024 queue_t *wq = tep->te_wq;
2025
2026 ASSERT(wq != NULL);
2027 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2028 tl_wput_common_ser(mp, tep);
2029 }
2030
2031 /*
2032 * Wakeup service routine unless called from close.
2033 * If ser_mp is specified, the caller is tl_wsrv().
2034 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2035 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2036 * be no matching tl_serializer_exit() in this case.
2037 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2038 * waiting on te_srv_cv.
2039 */
2040 if (ser_mp != NULL) {
2041 /*
2042 * We are called from tl_wsrv.
2043 */
2044 mutex_enter(&tep->te_srv_lock);
2045 ASSERT(tep->te_wsrv_active);
2046 tep->te_wsrv_active = B_FALSE;
2047 cv_signal(&tep->te_srv_cv);
2048 mutex_exit(&tep->te_srv_lock);
2049 tl_serializer_exit(tep);
2050 }
2051 }
2052
2053 /*
2054 * Called when the stream is backenabled. Enter serializer and qenable everyone
2055 * flow controlled by tep.
2056 *
2057 * NOTE: The service routine should enter serializer synchronously. Otherwise it
2058 * is possible that two instances of tl_rsrv will be running reusing the same
2059 * rsrv mblk.
2060 */
2061 static void
2062 tl_rsrv(queue_t *rq)
2063 {
2064 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2065
2066 ASSERT(rq->q_first == NULL);
2067 ASSERT(tep->te_rsrv_active == 0);
2068
2069 tep->te_rsrv_active = B_TRUE;
2070 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2071 /*
2072 * Wait for serializer job to complete.
2073 */
2074 mutex_enter(&tep->te_srv_lock);
2075 while (tep->te_rsrv_active) {
2076 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2077 }
2078 cv_signal(&tep->te_srv_cv);
2079 mutex_exit(&tep->te_srv_lock);
2080 }
2081
2082 /* ARGSUSED */
2083 static void
2084 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2085 {
2086 tl_endpt_t *peer_tep;
2087
2088 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2089 tl_cl_backenable(tep);
2090 } else if (
2091 IS_COTS(tep) &&
2092 ((peer_tep = tep->te_conp) != NULL) &&
2093 !peer_tep->te_closing &&
2094 ((tep->te_state == TS_DATA_XFER) ||
2095 (tep->te_state == TS_WIND_ORDREL)||
2096 (tep->te_state == TS_WREQ_ORDREL))) {
2097 TL_QENABLE(peer_tep);
2098 }
2099
2100 /*
2101 * Wakeup read side service routine.
2102 */
2103 mutex_enter(&tep->te_srv_lock);
2104 ASSERT(tep->te_rsrv_active);
2105 tep->te_rsrv_active = B_FALSE;
2106 cv_signal(&tep->te_srv_cv);
2107 mutex_exit(&tep->te_srv_lock);
2108 tl_serializer_exit(tep);
2109 }
2110
2111 /*
2112 * process M_PROTO messages. Always called from serializer.
2113 */
2114 static void
2115 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2116 {
2117 ssize_t msz = MBLKL(mp);
2118 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
2119
2120 /* Message size was validated by tl_wput(). */
2121 ASSERT(msz >= sizeof (prim->type));
2122
2123 switch (prim->type) {
2124 case T_UNBIND_REQ:
2125 tl_unbind(mp, tep);
2126 break;
2127
2128 case T_ADDR_REQ:
2129 tl_addr_req(mp, tep);
2130 break;
2131
2132 case O_T_CONN_RES:
2133 case T_CONN_RES:
2134 if (IS_CLTS(tep)) {
2135 tl_merror(tep->te_wq, mp, EPROTO);
2136 break;
2137 }
2138 tl_conn_res(mp, tep);
2139 break;
2140
2141 case T_DISCON_REQ:
2142 if (IS_CLTS(tep)) {
2143 tl_merror(tep->te_wq, mp, EPROTO);
2144 break;
2145 }
2146 tl_discon_req(mp, tep);
2147 break;
2148
2149 case T_DATA_REQ:
2150 if (IS_CLTS(tep)) {
2151 tl_merror(tep->te_wq, mp, EPROTO);
2152 break;
2153 }
2154 tl_data(mp, tep);
2155 break;
2156
2157 case T_OPTDATA_REQ:
2158 if (IS_CLTS(tep)) {
2159 tl_merror(tep->te_wq, mp, EPROTO);
2160 break;
2161 }
2162 tl_data(mp, tep);
2163 break;
2164
2165 case T_EXDATA_REQ:
2166 if (IS_CLTS(tep)) {
2167 tl_merror(tep->te_wq, mp, EPROTO);
2168 break;
2169 }
2170 tl_exdata(mp, tep);
2171 break;
2172
2173 case T_ORDREL_REQ:
2174 if (! IS_COTSORD(tep)) {
2175 tl_merror(tep->te_wq, mp, EPROTO);
2176 break;
2177 }
2178 tl_ordrel(mp, tep);
2179 break;
2180
2181 case T_UNITDATA_REQ:
2182 if (IS_COTS(tep)) {
2183 tl_merror(tep->te_wq, mp, EPROTO);
2184 break;
2185 }
2186 tl_unitdata(mp, tep);
2187 break;
2188
2189 default:
2190 tl_merror(tep->te_wq, mp, EPROTO);
2191 break;
2192 }
2193 }
2194
2195 /*
2196 * Process ioctl from serializer.
2197 * This is a wrapper around tl_do_ioctl().
2198 */
2199 static void
2200 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2201 {
2202 if (! tep->te_closing)
2203 tl_do_ioctl(mp, tep);
2204 else
2205 freemsg(mp);
2206
2207 tl_serializer_exit(tep);
2208 tl_refrele(tep);
2209 }
2210
2211 static void
2212 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2213 {
2214 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2215 int cmd = iocbp->ioc_cmd;
2216 queue_t *wq = tep->te_wq;
2217 int error;
2218 int thisopt, otheropt;
2219
2220 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2221
2222 switch (cmd) {
2223 case TL_IOC_CREDOPT:
2224 if (cmd == TL_IOC_CREDOPT) {
2225 thisopt = TL_SETCRED;
2226 otheropt = TL_SETUCRED;
2227 } else {
2228 /* FALLTHROUGH */
2229 case TL_IOC_UCREDOPT:
2230 thisopt = TL_SETUCRED;
2231 otheropt = TL_SETCRED;
2232 }
2233 /*
2234 * The credentials passing does not apply to sockets.
2235 * Only one of the cred options can be set at a given time.
2236 */
2237 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2238 miocnak(wq, mp, 0, EINVAL);
2239 return;
2240 }
2241
2242 /*
2243 * Turn on generation of credential options for
2244 * T_conn_req, T_conn_con, T_unidata_ind.
2245 */
2246 error = miocpullup(mp, sizeof (uint32_t));
2247 if (error != 0) {
2248 miocnak(wq, mp, 0, error);
2249 return;
2250 }
2251 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2252 miocnak(wq, mp, 0, EINVAL);
2253 return;
2254 }
2255
2256 if (*(uint32_t *)mp->b_cont->b_rptr)
2257 tep->te_flag |= thisopt;
2258 else
2259 tep->te_flag &= ~thisopt;
2260
2261 miocack(wq, mp, 0, 0);
2262 break;
2263
2264 default:
2265 /* Should not be here */
2266 miocnak(wq, mp, 0, EINVAL);
2267 break;
2268 }
2269 }
2270
2271
2272 /*
2273 * send T_ERROR_ACK
2274 * Note: assumes enough memory or caller passed big enough mp
2275 * - no recovery from allocb failures
2276 */
2277
2278 static void
2279 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2280 t_scalar_t unix_err, t_scalar_t type)
2281 {
2282 struct T_error_ack *err_ack;
2283 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2284 M_PCPROTO, T_ERROR_ACK);
2285
2286 if (ackmp == NULL) {
2287 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2288 "tl_error_ack:out of mblk memory"));
2289 tl_merror(wq, NULL, ENOSR);
2290 return;
2291 }
2292 err_ack = (struct T_error_ack *)ackmp->b_rptr;
2293 err_ack->ERROR_prim = type;
2294 err_ack->TLI_error = tli_err;
2295 err_ack->UNIX_error = unix_err;
2296
2297 /*
2298 * send error ack message
2299 */
2300 qreply(wq, ackmp);
2301 }
2302
2303
2304
2305 /*
2306 * send T_OK_ACK
2307 * Note: assumes enough memory or caller passed big enough mp
2308 * - no recovery from allocb failures
2309 */
2310 static void
2311 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2312 {
2313 struct T_ok_ack *ok_ack;
2314 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2315 M_PCPROTO, T_OK_ACK);
2316
2317 if (ackmp == NULL) {
2318 tl_merror(wq, NULL, ENOMEM);
2319 return;
2320 }
2321
2322 ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2323 ok_ack->CORRECT_prim = type;
2324
2325 (void) qreply(wq, ackmp);
2326 }
2327
2328 /*
2329 * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2330 * This is a wrapper around tl_bind().
2331 */
2332 static void
2333 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2334 {
2335 if (! tep->te_closing)
2336 tl_bind(mp, tep);
2337 else
2338 freemsg(mp);
2339
2340 tl_serializer_exit(tep);
2341 tl_refrele(tep);
2342 }
2343
2344 /*
2345 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2346 * Assumes that the endpoint is in the unbound.
2347 */
2348 static void
2349 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2350 {
2351 queue_t *wq = tep->te_wq;
2352 struct T_bind_ack *b_ack;
2353 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2354 mblk_t *ackmp, *bamp;
2355 soux_addr_t ux_addr;
2356 t_uscalar_t qlen = 0;
2357 t_scalar_t alen, aoff;
2358 tl_addr_t addr_req;
2359 void *addr_startp;
2360 ssize_t msz = MBLKL(mp), basize;
2361 t_scalar_t tli_err = 0, unix_err = 0;
2362 t_scalar_t save_prim_type = bind->PRIM_type;
2363 t_scalar_t save_state = tep->te_state;
2364
2365 if (tep->te_state != TS_UNBND) {
2366 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2367 SL_TRACE|SL_ERROR,
2368 "tl_wput:bind_request:out of state, state=%d",
2369 tep->te_state));
2370 tli_err = TOUTSTATE;
2371 goto error;
2372 }
2373
2374 if (msz < sizeof (struct T_bind_req)) {
2375 tli_err = TSYSERR; unix_err = EINVAL;
2376 goto error;
2377 }
2378
2379 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2380
2381 ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2382 (bind->PRIM_type == T_BIND_REQ));
2383
2384 alen = bind->ADDR_length;
2385 aoff = bind->ADDR_offset;
2386
2387 /* negotiate max conn req pending */
2388 if (IS_COTS(tep)) {
2389 qlen = bind->CONIND_number;
2390 if (qlen > tl_maxqlen)
2391 qlen = tl_maxqlen;
2392 }
2393
2394 /*
2395 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2396 * and bound again.
2397 */
2398 if ((tep->te_hash_hndl == NULL) &&
2399 ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2400 mod_hash_reserve_nosleep(tep->te_addrhash,
2401 &tep->te_hash_hndl) != 0) {
2402 tli_err = TSYSERR; unix_err = ENOSR;
2403 goto error;
2404 }
2405
2406 /*
2407 * Verify address correctness.
2408 */
2409 if (IS_SOCKET(tep)) {
2410 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2411
2412 if ((alen != TL_SOUX_ADDRLEN) ||
2413 (aoff < 0) ||
2414 (aoff + alen > msz)) {
2415 (void) (STRLOG(TL_ID, tep->te_minor,
2416 1, SL_TRACE|SL_ERROR,
2417 "tl_bind: invalid socket addr"));
2418 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2419 tli_err = TSYSERR; unix_err = EINVAL;
2420 goto error;
2421 }
2422 /* Copy address from message to local buffer. */
2423 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2424 /*
2425 * Check that we got correct address from sockets
2426 */
2427 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2428 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2429 (void) (STRLOG(TL_ID, tep->te_minor,
2430 1, SL_TRACE|SL_ERROR,
2431 "tl_bind: invalid socket magic"));
2432 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2433 tli_err = TSYSERR; unix_err = EINVAL;
2434 goto error;
2435 }
2436 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2437 (ux_addr.soua_vp != NULL)) {
2438 (void) (STRLOG(TL_ID, tep->te_minor,
2439 1, SL_TRACE|SL_ERROR,
2440 "tl_bind: implicit addr non-empty"));
2441 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2442 tli_err = TSYSERR; unix_err = EINVAL;
2443 goto error;
2444 }
2445 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2446 (ux_addr.soua_vp == NULL)) {
2447 (void) (STRLOG(TL_ID, tep->te_minor,
2448 1, SL_TRACE|SL_ERROR,
2449 "tl_bind: explicit addr empty"));
2450 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2451 tli_err = TSYSERR; unix_err = EINVAL;
2452 goto error;
2453 }
2454 } else {
2455 if ((alen > 0) && ((aoff < 0) ||
2456 ((ssize_t)(aoff + alen) > msz) ||
2457 ((aoff + alen) < 0))) {
2458 (void) (STRLOG(TL_ID, tep->te_minor,
2459 1, SL_TRACE|SL_ERROR,
2460 "tl_bind: invalid message"));
2461 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2462 tli_err = TSYSERR; unix_err = EINVAL;
2463 goto error;
2464 }
2465 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2466 (void) (STRLOG(TL_ID, tep->te_minor,
2467 1, SL_TRACE|SL_ERROR,
2468 "tl_bind: bad addr in message"));
2469 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2470 tli_err = TBADADDR;
2471 goto error;
2472 }
2473 #ifdef DEBUG
2474 /*
2475 * Mild form of ASSERT()ion to detect broken TPI apps.
2476 * if (! assertion)
2477 * log warning;
2478 */
2479 if (! ((alen == 0 && aoff == 0) ||
2480 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2481 (void) (STRLOG(TL_ID, tep->te_minor,
2482 3, SL_TRACE|SL_ERROR,
2483 "tl_bind: addr overlaps TPI message"));
2484 }
2485 #endif
2486 }
2487
2488 /*
2489 * Bind the address provided or allocate one if requested.
2490 * Allow rebinds with a new qlen value.
2491 */
2492 if (IS_SOCKET(tep)) {
2493 /*
2494 * For anonymous requests the te_ap is already set up properly
2495 * so use minor number as an address.
2496 * For explicit requests need to check whether the address is
2497 * already in use.
2498 */
2499 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2500 int rc;
2501
2502 if (tep->te_flag & TL_ADDRHASHED) {
2503 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2504 if (tep->te_vp == ux_addr.soua_vp)
2505 goto skip_addr_bind;
2506 else /* Rebind to a new address. */
2507 tl_addr_unbind(tep);
2508 }
2509 /*
2510 * Insert address in the hash if it is not already
2511 * there. Since we use preallocated handle, the insert
2512 * can fail only if the key is already present.
2513 */
2514 rc = mod_hash_insert_reserve(tep->te_addrhash,
2515 (mod_hash_key_t)ux_addr.soua_vp,
2516 (mod_hash_val_t)tep, tep->te_hash_hndl);
2517
2518 if (rc != 0) {
2519 ASSERT(rc == MH_ERR_DUPLICATE);
2520 /*
2521 * Violate O_T_BIND_REQ semantics and fail with
2522 * TADDRBUSY - sockets will not use any address
2523 * other than supplied one for explicit binds.
2524 */
2525 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2526 SL_TRACE|SL_ERROR,
2527 "tl_bind:requested addr %p is busy",
2528 ux_addr.soua_vp));
2529 tli_err = TADDRBUSY; unix_err = 0;
2530 goto error;
2531 }
2532 tep->te_uxaddr = ux_addr;
2533 tep->te_flag |= TL_ADDRHASHED;
2534 tep->te_hash_hndl = NULL;
2535 }
2536 } else if (alen == 0) {
2537 /*
2538 * assign any free address
2539 */
2540 if (! tl_get_any_addr(tep, NULL)) {
2541 (void) (STRLOG(TL_ID, tep->te_minor,
2542 1, SL_TRACE|SL_ERROR,
2543 "tl_bind:failed to get buffer for any "
2544 "address"));
2545 tli_err = TSYSERR; unix_err = ENOSR;
2546 goto error;
2547 }
2548 } else {
2549 addr_req.ta_alen = alen;
2550 addr_req.ta_abuf = (mp->b_rptr + aoff);
2551 addr_req.ta_zoneid = tep->te_zoneid;
2552
2553 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2554 if (tep->te_abuf == NULL) {
2555 tli_err = TSYSERR; unix_err = ENOSR;
2556 goto error;
2557 }
2558 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2559 tep->te_alen = alen;
2560
2561 if (mod_hash_insert_reserve(tep->te_addrhash,
2562 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2563 tep->te_hash_hndl) != 0) {
2564 if (save_prim_type == T_BIND_REQ) {
2565 /*
2566 * The bind semantics for this primitive
2567 * require a failure if the exact address
2568 * requested is busy
2569 */
2570 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2571 SL_TRACE|SL_ERROR,
2572 "tl_bind:requested addr is busy"));
2573 tli_err = TADDRBUSY; unix_err = 0;
2574 goto error;
2575 }
2576
2577 /*
2578 * O_T_BIND_REQ semantics say if address if requested
2579 * address is busy, bind to any available free address
2580 */
2581 if (! tl_get_any_addr(tep, &addr_req)) {
2582 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2583 SL_TRACE|SL_ERROR,
2584 "tl_bind:unable to get any addr buf"));
2585 tli_err = TSYSERR; unix_err = ENOMEM;
2586 goto error;
2587 }
2588 } else {
2589 tep->te_flag |= TL_ADDRHASHED;
2590 tep->te_hash_hndl = NULL;
2591 }
2592 }
2593
2594 ASSERT(tep->te_alen >= 0);
2595
2596 skip_addr_bind:
2597 /*
2598 * prepare T_BIND_ACK TPI message
2599 */
2600 basize = sizeof (struct T_bind_ack) + tep->te_alen;
2601 bamp = reallocb(mp, basize, 0);
2602 if (bamp == NULL) {
2603 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2604 "tl_wput:tl_bind: allocb failed"));
2605 /*
2606 * roll back state changes
2607 */
2608 tl_addr_unbind(tep);
2609 tep->te_state = TS_UNBND;
2610 tl_memrecover(wq, mp, basize);
2611 return;
2612 }
2613
2614 DB_TYPE(bamp) = M_PCPROTO;
2615 bamp->b_wptr = bamp->b_rptr + basize;
2616 b_ack = (struct T_bind_ack *)bamp->b_rptr;
2617 b_ack->PRIM_type = T_BIND_ACK;
2618 b_ack->CONIND_number = qlen;
2619 b_ack->ADDR_length = tep->te_alen;
2620 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2621 addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2622 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2623
2624 if (IS_COTS(tep)) {
2625 tep->te_qlen = qlen;
2626 if (qlen > 0)
2627 tep->te_flag |= TL_LISTENER;
2628 }
2629
2630 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2631 /*
2632 * send T_BIND_ACK message
2633 */
2634 (void) qreply(wq, bamp);
2635 return;
2636
2637 error:
2638 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2639 if (ackmp == NULL) {
2640 /*
2641 * roll back state changes
2642 */
2643 tep->te_state = save_state;
2644 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2645 return;
2646 }
2647 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2648 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2649 }
2650
2651 /*
2652 * Process T_UNBIND_REQ.
2653 * Called from serializer.
2654 */
2655 static void
2656 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2657 {
2658 queue_t *wq;
2659 mblk_t *ackmp;
2660
2661 if (tep->te_closing) {
2662 freemsg(mp);
2663 return;
2664 }
2665
2666 wq = tep->te_wq;
2667
2668 /*
2669 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2670 * ==> allocate for T_ERROR_ACK (known max)
2671 */
2672 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2673 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2674 return;
2675 }
2676 /*
2677 * memory resources committed
2678 * Note: no message validation. T_UNBIND_REQ message is
2679 * same size as PRIM_type field so already verified earlier.
2680 */
2681
2682 /*
2683 * validate state
2684 */
2685 if (tep->te_state != TS_IDLE) {
2686 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2687 SL_TRACE|SL_ERROR,
2688 "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2689 tep->te_state));
2690 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2691 return;
2692 }
2693 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2694
2695 /*
2696 * TPI says on T_UNBIND_REQ:
2697 * send up a M_FLUSH to flush both
2698 * read and write queues
2699 */
2700 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2701
2702 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2703 tep->te_magic != SOU_MAGIC_EXPLICIT) {
2704
2705 /*
2706 * Sockets use bind with qlen==0 followed by bind() to
2707 * the same address with qlen > 0 for listeners.
2708 * We allow rebind with a new qlen value.
2709 */
2710 tl_addr_unbind(tep);
2711 }
2712
2713 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2714 /*
2715 * send T_OK_ACK
2716 */
2717 tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2718 }
2719
2720
2721 /*
2722 * Option management code from drv/ip is used here
2723 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2724 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2725 * However, that is what we want as that option is 'unorthodox'
2726 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND
2727 * and not in T_SVR4_OPTMGMT_REQ/ACK
2728 * Note2: use of optcom_req means this routine is an exception to
2729 * recovery from allocb() failures.
2730 */
2731
2732 static void
2733 tl_optmgmt(queue_t *wq, mblk_t *mp)
2734 {
2735 tl_endpt_t *tep;
2736 mblk_t *ackmp;
2737 union T_primitives *prim;
2738 cred_t *cr;
2739
2740 tep = (tl_endpt_t *)wq->q_ptr;
2741 prim = (union T_primitives *)mp->b_rptr;
2742
2743 /*
2744 * All Solaris components should pass a db_credp
2745 * for this TPI message, hence we ASSERT.
2746 * But in case there is some other M_PROTO that looks
2747 * like a TPI message sent by some other kernel
2748 * component, we check and return an error.
2749 */
2750 cr = msg_getcred(mp, NULL);
2751 ASSERT(cr != NULL);
2752 if (cr == NULL) {
2753 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2754 return;
2755 }
2756
2757 /* all states OK for AF_UNIX options ? */
2758 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2759 prim->type == T_SVR4_OPTMGMT_REQ) {
2760 /*
2761 * Broken TLI semantics that options can only be managed
2762 * in TS_IDLE state. Needed for Sparc ABI test suite that
2763 * tests this TLI (mis)feature using this device driver.
2764 */
2765 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2766 SL_TRACE|SL_ERROR,
2767 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2768 tep->te_state));
2769 /*
2770 * preallocate memory for T_ERROR_ACK
2771 */
2772 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2773 if (! ackmp) {
2774 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2775 return;
2776 }
2777
2778 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2779 freemsg(mp);
2780 return;
2781 }
2782
2783 /*
2784 * call common option management routine from drv/ip
2785 */
2786 if (prim->type == T_SVR4_OPTMGMT_REQ) {
2787 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2788 } else {
2789 ASSERT(prim->type == T_OPTMGMT_REQ);
2790 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2791 }
2792 }
2793
2794 /*
2795 * Handle T_conn_req - the driver part of accept().
2796 * If TL_SET[U]CRED generate the credentials options.
2797 * If this is a socket pass through options unmodified.
2798 * For sockets generate the T_CONN_CON here instead of
2799 * waiting for the T_CONN_RES.
2800 */
2801 static void
2802 tl_conn_req(queue_t *wq, mblk_t *mp)
2803 {
2804 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2805 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
2806 ssize_t msz = MBLKL(mp);
2807 t_scalar_t alen, aoff, olen, ooff, err = 0;
2808 tl_endpt_t *peer_tep = NULL;
2809 mblk_t *ackmp;
2810 mblk_t *dimp;
2811 struct T_discon_ind *di;
2812 soux_addr_t ux_addr;
2813 tl_addr_t dst;
2814
2815 ASSERT(IS_COTS(tep));
2816
2817 if (tep->te_closing) {
2818 freemsg(mp);
2819 return;
2820 }
2821
2822 /*
2823 * preallocate memory for:
2824 * 1. max of T_ERROR_ACK and T_OK_ACK
2825 * ==> known max T_ERROR_ACK
2826 * 2. max of T_DISCON_IND and T_CONN_IND
2827 */
2828 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2829 if (! ackmp) {
2830 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2831 return;
2832 }
2833 /*
2834 * memory committed for T_OK_ACK/T_ERROR_ACK now
2835 * will be committed for T_DISCON_IND/T_CONN_IND later
2836 */
2837
2838 if (tep->te_state != TS_IDLE) {
2839 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2840 SL_TRACE|SL_ERROR,
2841 "tl_wput:T_CONN_REQ:out of state, state=%d",
2842 tep->te_state));
2843 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2844 freemsg(mp);
2845 return;
2846 }
2847
2848 /*
2849 * validate the message
2850 * Note: dereference fields in struct inside message only
2851 * after validating the message length.
2852 */
2853 if (msz < sizeof (struct T_conn_req)) {
2854 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2855 "tl_conn_req:invalid message length"));
2856 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2857 freemsg(mp);
2858 return;
2859 }
2860 alen = creq->DEST_length;
2861 aoff = creq->DEST_offset;
2862 olen = creq->OPT_length;
2863 ooff = creq->OPT_offset;
2864 if (olen == 0)
2865 ooff = 0;
2866
2867 if (IS_SOCKET(tep)) {
2868 if ((alen != TL_SOUX_ADDRLEN) ||
2869 (aoff < 0) ||
2870 (aoff + alen > msz) ||
2871 (alen > msz - sizeof (struct T_conn_req))) {
2872 (void) (STRLOG(TL_ID, tep->te_minor,
2873 1, SL_TRACE|SL_ERROR,
2874 "tl_conn_req: invalid socket addr"));
2875 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2876 freemsg(mp);
2877 return;
2878 }
2879 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2880 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2881 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2882 (void) (STRLOG(TL_ID, tep->te_minor,
2883 1, SL_TRACE|SL_ERROR,
2884 "tl_conn_req: invalid socket magic"));
2885 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2886 freemsg(mp);
2887 return;
2888 }
2889 } else {
2890 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2891 (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2892 ooff + olen < 0)) ||
2893 olen < 0 || ooff < 0) {
2894 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2895 SL_TRACE|SL_ERROR,
2896 "tl_conn_req:invalid message"));
2897 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2898 freemsg(mp);
2899 return;
2900 }
2901
2902 if (alen <= 0 || aoff < 0 ||
2903 (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2904 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2905 SL_TRACE|SL_ERROR,
2906 "tl_conn_req:bad addr in message, "
2907 "alen=%d, msz=%ld",
2908 alen, msz));
2909 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2910 freemsg(mp);
2911 return;
2912 }
2913 #ifdef DEBUG
2914 /*
2915 * Mild form of ASSERT()ion to detect broken TPI apps.
2916 * if (! assertion)
2917 * log warning;
2918 */
2919 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2920 (void) (STRLOG(TL_ID, tep->te_minor, 3,
2921 SL_TRACE|SL_ERROR,
2922 "tl_conn_req: addr overlaps TPI message"));
2923 }
2924 #endif
2925 if (olen) {
2926 /*
2927 * no opts in connect req
2928 * supported in this provider except for sockets.
2929 */
2930 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2931 SL_TRACE|SL_ERROR,
2932 "tl_conn_req:options not supported "
2933 "in message"));
2934 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2935 freemsg(mp);
2936 return;
2937 }
2938 }
2939
2940 /*
2941 * Prevent tep from closing on us.
2942 */
2943 if (! tl_noclose(tep)) {
2944 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2945 "tl_conn_req:endpoint is closing"));
2946 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2947 freemsg(mp);
2948 return;
2949 }
2950
2951 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2952 /*
2953 * get endpoint to connect to
2954 * check that peer with DEST addr is bound to addr
2955 * and has CONIND_number > 0
2956 */
2957 dst.ta_alen = alen;
2958 dst.ta_abuf = mp->b_rptr + aoff;
2959 dst.ta_zoneid = tep->te_zoneid;
2960
2961 /*
2962 * Verify if remote addr is in use
2963 */
2964 peer_tep = (IS_SOCKET(tep) ?
2965 tl_sock_find_peer(tep, &ux_addr) :
2966 tl_find_peer(tep, &dst));
2967
2968 if (peer_tep == NULL) {
2969 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2970 "tl_conn_req:no one at connect address"));
2971 err = ECONNREFUSED;
2972 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
2973 /*
2974 * validate that number of incoming connection is
2975 * not to capacity on destination endpoint
2976 */
2977 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2978 "tl_conn_req: qlen overflow connection refused"));
2979 err = ECONNREFUSED;
2980 }
2981
2982 /*
2983 * Send T_DISCON_IND in case of error
2984 */
2985 if (err != 0) {
2986 if (peer_tep != NULL)
2987 tl_refrele(peer_tep);
2988 /* We are still expected to send T_OK_ACK */
2989 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2990 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2991 tl_closeok(tep);
2992 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2993 M_PROTO, T_DISCON_IND);
2994 if (dimp == NULL) {
2995 tl_merror(wq, NULL, ENOSR);
2996 return;
2997 }
2998 di = (struct T_discon_ind *)dimp->b_rptr;
2999 di->DISCON_reason = err;
3000 di->SEQ_number = BADSEQNUM;
3001
3002 tep->te_state = TS_IDLE;
3003 /*
3004 * send T_DISCON_IND message
3005 */
3006 putnext(tep->te_rq, dimp);
3007 return;
3008 }
3009
3010 ASSERT(IS_COTS(peer_tep));
3011
3012 /*
3013 * Found the listener. At this point processing will continue on
3014 * listener serializer. Close of the endpoint should be blocked while we
3015 * switch serializers.
3016 */
3017 tl_serializer_refhold(peer_tep->te_ser);
3018 tl_serializer_refrele(tep->te_ser);
3019 tep->te_ser = peer_tep->te_ser;
3020 ASSERT(tep->te_oconp == NULL);
3021 tep->te_oconp = peer_tep;
3022
3023 /*
3024 * It is safe to close now. Close may continue on listener serializer.
3025 */
3026 tl_closeok(tep);
3027
3028 /*
3029 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3030 * data, so we link mp to ackmp.
3031 */
3032 ackmp->b_cont = mp;
3033 mp = ackmp;
3034
3035 tl_refhold(tep);
3036 tl_serializer_enter(tep, tl_conn_req_ser, mp);
3037 }
3038
3039 /*
3040 * Finish T_CONN_REQ processing on listener serializer.
3041 */
3042 static void
3043 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3044 {
3045 queue_t *wq;
3046 tl_endpt_t *peer_tep = tep->te_oconp;
3047 mblk_t *confmp, *cimp, *indmp;
3048 void *opts = NULL;
3049 mblk_t *ackmp = mp;
3050 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3051 struct T_conn_ind *ci;
3052 tl_icon_t *tip;
3053 void *addr_startp;
3054 t_scalar_t olen = creq->OPT_length;
3055 t_scalar_t ooff = creq->OPT_offset;
3056 size_t ci_msz;
3057 size_t size;
3058 cred_t *cr = NULL;
3059 pid_t cpid;
3060
3061 if (tep->te_closing) {
3062 TL_UNCONNECT(tep->te_oconp);
3063 tl_serializer_exit(tep);
3064 tl_refrele(tep);
3065 freemsg(mp);
3066 return;
3067 }
3068
3069 wq = tep->te_wq;
3070 tep->te_flag |= TL_EAGER;
3071
3072 /*
3073 * Extract preallocated ackmp from mp.
3074 */
3075 mp = mp->b_cont;
3076 ackmp->b_cont = NULL;
3077
3078 if (olen == 0)
3079 ooff = 0;
3080
3081 if (peer_tep->te_closing ||
3082 !((peer_tep->te_state == TS_IDLE) ||
3083 (peer_tep->te_state == TS_WRES_CIND))) {
3084 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3085 "tl_conn_req:peer in bad state (%d)",
3086 peer_tep->te_state));
3087 TL_UNCONNECT(tep->te_oconp);
3088 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3089 freemsg(ackmp);
3090 tl_serializer_exit(tep);
3091 tl_refrele(tep);
3092 return;
3093 }
3094
3095 /*
3096 * preallocate now for T_DISCON_IND or T_CONN_IND
3097 */
3098 /*
3099 * calculate length of T_CONN_IND message
3100 */
3101 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3102 cr = msg_getcred(mp, &cpid);
3103 ASSERT(cr != NULL);
3104 if (peer_tep->te_flag & TL_SETCRED) {
3105 ooff = 0;
3106 olen = (t_scalar_t) sizeof (struct opthdr) +
3107 OPTLEN(sizeof (tl_credopt_t));
3108 /* 1 option only */
3109 } else {
3110 ooff = 0;
3111 olen = (t_scalar_t)sizeof (struct opthdr) +
3112 OPTLEN(ucredminsize(cr));
3113 /* 1 option only */
3114 }
3115 }
3116 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3117 ci_msz = T_ALIGN(ci_msz) + olen;
3118 size = max(ci_msz, sizeof (struct T_discon_ind));
3119
3120 /*
3121 * Save options from mp - we'll need them for T_CONN_IND.
3122 */
3123 if (ooff != 0) {
3124 opts = kmem_alloc(olen, KM_NOSLEEP);
3125 if (opts == NULL) {
3126 /*
3127 * roll back state changes
3128 */
3129 tep->te_state = TS_IDLE;
3130 tl_memrecover(wq, mp, size);
3131 freemsg(ackmp);
3132 TL_UNCONNECT(tep->te_oconp);
3133 tl_serializer_exit(tep);
3134 tl_refrele(tep);
3135 return;
3136 }
3137 /* Copy options to a temp buffer */
3138 bcopy(mp->b_rptr + ooff, opts, olen);
3139 }
3140
3141 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3142 /*
3143 * Generate a T_CONN_CON that has the identical address
3144 * (and options) as the T_CONN_REQ.
3145 * NOTE: assumes that the T_conn_req and T_conn_con structures
3146 * are isomorphic.
3147 */
3148 confmp = copyb(mp);
3149 if (! confmp) {
3150 /*
3151 * roll back state changes
3152 */
3153 tep->te_state = TS_IDLE;
3154 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3155 freemsg(ackmp);
3156 if (opts != NULL)
3157 kmem_free(opts, olen);
3158 TL_UNCONNECT(tep->te_oconp);
3159 tl_serializer_exit(tep);
3160 tl_refrele(tep);
3161 return;
3162 }
3163 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3164 T_CONN_CON;
3165 } else {
3166 confmp = NULL;
3167 }
3168 if ((indmp = reallocb(mp, size, 0)) == NULL) {
3169 /*
3170 * roll back state changes
3171 */
3172 tep->te_state = TS_IDLE;
3173 tl_memrecover(wq, mp, size);
3174 freemsg(ackmp);
3175 if (opts != NULL)
3176 kmem_free(opts, olen);
3177 freemsg(confmp);
3178 TL_UNCONNECT(tep->te_oconp);
3179 tl_serializer_exit(tep);
3180 tl_refrele(tep);
3181 return;
3182 }
3183
3184 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3185 if (tip == NULL) {
3186 /*
3187 * roll back state changes
3188 */
3189 tep->te_state = TS_IDLE;
3190 tl_memrecover(wq, indmp, sizeof (*tip));
3191 freemsg(ackmp);
3192 if (opts != NULL)
3193 kmem_free(opts, olen);
3194 freemsg(confmp);
3195 TL_UNCONNECT(tep->te_oconp);
3196 tl_serializer_exit(tep);
3197 tl_refrele(tep);
3198 return;
3199 }
3200 tip->ti_mp = NULL;
3201
3202 /*
3203 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3204 * and tl_icon_t cell.
3205 */
3206
3207 /*
3208 * ack validity of request and send the peer credential in the ACK.
3209 */
3210 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3211
3212 if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3213 confmp != NULL) {
3214 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3215 }
3216
3217 tl_ok_ack(wq, ackmp, T_CONN_REQ);
3218
3219 /*
3220 * prepare message to send T_CONN_IND
3221 */
3222 /*
3223 * allocate the message - original data blocks retained
3224 * in the returned mblk
3225 */
3226 cimp = tl_resizemp(indmp, size);
3227 if (! cimp) {
3228 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3229 "tl_conn_req:con_ind:allocb failure"));
3230 tl_merror(wq, indmp, ENOMEM);
3231 TL_UNCONNECT(tep->te_oconp);
3232 tl_serializer_exit(tep);
3233 tl_refrele(tep);
3234 if (opts != NULL)
3235 kmem_free(opts, olen);
3236 freemsg(confmp);
3237 ASSERT(tip->ti_mp == NULL);
3238 kmem_free(tip, sizeof (*tip));
3239 return;
3240 }
3241
3242 DB_TYPE(cimp) = M_PROTO;
3243 ci = (struct T_conn_ind *)cimp->b_rptr;
3244 ci->PRIM_type = T_CONN_IND;
3245 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3246 ci->SRC_length = tep->te_alen;
3247 ci->SEQ_number = tep->te_seqno;
3248
3249 addr_startp = cimp->b_rptr + ci->SRC_offset;
3250 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3251 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3252
3253 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3254 ci->SRC_length);
3255 ci->OPT_length = olen; /* because only 1 option */
3256 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3257 cr, cpid,
3258 peer_tep->te_flag, peer_tep->te_credp);
3259 } else if (ooff != 0) {
3260 /* Copy option from T_CONN_REQ */
3261 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3262 ci->SRC_length);
3263 ci->OPT_length = olen;
3264 ASSERT(opts != NULL);
3265 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3266 } else {
3267 ci->OPT_offset = 0;
3268 ci->OPT_length = 0;
3269 }
3270 if (opts != NULL)
3271 kmem_free(opts, olen);
3272
3273 /*
3274 * register connection request with server peer
3275 * append to list of incoming connections
3276 * increment references for both peer_tep and tep: peer_tep is placed on
3277 * te_oconp and tep is placed on listeners queue.
3278 */
3279 tip->ti_tep = tep;
3280 tip->ti_seqno = tep->te_seqno;
3281 list_insert_tail(&peer_tep->te_iconp, tip);
3282 peer_tep->te_nicon++;
3283
3284 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3285 /*
3286 * send the T_CONN_IND message
3287 */
3288 putnext(peer_tep->te_rq, cimp);
3289
3290 /*
3291 * Send a T_CONN_CON message for sockets.
3292 * Disable the queues until we have reached the correct state!
3293 */
3294 if (confmp != NULL) {
3295 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3296 noenable(wq);
3297 putnext(tep->te_rq, confmp);
3298 }
3299 /*
3300 * Now we need to increment tep reference because tep is referenced by
3301 * server list of pending connections. We also need to decrement
3302 * reference before exiting serializer. Two operations void each other
3303 * so we don't modify reference at all.
3304 */
3305 ASSERT(tep->te_refcnt >= 2);
3306 ASSERT(peer_tep->te_refcnt >= 2);
3307 tl_serializer_exit(tep);
3308 }
3309
3310
3311
/*
 * Handle T_CONN_RES / O_T_CONN_RES on the listener stream.
 *
 * Called on the listener's serializer. No one accesses the acceptor at this
 * point, so it is safe to modify the acceptor. When the client (eager) is
 * still present its serializer is switched to the acceptor's; when that is
 * not possible the acceptor is moved to the client's serializer instead.
 *
 * If the client has TL_SET[U]CRED set, the credentials option is generated
 * in the T_CONN_CON. For sockets (with early connect enabled) tl_conn_req
 * has already generated the T_CONN_CON, so none is sent from here.
 *
 * Arguments:
 *	mp  - the T_CONN_RES message; it is either freed, handed to
 *	      tl_memrecover(), or reused (via reallocb) for the
 *	      T_DISCON_IND / T_CONN_CON that is sent.
 *	tep - the listening endpoint.
 *
 * On every error detected after the state moved to TS_WACK_CRES, the
 * listener state is advanced with TE_ERROR_ACK before the T_ERROR_ACK is
 * sent.
 */
static void
tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq;
	struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr;
	ssize_t msz = MBLKL(mp);
	t_scalar_t olen, ooff, err = 0;
	/*
	 * NOTE(review): PRIM_type is read here, before msz is validated
	 * below; presumably the caller has already checked that the message
	 * holds at least the primitive type - confirm.
	 */
	t_scalar_t prim = cres->PRIM_type;
	uchar_t *addr_startp;
	tl_endpt_t *acc_ep = NULL, *cl_ep = NULL;
	tl_icon_t *tip;
	size_t size;
	mblk_t *ackmp, *respmp;
	mblk_t *dimp, *ccmp = NULL;
	struct T_discon_ind *di;
	struct T_conn_con *cc;
	/* B_TRUE once tl_noclose(cl_ep) succeeded; must be undone on exit */
	boolean_t client_noclose_set = B_FALSE;
	/* B_FALSE when the acceptor has to adopt the client's serializer */
	boolean_t switch_client_serializer = B_TRUE;

	ASSERT(IS_COTS(tep));

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. max of T_DISCON_IND and T_CONN_CON
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND/T_CONN_CON later
	 */


	ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);

	/*
	 * validate state
	 */
	if (tep->te_state != TS_WRES_CIND) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_RES:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_conn_res)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message length"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	olen = cres->OPT_length;
	ooff = cres->OPT_offset;
	/* options, when present, must lie entirely inside the message */
	if (((olen > 0) && ((ooff + olen) > msz))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:invalid message"));
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
		freemsg(mp);
		return;
	}
	if (olen) {
		/*
		 * no opts in connect res
		 * supported in this provider
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_conn_res:options not supported in message"));
		tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * The request is well formed: move the listener to TS_WACK_CRES.
	 * Every failure from here on applies TE_ERROR_ACK to the state.
	 */
	tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
	ASSERT(tep->te_state == TS_WACK_CRES);

	/*
	 * Reject sequence numbers in the range [BADSEQNUM, TL_MINOR_START).
	 */
	if (cres->SEQ_number < TL_MINOR_START &&
	    cres->SEQ_number >= BADSEQNUM) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:remote endpoint sequence number bad"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * find accepting endpoint. Will have extra reference if found.
	 */
	if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
	    (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		return;
	}

	/*
	 * Prevent acceptor from closing. From here on every return path
	 * must drop the acceptor reference obtained by the lookup above
	 * and, once tl_noclose() succeeded, call tl_closeok(acc_ep) -
	 * except for the successful connection at the end, which keeps the
	 * reference for the te_conp linkage.
	 */
	if (! tl_noclose(acc_ep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:bad accepting endpoint"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		tl_refrele(acc_ep);
		freemsg(mp);
		return;
	}

	acc_ep->te_flag |= TL_ACCEPTOR;

	/*
	 * validate that accepting endpoint, if different from listening
	 * has address bound => state is TS_IDLE
	 * TROUBLE in XPG4 !!?
	 */
	if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:accepting endpoint has no address bound,"
		    "state=%d", acc_ep->te_state));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * validate if accepting endpt same as listening, then
	 * no other incoming connection should be on the queue
	 */

	if ((tep == acc_ep) && (tep->te_nicon > 1)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_conn_res: > 1 conn_ind on listener-acceptor"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADF, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * Look up the entry for this sequence number on the listener's
	 * list of pending connections. The entry is removed (tl_freetip)
	 * further below once the connection is accepted or the client
	 * turns out to be gone.
	 */
	tip = tl_icon_find(tep, cres->SEQ_number);
	if (tip == NULL) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
		    "tl_conn_res:no client in listener list"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
		tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * If ti_tep is NULL the client has already closed. In this case
	 * the code below will avoid any action on the client side
	 * but complete the server and acceptor state transitions.
	 */
	ASSERT(tip->ti_tep == NULL ||
	    tip->ti_tep->te_seqno == cres->SEQ_number);
	cl_ep = tip->ti_tep;

	/*
	 * If the client is present it is switched from listener's to acceptor's
	 * serializer. We should block client closes while serializers are
	 * being switched.
	 *
	 * It is possible that the client is present but is currently being
	 * closed. There are two possible cases:
	 *
	 * 1) The client has already entered tl_close_finish_ser() and sent
	 *    T_ORDREL_IND. In this case we can just ignore the client (but we
	 *    still need to send all messages from tip->ti_mp to the acceptor).
	 *
	 * 2) The client started the close but has not entered
	 *    tl_close_finish_ser() yet. In this case, the client is already
	 *    proceeding asynchronously on the listener's serializer, so we're
	 *    forced to change the acceptor to use the listener's serializer to
	 *    ensure that any operations on the acceptor are serialized with
	 *    respect to the close that's in-progress.
	 */
	if (cl_ep != NULL) {
		if (tl_noclose(cl_ep)) {
			client_noclose_set = B_TRUE;
		} else {
			/*
			 * Client is closing. If it has sent the
			 * T_ORDREL_IND, we can simply ignore it - otherwise,
			 * we have to let the client continue until it is
			 * sent.
			 *
			 * If we do continue using the client, acceptor will
			 * switch to client's serializer which is used by client
			 * for its close.
			 */
			tl_client_closing_when_accepting++;
			switch_client_serializer = B_FALSE;
			/*
			 * The closing client is only kept for sockets with
			 * early connect enabled whose state is not -1; in
			 * every other case it is treated as already gone.
			 */
			if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
			    cl_ep->te_state == -1)
				cl_ep = NULL;
		}
	}

	if (cl_ep != NULL) {
		/*
		 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
		 * (latter for sockets only)
		 *
		 * NOTE(review): as written, the condition below is only true
		 * for a socket client whose state is neither TS_WCON_CREQ
		 * nor TS_DATA_XFER; a non-socket client in an unexpected
		 * state is not rejected. Confirm this matches the intent
		 * stated above.
		 */
		if (cl_ep->te_state != TS_WCON_CREQ &&
		    (cl_ep->te_state != TS_DATA_XFER &&
		    IS_SOCKET(cl_ep))) {
			err = ECONNREFUSED;
			/*
			 * T_DISCON_IND sent later after committing memory
			 * and acking validity of request
			 */
			(void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
			    "tl_conn_res:peer in bad state"));
		}

		/*
		 * preallocate now for T_DISCON_IND or T_CONN_CON
		 * ack validity of request (T_OK_ACK) after memory committed
		 */

		if (err)
			size = sizeof (struct T_discon_ind);
		else {
			/*
			 * calculate length of T_CONN_CON message:
			 * header + acceptor address, aligned, followed by
			 * a single credentials option when the client
			 * asked for one.
			 */
			olen = 0;
			if (cl_ep->te_flag & TL_SETCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(sizeof (tl_credopt_t));
			} else if (cl_ep->te_flag & TL_SETUCRED) {
				olen = (t_scalar_t)sizeof (struct opthdr) +
				    OPTLEN(ucredminsize(acc_ep->te_credp));
			}
			size = T_ALIGN(sizeof (struct T_conn_con) +
			    acc_ep->te_alen) + olen;
		}
		if ((respmp = reallocb(mp, size, 0)) == NULL) {
			/*
			 * roll back state changes
			 */
			tep->te_state = TS_WRES_CIND;
			tl_memrecover(wq, mp, size);
			freemsg(ackmp);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_closeok(acc_ep);
			tl_refrele(acc_ep);
			return;
		}
		/* the request message now lives on as respmp */
		mp = NULL;
	}

	/*
	 * Now ack validity of request. The TE_OK_ACKx event depends on
	 * whether this is the only pending connection and on whether the
	 * listener is accepting on itself.
	 */
	if (tep->te_nicon == 1) {
		if (tep == acc_ep)
			tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
		else
			tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
	} else
		tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);

	/*
	 * send T_DISCON_IND now if client state validation failed earlier
	 */
	if (err) {
		tl_ok_ack(wq, ackmp, prim);
		/*
		 * flush the queues - why always ?
		 */
		(void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);

		dimp = tl_resizemp(respmp, size);
		if (! dimp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:con_ind:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}
		if (dimp->b_cont) {
			/* no user data in provider generated discon ind */
			freemsg(dimp->b_cont);
			dimp->b_cont = NULL;
		}

		DB_TYPE(dimp) = M_PROTO;
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->PRIM_type = T_DISCON_IND;
		di->DISCON_reason = err;
		di->SEQ_number = BADSEQNUM;

		/*
		 * NOTE(review): the listener is forced to TS_IDLE here,
		 * overriding the TE_OK_ACKx transition computed above, and
		 * tip is left on the pending list - confirm this is
		 * intended.
		 */
		tep->te_state = TS_IDLE;
		/*
		 * send T_DISCON_IND message
		 */
		putnext(acc_ep->te_rq, dimp);
		if (client_noclose_set)
			tl_closeok(cl_ep);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	}

	/*
	 * now start connecting the accepting endpoint
	 */
	if (tep != acc_ep)
		acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);

	if (cl_ep == NULL) {
		/*
		 * The client has already closed. Send up any queued messages
		 * and change the state accordingly.
		 */
		tl_ok_ack(wq, ackmp, prim);
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);

		/*
		 * remove endpoint from incoming connection
		 * delete client from list of incoming connections
		 */
		tl_freetip(tep, tip);
		/* mp was never reallocated on this path, so free it here */
		freemsg(mp);
		tl_closeok(acc_ep);
		tl_refrele(acc_ep);
		return;
	} else if (tip->ti_mp != NULL) {
		/*
		 * The client could have queued a T_DISCON_IND which needs
		 * to be sent up.
		 * Note that t_discon_req can not operate the same as
		 * t_data_req since it is not possible for it to putbq
		 * the message and return -1 due to the use of qwriter.
		 */
		tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
	}

	/*
	 * prepare connect confirm T_CONN_CON message
	 */

	/*
	 * allocate the message - original data blocks
	 * retained in the returned mblk
	 *
	 * A T_CONN_CON is only built for non-socket clients, or when early
	 * connect is disabled; otherwise the preallocated message is simply
	 * released.
	 */
	if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
		ccmp = tl_resizemp(respmp, size);
		if (ccmp == NULL) {
			tl_ok_ack(wq, ackmp, prim);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_conn_res:conn_con:allocb failure"));
			tl_merror(wq, respmp, ENOMEM);
			tl_closeok(acc_ep);
			if (client_noclose_set)
				tl_closeok(cl_ep);
			tl_refrele(acc_ep);
			return;
		}

		DB_TYPE(ccmp) = M_PROTO;
		cc = (struct T_conn_con *)ccmp->b_rptr;
		cc->PRIM_type = T_CONN_CON;
		/* responding (acceptor) address follows the header */
		cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
		cc->RES_length = acc_ep->te_alen;
		addr_startp = ccmp->b_rptr + cc->RES_offset;
		bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
		if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
			/* credentials option goes after the aligned address */
			cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
			    cc->RES_length);
			cc->OPT_length = olen;
			tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
			    acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
			    cl_ep->te_credp);
		} else {
			cc->OPT_offset = 0;
			cc->OPT_length = 0;
		}
		/*
		 * Forward the credential in the packet so it can be picked up
		 * at the higher layers for more complete credential processing
		 */
		mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
	} else {
		freemsg(respmp);
		respmp = NULL;
	}

	/*
	 * make connection linking
	 * accepting and client endpoints
	 * No need to increment references:
	 *	on client: it should already have one from tip->ti_tep linkage.
	 *	on acceptor: it should already have one from the table lookup.
	 *
	 * At this point both client and acceptor can't close. Set client
	 * serializer to acceptor's.
	 */
	ASSERT(cl_ep->te_refcnt >= 2);
	ASSERT(acc_ep->te_refcnt >= 2);
	ASSERT(cl_ep->te_conp == NULL);
	ASSERT(acc_ep->te_conp == NULL);
	cl_ep->te_conp = acc_ep;
	acc_ep->te_conp = cl_ep;
	ASSERT(cl_ep->te_ser == tep->te_ser);
	if (switch_client_serializer) {
		mutex_enter(&cl_ep->te_ser_lock);
		if (cl_ep->te_ser_count > 0) {
			/*
			 * te_ser_count is non-zero, so the client cannot be
			 * moved; fall back to moving the acceptor below.
			 */
			switch_client_serializer = B_FALSE;
			tl_serializer_noswitch++;
		} else {
			/*
			 * Move client to the acceptor's serializer.
			 */
			tl_serializer_refhold(acc_ep->te_ser);
			tl_serializer_refrele(cl_ep->te_ser);
			cl_ep->te_ser = acc_ep->te_ser;
		}
		mutex_exit(&cl_ep->te_ser_lock);
	}
	if (!switch_client_serializer) {
		/*
		 * It is not possible to switch client to use acceptor's.
		 * Move acceptor to client's serializer (which is the same as
		 * listener's).
		 */
		tl_serializer_refhold(cl_ep->te_ser);
		tl_serializer_refrele(acc_ep->te_ser);
		acc_ep->te_ser = cl_ep->te_ser;
	}

	TL_REMOVE_PEER(cl_ep->te_oconp);
	TL_REMOVE_PEER(acc_ep->te_oconp);

	/*
	 * remove endpoint from incoming connection
	 * delete client from list of incoming connections
	 *
	 * ti_tep is cleared before the entry is freed.
	 */
	tip->ti_tep = NULL;
	tl_freetip(tep, tip);
	tl_ok_ack(wq, ackmp, prim);

	/*
	 * data blocks already linked in reallocb()
	 */

	/*
	 * link queues so that I_SENDFD will work
	 */
	if (! IS_SOCKET(tep)) {
		acc_ep->te_wq->q_next = cl_ep->te_rq;
		cl_ep->te_wq->q_next = acc_ep->te_rq;
	}

	/*
	 * send T_CONN_CON up on client side unless it was already
	 * done (for a socket). In case any data or ordrel req has been
	 * queued make sure that the service procedure runs.
	 */
	if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
		enableok(cl_ep->te_wq);
		TL_QENABLE(cl_ep);
		if (ccmp != NULL)
			freemsg(ccmp);
	} else {
		/*
		 * change client state on TE_CONN_CON event
		 */
		cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
		putnext(cl_ep->te_rq, ccmp);
	}

	/* Mark both endpoints as accepted */
	cl_ep->te_flag |= TL_ACCEPTED;
	acc_ep->te_flag |= TL_ACCEPTED;

	/*
	 * Allow client and acceptor to close.
	 */
	tl_closeok(acc_ep);
	if (client_noclose_set)
		tl_closeok(cl_ep);
}
3850
3851
3852
3853
/*
 * Handle T_DISCON_REQ on endpoint tep.
 *
 * After validating the endpoint state and the message, the request is
 * acknowledged with T_OK_ACK and then one of four cases is processed:
 *
 *   listener   (te_nicon > 0):	 reject the pending connection identified by
 *				 SEQ_number and drop it from the list of
 *				 incoming connections.
 *   client     (te_oconp set):	 abort an outgoing connection request that is
 *				 still pending on the server.
 *   connected  (te_conp set):	 tear down an established connection.
 *   otherwise:			 nothing is connected; only the state changes.
 *
 * Where a peer has to be told, the request message is reused (via reallocb)
 * as the T_DISCON_IND sent to it.
 *
 * The new endpoint state is computed in new_state and only committed to
 * tep->te_state once it is known whether the T_DISCON_IND is queued with the
 * pending connection (sockets with early connect) or not.
 *
 * mp is freed, handed to tl_memrecover(), or reused on every path.
 */
static void
tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t *wq;
	struct T_discon_req *dr;
	ssize_t msz;
	tl_endpt_t *peer_tep = tep->te_conp;
	tl_endpt_t *srv_tep = tep->te_oconp;
	tl_icon_t *tip;
	size_t size;
	mblk_t *ackmp, *dimp, *respmp;
	struct T_discon_ind *di;
	/*
	 * save_state first holds tep's state on entry and is later reused
	 * for the peer's state before it is reset; new_state is the state
	 * tep will be given.
	 */
	t_scalar_t save_state, new_state;

	if (tep->te_closing) {
		freemsg(mp);
		return;
	}

	/*
	 * A peer or server that is already closing is treated as absent:
	 * drop the linkage to it right away.
	 */
	if ((peer_tep != NULL) && peer_tep->te_closing) {
		TL_UNCONNECT(tep->te_conp);
		peer_tep = NULL;
	}
	if ((srv_tep != NULL) && srv_tep->te_closing) {
		TL_UNCONNECT(tep->te_oconp);
		srv_tep = NULL;
	}

	wq = tep->te_wq;

	/*
	 * preallocate memory for:
	 * 1. max of T_ERROR_ACK and T_OK_ACK
	 *	==> known max T_ERROR_ACK
	 * 2. for T_DISCON_IND
	 */
	ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
	if (! ackmp) {
		tl_memrecover(wq, mp, sizeof (struct T_error_ack));
		return;
	}
	/*
	 * memory committed for T_OK_ACK/T_ERROR_ACK now
	 * will be committed for T_DISCON_IND later
	 */

	dr = (struct T_discon_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state: the request is only accepted in states
	 * TS_WCON_CREQ..TS_WRES_CIND or TS_DATA_XFER..TS_WREQ_ORDREL.
	 */
	save_state = new_state = tep->te_state;
	if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
	    ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_DISCON_REQ:out of state, state=%d",
		    tep->te_state));
		tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
		freemsg(mp);
		return;
	}
	/*
	 * Defer committing the state change until it is determined if
	 * the message will be queued with the tl_icon or not.
	 */
	new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state);

	/* validate the message */
	if (msz < sizeof (struct T_discon_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_discon_req:invalid message"));
		tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
		tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
		freemsg(mp);
		return;
	}

	/*
	 * if server, then validate that client exists
	 * by connection sequence number etc.
	 */
	if (tep->te_nicon > 0) { /* server */

		/*
		 * search server list for disconnect client
		 */
		tip = tl_icon_find(tep, dr->SEQ_number);
		if (tip == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req:no disconnect endpoint"));
			tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
			tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
			freemsg(mp);
			return;
		}
		/*
		 * If ti_tep is NULL the client has already closed. In this case
		 * the code below will avoid any action on the client side.
		 */

		IMPLY(tip->ti_tep != NULL,
		    tip->ti_tep->te_seqno == dr->SEQ_number);
		peer_tep = tip->ti_tep;
	}

	/*
	 * preallocate now for T_DISCON_IND
	 * ack validity of request (T_OK_ACK) after memory committed
	 */
	size = sizeof (struct T_discon_ind);
	if ((respmp = reallocb(mp, size, 0)) == NULL) {
		tl_memrecover(wq, mp, size);
		freemsg(ackmp);
		return;
	}

	/*
	 * prepare message to ack validity of request; the TE_OK_ACKx event
	 * depends on how many incoming connections are pending.
	 */
	if (tep->te_nicon == 0)
		new_state = NEXTSTATE(TE_OK_ACK1, new_state);
	else
		if (tep->te_nicon == 1)
			new_state = NEXTSTATE(TE_OK_ACK2, new_state);
		else
			new_state = NEXTSTATE(TE_OK_ACK4, new_state);

	/*
	 * Flushing queues according to TPI. Using the old state.
	 */
	if ((tep->te_nicon <= 1) &&
	    ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL)))
		(void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);

	/* send T_OK_ACK up */
	tl_ok_ack(wq, ackmp, T_DISCON_REQ);

	/*
	 * now do disconnect business
	 */
	if (tep->te_nicon > 0) { /* listener */
		if (peer_tep != NULL && !peer_tep->te_closing) {
			/*
			 * disconnect incoming connect request pending to tep
			 */
			if ((dimp = tl_resizemp(respmp, size)) == NULL) {
				(void) (STRLOG(TL_ID, tep->te_minor, 2,
				    SL_TRACE|SL_ERROR,
				    "tl_discon_req: reallocb failed"));
				tep->te_state = new_state;
				tl_merror(wq, respmp, ENOMEM);
				return;
			}
			di = (struct T_discon_ind *)dimp->b_rptr;
			di->SEQ_number = BADSEQNUM;
			/* remember the client's state for the flush below */
			save_state = peer_tep->te_state;
			peer_tep->te_state = TS_IDLE;

			TL_REMOVE_PEER(peer_tep->te_oconp);
			enableok(peer_tep->te_wq);
			TL_QENABLE(peer_tep);
		} else {
			/* client is gone or closing: nobody to notify */
			freemsg(respmp);
			dimp = NULL;
		}

		/*
		 * remove endpoint from incoming connection list
		 * - remove disconnect client from list on server
		 */
		tl_freetip(tep, tip);
	} else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
		/*
		 * disconnect an outgoing request pending from tep
		 */

		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		/*
		 * The indication is filled in completely here because on
		 * the socket path below it is queued rather than passing
		 * through the common tail of this function.
		 */
		di = (struct T_discon_ind *)dimp->b_rptr;
		DB_TYPE(dimp) = M_PROTO;
		di->PRIM_type = T_DISCON_IND;
		di->DISCON_reason = ECONNRESET;
		di->SEQ_number = tep->te_seqno;

		/*
		 * If this is a socket the T_DISCON_IND is queued with
		 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
		 * from the list of pending connections.
		 * Note that when te_oconp is set the peer better have
		 * a t_connind_t for the client.
		 */
		if (IS_SOCKET(tep) && !tl_disable_early_connect) {
			/*
			 * No need to check that
			 * ti_tep == NULL since the T_DISCON_IND
			 * takes precedence over other queued
			 * messages.
			 */
			tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
			/*
			 * The message now belongs to the pending connection;
			 * clearing both pointers makes the code below skip
			 * the putnext and go straight to "done".
			 */
			peer_tep = NULL;
			dimp = NULL;
			/*
			 * Can't clear te_oconp since tl_co_unconnect needs
			 * it as a hint not to free the tep.
			 * Keep the state unchanged since tl_conn_res inspects
			 * it.
			 */
			new_state = tep->te_state;
		} else {
			/* Found - delete it */
			tip = tl_icon_find(peer_tep, tep->te_seqno);
			if (tip != NULL) {
				ASSERT(tep == tip->ti_tep);
				save_state = peer_tep->te_state;
				/*
				 * The server's transition depends on whether
				 * this was its only pending connection.
				 */
				if (peer_tep->te_nicon == 1)
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND2,
					    peer_tep->te_state);
				else
					peer_tep->te_state =
					    NEXTSTATE(TE_DISCON_IND3,
					    peer_tep->te_state);
				tl_freetip(peer_tep, tip);
			}
			ASSERT(tep->te_oconp != NULL);
			TL_UNCONNECT(tep->te_oconp);
		}
	} else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
		if ((dimp = tl_resizemp(respmp, size)) == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 2,
			    SL_TRACE|SL_ERROR,
			    "tl_discon_req: reallocb failed"));
			tep->te_state = new_state;
			tl_merror(wq, respmp, ENOMEM);
			return;
		}
		di = (struct T_discon_ind *)dimp->b_rptr;
		di->SEQ_number = BADSEQNUM;

		/* remember the peer's state for the flush/qenable below */
		save_state = peer_tep->te_state;
		peer_tep->te_state = TS_IDLE;
	} else {
		/* Not connected */
		tep->te_state = new_state;
		freemsg(respmp);
		return;
	}

	/* Commit state changes */
	tep->te_state = new_state;

	if (peer_tep == NULL) {
		/* nothing to send: message was freed or queued above */
		ASSERT(dimp == NULL);
		goto done;
	}
	/*
	 * Flush queues on peer before sending up
	 * T_DISCON_IND according to TPI
	 */

	if ((save_state == TS_DATA_XFER) ||
	    (save_state == TS_WIND_ORDREL) ||
	    (save_state == TS_WREQ_ORDREL))
		(void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);

	/*
	 * Common part of the indication; SEQ_number was set in the branch
	 * that built the message.
	 */
	DB_TYPE(dimp) = M_PROTO;
	di->PRIM_type = T_DISCON_IND;
	di->DISCON_reason = ECONNRESET;

	/*
	 * data blocks already linked into dimp by reallocb()
	 */
	/*
	 * send indication message to peer user module
	 */
	ASSERT(dimp != NULL);
	putnext(peer_tep->te_rq, dimp);
done:
	if (tep->te_conp) {	/* disconnect pointers if connected */
		ASSERT(! peer_tep->te_closing);

		/*
		 * Messages may be queued on peer's write queue
		 * waiting to be processed by its write service
		 * procedure. Before the pointer to the peer transport
		 * structure is set to NULL, qenable the peer's write
		 * queue so that the queued up messages are processed.
		 */
		if ((save_state == TS_DATA_XFER) ||
		    (save_state == TS_WIND_ORDREL) ||
		    (save_state == TS_WREQ_ORDREL))
			TL_QENABLE(peer_tep);
		ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
		TL_UNCONNECT(peer_tep->te_conp);
		if (! IS_SOCKET(tep)) {
			/*
			 * unlink the streams
			 */
			tep->te_wq->q_next = NULL;
			peer_tep->te_wq->q_next = NULL;
		}
		TL_UNCONNECT(tep->te_conp);
	}
}
4169
4170 static void
4171 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4172 {
4173 if (!tep->te_closing)
4174 tl_addr_req(mp, tep);
4175 else
4176 freemsg(mp);
4177
4178 tl_serializer_exit(tep);
4179 tl_refrele(tep);
4180 }
4181
4182 static void
4183 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4184 {
4185 queue_t *wq;
4186 size_t ack_sz;
4187 mblk_t *ackmp;
4188 struct T_addr_ack *taa;
4189
4190 if (tep->te_closing) {
4191 freemsg(mp);
4192 return;
4193 }
4194
4195 wq = tep->te_wq;
4196
4197 /*
4198 * Note: T_ADDR_REQ message has only PRIM_type field
4199 * so it is already validated earlier.
4200 */
4201
4202 if (IS_CLTS(tep) ||
4203 (tep->te_state > TS_WREQ_ORDREL) ||
4204 (tep->te_state < TS_DATA_XFER)) {
4205 /*
4206 * Either connectionless or connection oriented but not
4207 * in connected data transfer state or half-closed states.
4208 */
4209 ack_sz = sizeof (struct T_addr_ack);
4210 if (tep->te_state >= TS_IDLE)
4211 /* is bound */
4212 ack_sz += tep->te_alen;
4213 ackmp = reallocb(mp, ack_sz, 0);
4214 if (ackmp == NULL) {
4215 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4216 SL_TRACE|SL_ERROR,
4217 "tl_addr_req: reallocb failed"));
4218 tl_memrecover(wq, mp, ack_sz);
4219 return;
4220 }
4221
4222 taa = (struct T_addr_ack *)ackmp->b_rptr;
4223
4224 bzero(taa, sizeof (struct T_addr_ack));
4225
4226 taa->PRIM_type = T_ADDR_ACK;
4227 ackmp->b_datap->db_type = M_PCPROTO;
4228 ackmp->b_wptr = (uchar_t *)&taa[1];
4229
4230 if (tep->te_state >= TS_IDLE) {
4231 /* endpoint is bound */
4232 taa->LOCADDR_length = tep->te_alen;
4233 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4234
4235 bcopy(tep->te_abuf, ackmp->b_wptr,
4236 tep->te_alen);
4237 ackmp->b_wptr += tep->te_alen;
4238 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4239 }
4240
4241 (void) qreply(wq, ackmp);
4242 } else {
4243 ASSERT(tep->te_state == TS_DATA_XFER ||
4244 tep->te_state == TS_WIND_ORDREL ||
4245 tep->te_state == TS_WREQ_ORDREL);
4246 /* connection oriented in data transfer */
4247 tl_connected_cots_addr_req(mp, tep);
4248 }
4249 }
4250
4251
4252 static void
4253 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4254 {
4255 tl_endpt_t *peer_tep;
4256 size_t ack_sz;
4257 mblk_t *ackmp;
4258 struct T_addr_ack *taa;
4259 uchar_t *addr_startp;
4260
4261 if (tep->te_closing) {
4262 freemsg(mp);
4263 return;
4264 }
4265
4266 ASSERT(tep->te_state >= TS_IDLE);
4267
4268 ack_sz = sizeof (struct T_addr_ack);
4269 ack_sz += T_ALIGN(tep->te_alen);
4270 peer_tep = tep->te_conp;
4271 ack_sz += peer_tep->te_alen;
4272
4273 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4274 if (ackmp == NULL) {
4275 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4276 "tl_connected_cots_addr_req: reallocb failed"));
4277 tl_memrecover(tep->te_wq, mp, ack_sz);
4278 return;
4279 }
4280
4281 taa = (struct T_addr_ack *)ackmp->b_rptr;
4282
4283 /* endpoint is bound */
4284 taa->LOCADDR_length = tep->te_alen;
4285 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4286
4287 addr_startp = (uchar_t *)&taa[1];
4288
4289 bcopy(tep->te_abuf, addr_startp,
4290 tep->te_alen);
4291
4292 taa->REMADDR_length = peer_tep->te_alen;
4293 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4294 taa->LOCADDR_length);
4295 addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4296 bcopy(peer_tep->te_abuf, addr_startp,
4297 peer_tep->te_alen);
4298 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4299 taa->REMADDR_offset + peer_tep->te_alen;
4300 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4301
4302 putnext(tep->te_rq, ackmp);
4303 }
4304
4305 static void
4306 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4307 {
4308 if (IS_CLTS(tep)) {
4309 *ia = tl_clts_info_ack;
4310 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4311 } else {
4312 *ia = tl_cots_info_ack;
4313 if (IS_COTSORD(tep))
4314 ia->SERV_type = T_COTS_ORD;
4315 }
4316 ia->TIDU_size = tl_tidusz;
4317 ia->CURRENT_state = tep->te_state;
4318 }
4319
4320 /*
4321 * This routine responds to T_CAPABILITY_REQ messages. It is called by
4322 * tl_wput.
4323 */
4324 static void
4325 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4326 {
4327 mblk_t *ackmp;
4328 t_uscalar_t cap_bits1;
4329 struct T_capability_ack *tcap;
4330
4331 if (tep->te_closing) {
4332 freemsg(mp);
4333 return;
4334 }
4335
4336 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4337
4338 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4339 M_PCPROTO, T_CAPABILITY_ACK);
4340 if (ackmp == NULL) {
4341 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4342 "tl_capability_req: reallocb failed"));
4343 tl_memrecover(tep->te_wq, mp,
4344 sizeof (struct T_capability_ack));
4345 return;
4346 }
4347
4348 tcap = (struct T_capability_ack *)ackmp->b_rptr;
4349 tcap->CAP_bits1 = 0;
4350
4351 if (cap_bits1 & TC1_INFO) {
4352 tl_copy_info(&tcap->INFO_ack, tep);
4353 tcap->CAP_bits1 |= TC1_INFO;
4354 }
4355
4356 if (cap_bits1 & TC1_ACCEPTOR_ID) {
4357 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4358 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4359 }
4360
4361 putnext(tep->te_rq, ackmp);
4362 }
4363
4364 static void
4365 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4366 {
4367 if (! tep->te_closing)
4368 tl_info_req(mp, tep);
4369 else
4370 freemsg(mp);
4371
4372 tl_serializer_exit(tep);
4373 tl_refrele(tep);
4374 }
4375
4376 static void
4377 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4378 {
4379 mblk_t *ackmp;
4380
4381 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4382 M_PCPROTO, T_INFO_ACK);
4383 if (ackmp == NULL) {
4384 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4385 "tl_info_req: reallocb failed"));
4386 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4387 return;
4388 }
4389
4390 /*
4391 * fill in T_INFO_ACK contents
4392 */
4393 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4394
4395 /*
4396 * send ack message
4397 */
4398 putnext(tep->te_rq, ackmp);
4399 }
4400
/*
 * Handle M_DATA, T_data_req and T_optdata_req.
 * If this is a socket pass through T_optdata_req options unmodified.
 *
 *	mp	- the message (M_DATA, or M_PROTO carrying T_DATA_REQ /
 *		  T_OPTDATA_REQ)
 *	tep	- the sending endpoint
 *
 * The message is always disposed of here: it is forwarded to the peer's
 * read side (with the primitive rewritten from REQ to IND), put back on
 * the queue for later, queued with the pending connection indication,
 * turned into an M_ERROR via tl_merror(), or freed.
 * The sender's state does not change on this event.
 */
static void
tl_data(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	union T_primitives	*prim = (union T_primitives *)mp->b_rptr;
	ssize_t			msz = MBLKL(mp);
	tl_endpt_t		*peer_tep;
	queue_t			*peer_rq;
	/* snapshot; used below to pick between tl_merror() and freemsg() */
	boolean_t		closing = tep->te_closing;

	/* Data requests are not valid on a connectionless endpoint. */
	if (IS_CLTS(tep)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 2,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:clts:unattached M_DATA"));
		if (!closing) {
			tl_merror(wq, mp, EPROTO);
		} else {
			freemsg(mp);
		}
		return;
	}

	/*
	 * If the endpoint is closing it should still forward any data to the
	 * peer (if it has one). If it is not allowed to forward it can just
	 * free the message.
	 */
	if (closing &&
	    (tep->te_state != TS_DATA_XFER) &&
	    (tep->te_state != TS_WREQ_ORDREL)) {
		freemsg(mp);
		return;
	}

	/*
	 * For M_PROTO, make sure the block is large enough for the primitive
	 * it claims to be. T_OPTDATA_REQ is additionally accepted only on
	 * sockets. prim is not looked at for M_DATA.
	 */
	if (DB_TYPE(mp) == M_PROTO) {
		if (prim->type == T_DATA_REQ &&
		    msz < sizeof (struct T_data_req)) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_DATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		} else if (prim->type == T_OPTDATA_REQ &&
		    (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data:T_OPTDATA_REQ:invalid message"));
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
	}

	/*
	 * connection oriented provider
	 */
	switch (tep->te_state) {
	case TS_IDLE:
		/*
		 * Other end not here - do nothing.
		 */
		freemsg(mp);
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_data:cots with endpoint idle"));
		return;

	case TS_DATA_XFER:
		/* valid states */
		if (tep->te_conp != NULL)
			break;

		/* Neither a connected peer nor a pending connection. */
		if (tep->te_oconp == NULL) {
			if (!closing) {
				tl_merror(wq, mp, EPROTO);
			} else {
				freemsg(mp);
			}
			return;
		}
		/*
		 * For a socket the T_CONN_CON is sent early thus
		 * the peer might not yet have accepted the connection.
		 * If we are closing queue the packet with the T_CONN_IND.
		 * Otherwise defer processing the packet until the peer
		 * accepts the connection.
		 * Note that the queue is noenabled when we go into this
		 * state.
		 */
		if (!closing) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_data: ocon"));
			TL_PUTBQ(tep, mp);
			return;
		}
		if (DB_TYPE(mp) == M_PROTO) {
			/* need at least the primitive type to rewrite it */
			if (msz < sizeof (t_scalar_t)) {
				freemsg(mp);
				return;
			}
			/* reuse message block - just change REQ to IND */
			if (prim->type == T_DATA_REQ)
				prim->type = T_DATA_IND;
			else
				prim->type = T_OPTDATA_IND;
		}
		tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
		return;

	case TS_WREQ_ORDREL:
		if (tep->te_conp == NULL) {
			/*
			 * Other end closed - generate discon_ind
			 * with reason 0 to cause an EPIPE but no
			 * read side error on AF_UNIX sockets.
			 */
			freemsg(mp);
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_data: WREQ_ORDREL and no peer"));
			tl_discon_ind(tep, 0);
			return;
		}
		break;

	default:
		/*
		 * invalid state for event TE_DATA_REQ
		 * (not reached while closing: the closing check above only
		 * lets TS_DATA_XFER and TS_WREQ_ORDREL through)
		 */
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:cots:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
	 * (State stays same on this event)
	 */

	/*
	 * get connected endpoint
	 */
	if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
		freemsg(mp);
		/* Peer closed */
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
		    "tl_data: peer gone"));
		return;
	}

	ASSERT(tep->te_serializer == peer_tep->te_serializer);
	peer_rq = peer_tep->te_rq;

	/*
	 * Put it back if flow controlled
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_rq) && !closing) {
		TL_PUTBQ(tep, mp);
		return;
	}

	/*
	 * validate peer state
	 */
	switch (peer_tep->te_state) {
	case TS_DATA_XFER:
	case TS_WIND_ORDREL:
		/* valid states */
		break;
	default:
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_data:rx side:invalid state"));
		/* note: the error is raised on the peer's write queue */
		tl_merror(peer_tep->te_wq, mp, EPROTO);
		return;
	}
	if (DB_TYPE(mp) == M_PROTO) {
		/* reuse message block - just change REQ to IND */
		if (prim->type == T_DATA_REQ)
			prim->type = T_DATA_IND;
		else
			prim->type = T_OPTDATA_IND;
	}
	/*
	 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
	 * (peer state stays same on this event)
	 */
	/*
	 * send data to connected peer
	 */
	putnext(peer_rq, mp);
}
4603
4604
4605
4606 static void
4607 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4608 {
4609 queue_t *wq = tep->te_wq;
4610 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4611 ssize_t msz = MBLKL(mp);
4612 tl_endpt_t *peer_tep;
4613 queue_t *peer_rq;
4614 boolean_t closing = tep->te_closing;
4615
4616 if (msz < sizeof (struct T_exdata_req)) {
4617 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4618 "tl_exdata:invalid message"));
4619 if (!closing) {
4620 tl_merror(wq, mp, EPROTO);
4621 } else {
4622 freemsg(mp);
4623 }
4624 return;
4625 }
4626
4627 /*
4628 * If the endpoint is closing it should still forward any data to the
4629 * peer (if it has one). If it is not allowed to forward it can just
4630 * free the message.
4631 */
4632 if (closing &&
4633 (tep->te_state != TS_DATA_XFER) &&
4634 (tep->te_state != TS_WREQ_ORDREL)) {
4635 freemsg(mp);
4636 return;
4637 }
4638
4639 /*
4640 * validate state
4641 */
4642 switch (tep->te_state) {
4643 case TS_IDLE:
4644 /*
4645 * Other end not here - do nothing.
4646 */
4647 freemsg(mp);
4648 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4649 "tl_exdata:cots with endpoint idle"));
4650 return;
4651
4652 case TS_DATA_XFER:
4653 /* valid states */
4654 if (tep->te_conp != NULL)
4655 break;
4656
4657 if (tep->te_oconp == NULL) {
4658 if (!closing) {
4659 tl_merror(wq, mp, EPROTO);
4660 } else {
4661 freemsg(mp);
4662 }
4663 return;
4664 }
4665 /*
4666 * For a socket the T_CONN_CON is sent early thus
4667 * the peer might not yet have accepted the connection.
4668 * If we are closing queue the packet with the T_CONN_IND.
4669 * Otherwise defer processing the packet until the peer
4670 * accepts the connection.
4671 * Note that the queue is noenabled when we go into this
4672 * state.
4673 */
4674 if (!closing) {
4675 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4676 SL_TRACE|SL_ERROR,
4677 "tl_exdata: ocon"));
4678 TL_PUTBQ(tep, mp);
4679 return;
4680 }
4681 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4682 "tl_exdata: closing socket ocon"));
4683 prim->type = T_EXDATA_IND;
4684 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4685 return;
4686
4687 case TS_WREQ_ORDREL:
4688 if (tep->te_conp == NULL) {
4689 /*
4690 * Other end closed - generate discon_ind
4691 * with reason 0 to cause an EPIPE but no
4692 * read side error on AF_UNIX sockets.
4693 */
4694 freemsg(mp);
4695 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4696 SL_TRACE|SL_ERROR,
4697 "tl_exdata: WREQ_ORDREL and no peer"));
4698 tl_discon_ind(tep, 0);
4699 return;
4700 }
4701 break;
4702
4703 default:
4704 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4705 SL_TRACE|SL_ERROR,
4706 "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4707 tep->te_state));
4708 tl_merror(wq, mp, EPROTO);
4709 return;
4710 }
4711 /*
4712 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4713 * (state stays same on this event)
4714 */
4715
4716 /*
4717 * get connected endpoint
4718 */
4719 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4720 freemsg(mp);
4721 /* Peer closed */
4722 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4723 "tl_exdata: peer gone"));
4724 return;
4725 }
4726
4727 peer_rq = peer_tep->te_rq;
4728
4729 /*
4730 * Put it back if flow controlled
4731 * Note: Messages already on queue when we are closing is bounded
4732 * so we can ignore flow control.
4733 */
4734 if (!canputnext(peer_rq) && !closing) {
4735 TL_PUTBQ(tep, mp);
4736 return;
4737 }
4738
4739 /*
4740 * validate state on peer
4741 */
4742 switch (peer_tep->te_state) {
4743 case TS_DATA_XFER:
4744 case TS_WIND_ORDREL:
4745 /* valid states */
4746 break;
4747 default:
4748 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4749 "tl_exdata:rx side:invalid state"));
4750 tl_merror(peer_tep->te_wq, mp, EPROTO);
4751 return;
4752 }
4753 /*
4754 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4755 * (peer state stays same on this event)
4756 */
4757 /*
4758 * reuse message block
4759 */
4760 prim->type = T_EXDATA_IND;
4761
4762 /*
4763 * send data to connected peer
4764 */
4765 putnext(peer_rq, mp);
4766 }
4767
4768
4769
4770 static void
4771 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4772 {
4773 queue_t *wq = tep->te_wq;
4774 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4775 ssize_t msz = MBLKL(mp);
4776 tl_endpt_t *peer_tep;
4777 queue_t *peer_rq;
4778 boolean_t closing = tep->te_closing;
4779
4780 if (msz < sizeof (struct T_ordrel_req)) {
4781 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4782 "tl_ordrel:invalid message"));
4783 if (!closing) {
4784 tl_merror(wq, mp, EPROTO);
4785 } else {
4786 freemsg(mp);
4787 }
4788 return;
4789 }
4790
4791 /*
4792 * validate state
4793 */
4794 switch (tep->te_state) {
4795 case TS_DATA_XFER:
4796 case TS_WREQ_ORDREL:
4797 /* valid states */
4798 if (tep->te_conp != NULL)
4799 break;
4800
4801 if (tep->te_oconp == NULL)
4802 break;
4803
4804 /*
4805 * For a socket the T_CONN_CON is sent early thus
4806 * the peer might not yet have accepted the connection.
4807 * If we are closing queue the packet with the T_CONN_IND.
4808 * Otherwise defer processing the packet until the peer
4809 * accepts the connection.
4810 * Note that the queue is noenabled when we go into this
4811 * state.
4812 */
4813 if (!closing) {
4814 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4815 SL_TRACE|SL_ERROR,
4816 "tl_ordlrel: ocon"));
4817 TL_PUTBQ(tep, mp);
4818 return;
4819 }
4820 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4821 "tl_ordlrel: closing socket ocon"));
4822 prim->type = T_ORDREL_IND;
4823 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4824 return;
4825
4826 default:
4827 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4828 SL_TRACE|SL_ERROR,
4829 "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4830 tep->te_state));
4831 if (!closing) {
4832 tl_merror(wq, mp, EPROTO);
4833 } else {
4834 freemsg(mp);
4835 }
4836 return;
4837 }
4838 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4839
4840 /*
4841 * get connected endpoint
4842 */
4843 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4844 /* Peer closed */
4845 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4846 "tl_ordrel: peer gone"));
4847 freemsg(mp);
4848 return;
4849 }
4850
4851 peer_rq = peer_tep->te_rq;
4852
4853 /*
4854 * Put it back if flow controlled except when we are closing.
4855 * Note: Messages already on queue when we are closing is bounded
4856 * so we can ignore flow control.
4857 */
4858 if (! canputnext(peer_rq) && !closing) {
4859 TL_PUTBQ(tep, mp);
4860 return;
4861 }
4862
4863 /*
4864 * validate state on peer
4865 */
4866 switch (peer_tep->te_state) {
4867 case TS_DATA_XFER:
4868 case TS_WIND_ORDREL:
4869 /* valid states */
4870 break;
4871 default:
4872 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4873 "tl_ordrel:rx side:invalid state"));
4874 tl_merror(peer_tep->te_wq, mp, EPROTO);
4875 return;
4876 }
4877 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4878
4879 /*
4880 * reuse message block
4881 */
4882 prim->type = T_ORDREL_IND;
4883 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4884 "tl_ordrel: send ordrel_ind"));
4885
4886 /*
4887 * send data to connected peer
4888 */
4889 putnext(peer_rq, mp);
4890 }
4891
4892
4893 /*
4894 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4895 */
4896 static void
4897 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4898 {
4899 size_t err_sz;
4900 tl_endpt_t *tep;
4901 struct T_unitdata_req *udreq;
4902 mblk_t *err_mp;
4903 t_scalar_t alen;
4904 t_scalar_t olen;
4905 struct T_uderror_ind *uderr;
4906 uchar_t *addr_startp;
4907
4908 err_sz = sizeof (struct T_uderror_ind);
4909 tep = (tl_endpt_t *)wq->q_ptr;
4910 udreq = (struct T_unitdata_req *)mp->b_rptr;
4911 alen = udreq->DEST_length;
4912 olen = udreq->OPT_length;
4913
4914 if (alen > 0)
4915 err_sz = T_ALIGN(err_sz + alen);
4916 if (olen > 0)
4917 err_sz += olen;
4918
4919 err_mp = allocb(err_sz, BPRI_MED);
4920 if (! err_mp) {
4921 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4922 "tl_uderr:allocb failure"));
4923 /*
4924 * Note: no rollback of state needed as it does
4925 * not change in connectionless transport
4926 */
4927 tl_memrecover(wq, mp, err_sz);
4928 return;
4929 }
4930
4931 DB_TYPE(err_mp) = M_PROTO;
4932 err_mp->b_wptr = err_mp->b_rptr + err_sz;
4933 uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4934 uderr->PRIM_type = T_UDERROR_IND;
4935 uderr->ERROR_type = err;
4936 uderr->DEST_length = alen;
4937 uderr->OPT_length = olen;
4938 if (alen <= 0) {
4939 uderr->DEST_offset = 0;
4940 } else {
4941 uderr->DEST_offset =
4942 (t_scalar_t)sizeof (struct T_uderror_ind);
4943 addr_startp = mp->b_rptr + udreq->DEST_offset;
4944 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4945 (size_t)alen);
4946 }
4947 if (olen <= 0) {
4948 uderr->OPT_offset = 0;
4949 } else {
4950 uderr->OPT_offset =
4951 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4952 uderr->DEST_length);
4953 addr_startp = mp->b_rptr + udreq->OPT_offset;
4954 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4955 (size_t)olen);
4956 }
4957 freemsg(mp);
4958
4959 /*
4960 * send indication message
4961 */
4962 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4963
4964 qreply(wq, err_mp);
4965 }
4966
4967 static void
4968 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4969 {
4970 queue_t *wq = tep->te_wq;
4971
4972 if (!tep->te_closing && (wq->q_first != NULL)) {
4973 TL_PUTQ(tep, mp);
4974 } else if (tep->te_rq != NULL)
4975 tl_unitdata(mp, tep);
4976 else
4977 freemsg(mp);
4978
4979 tl_serializer_exit(tep);
4980 tl_refrele(tep);
4981 }
4982
/*
 * Handle T_unitdata_req.
 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
 * If this is a socket pass through options unmodified.
 *
 *	mp	- the T_UNITDATA_REQ message (M_PROTO block, data in b_cont)
 *	tep	- the sending (connectionless) endpoint
 *
 * Outline:
 *   1. validate sender state and the address/option offsets and lengths
 *   2. find the destination endpoint (te_lastep is used as a one-entry cache)
 *   3. honour flow control on the destination
 *   4. build a T_UNITDATA_IND - reusing mp when it is large enough and no
 *      credential option has to be added - and pass it to the destination's
 *      read side
 *
 * Failures are reported with tl_merror(), tl_error_ack() or tl_uderr()
 * depending on the check that failed; allocation failure goes through
 * tl_memrecover().
 */
static void
tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
{
	queue_t			*wq = tep->te_wq;
	soux_addr_t		ux_addr;	/* only set for sockets */
	tl_addr_t		destaddr;
	uchar_t			*addr_startp;
	tl_endpt_t		*peer_tep;
	struct T_unitdata_ind	*udind;
	struct T_unitdata_req	*udreq;
	ssize_t			msz, ui_sz;
	t_scalar_t		alen, aoff, olen, ooff;
	t_scalar_t		oldolen = 0;	/* option length as sent */
	cred_t			*cr = NULL;
	pid_t			cpid;

	udreq = (struct T_unitdata_req *)mp->b_rptr;
	msz = MBLKL(mp);

	/*
	 * validate the state
	 *
	 * NOTE(review): the log text below names T_CONN_REQ although this is
	 * the T_UNITDATA_REQ path - looks like a copy/paste leftover.
	 */
	if (tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_wput:T_CONN_REQ:out of state"));
		tl_merror(wq, mp, EPROTO);
		return;
	}
	/*
	 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
	 * (state does not change on this event)
	 */

	/*
	 * validate the message
	 * Note: dereference fields in struct inside message only
	 * after validating the message length.
	 */
	if (msz < sizeof (struct T_unitdata_req)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_unitdata:invalid message length"));
		tl_merror(wq, mp, EINVAL);
		return;
	}
	alen = udreq->DEST_length;
	aoff = udreq->DEST_offset;
	oldolen = olen = udreq->OPT_length;
	ooff = udreq->OPT_offset;
	/* the option offset is meaningless without options */
	if (olen == 0)
		ooff = 0;

	/*
	 * NOTE(review): the offset + length sums below are computed in
	 * t_scalar_t; only the non-socket branch also checks
	 * (aoff + alen) < 0, and no branch does so for ooff + olen.
	 * Worth confirming that very large offsets cannot wrap past
	 * these bounds checks.
	 */
	if (IS_SOCKET(tep)) {
		/*
		 * Sockets: the address must be exactly one soux_addr_t that
		 * lies within the block, and any options must lie within
		 * the block too.
		 */
		if ((alen != TL_SOUX_ADDRLEN) ||
		    (aoff < 0) ||
		    (aoff + alen > msz) ||
		    (olen < 0) || (ooff < 0) ||
		    ((olen > 0) && ((ooff + olen) > msz))) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_unitdata_req: invalid socket addr "
			    "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
			    (int)msz, alen, aoff, olen, ooff));
			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
			return;
		}
		/* copy out: the address in the message may be unaligned */
		bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);

		/* NOTE(review): log text says tl_conn_req, not tl_unitdata */
		if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
		    (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
			(void) (STRLOG(TL_ID, tep->te_minor,
			    1, SL_TRACE|SL_ERROR,
			    "tl_conn_req: invalid socket magic"));
			tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
			return;
		}
	} else {
		/*
		 * TLI: address and options must be non-negative in length
		 * and offset, lie within the block, and be no longer than
		 * what remains after the T_unitdata_req header.
		 */
		if ((alen < 0) ||
		    (aoff < 0) ||
		    ((alen > 0) && ((aoff + alen) > msz)) ||
		    ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
		    ((aoff + alen) < 0) ||
		    ((olen > 0) && ((ooff + olen) > msz)) ||
		    (olen < 0) ||
		    (ooff < 0) ||
		    ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
			(void) (STRLOG(TL_ID, tep->te_minor, 1,
			    SL_TRACE|SL_ERROR,
			    "tl_unitdata:invalid unit data message"));
			tl_merror(wq, mp, EINVAL);
			return;
		}
	}

	/* Options not supported unless it's a socket */
	if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_unitdata:option use(unsupported) or zero len addr"));
		tl_uderr(wq, mp, EPROTO);
		return;
	}
#ifdef DEBUG
	/*
	 * Mild form of ASSERT()ion to detect broken TPI apps.
	 * if (! assertion)
	 *	log warning;
	 */
	if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
		(void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
		    "tl_unitdata:addr overlaps TPI message"));
	}
#endif
	/*
	 * get destination endpoint
	 * (destaddr points into mp; it is only used for the lookup below)
	 */
	destaddr.ta_alen = alen;
	destaddr.ta_abuf = mp->b_rptr + aoff;
	destaddr.ta_zoneid = tep->te_zoneid;

	/*
	 * Check whether the destination is the same that was used previously
	 * and the destination endpoint is in the right state. If something is
	 * wrong, find destination again and cache it.
	 */
	peer_tep = tep->te_lastep;

	if ((peer_tep == NULL) || peer_tep->te_closing ||
	    (peer_tep->te_state != TS_IDLE) ||
	    !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
		/*
		 * Not the same as cached destination , need to find the right
		 * destination.
		 */
		peer_tep = (IS_SOCKET(tep) ?
		    tl_sock_find_peer(tep, &ux_addr) :
		    tl_find_peer(tep, &destaddr));

		if (peer_tep == NULL) {
			(void) (STRLOG(TL_ID, tep->te_minor, 3,
			    SL_TRACE|SL_ERROR,
			    "tl_unitdata:no one at destination address"));
			tl_uderr(wq, mp, ECONNRESET);
			return;
		}

		/*
		 * Cache the new peer.
		 * The reference on the previously cached endpoint is
		 * dropped; the one on the newly found endpoint presumably
		 * comes from the lookup and is kept with the cache entry.
		 */
		if (tep->te_lastep != NULL)
			tl_refrele(tep->te_lastep);

		tep->te_lastep = peer_tep;
	}

	/* a freshly looked-up peer may still be in the wrong state */
	if (peer_tep->te_state != TS_IDLE) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
		    "tl_unitdata:provider in invalid state"));
		tl_uderr(wq, mp, EPROTO);
		return;
	}

	ASSERT(peer_tep->te_rq != NULL);

	/*
	 * Put it back if flow controlled except when we are closing.
	 * Note: Messages already on queue when we are closing is bounded
	 * so we can ignore flow control.
	 */
	if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
		/*
		 * record what we are flow controlled on
		 * (an endpoint is on at most one peer's te_flowlist, so
		 * leave the old list first)
		 */
		if (tep->te_flowq != NULL) {
			list_remove(&tep->te_flowq->te_flowlist, tep);
		}
		list_insert_head(&peer_tep->te_flowlist, tep);
		tep->te_flowq = peer_tep;
		TL_PUTBQ(tep, mp);
		return;
	}
	/*
	 * prepare indication message
	 */

	/*
	 * calculate length of message
	 * (olen grows by the size of the credential option that the
	 * receiver asked for, if any)
	 */
	if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
		cr = msg_getcred(mp, &cpid);
		ASSERT(cr != NULL);

		if (peer_tep->te_flag & TL_SETCRED) {
			ASSERT(olen == 0);
			olen = (t_scalar_t)sizeof (struct opthdr) +
			    OPTLEN(sizeof (tl_credopt_t));
			/* 1 option only */
		} else if (peer_tep->te_flag & TL_SETUCRED) {
			ASSERT(olen == 0);
			olen = (t_scalar_t)sizeof (struct opthdr) +
			    OPTLEN(ucredminsize(cr));
			/* 1 option only */
		} else {
			/* Possibly more than one option */
			olen += (t_scalar_t)sizeof (struct T_opthdr) +
			    OPTLEN(ucredminsize(cr));
		}
	}

	/* header + sender's address (aligned) + options */
	ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
	    olen;
	/*
	 * If the unitdata_ind fits and we are not adding options
	 * reuse the udreq mblk.
	 */
	if (msz >= ui_sz && alen >= tep->te_alen &&
	    !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
		/*
		 * Reuse the original mblk. Leave options in place.
		 *
		 * The sender's address overwrites the destination address
		 * in place. SRC_offset is read, not written, here -
		 * presumably it aliases the request's DEST_offset because
		 * both structures share a layout; TODO confirm.
		 */
		udind = (struct T_unitdata_ind *)mp->b_rptr;
		udind->PRIM_type = T_UNITDATA_IND;
		udind->SRC_length = tep->te_alen;
		addr_startp = mp->b_rptr + udind->SRC_offset;
		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
	} else {
		/* Allocate a new T_unidata_ind message */
		mblk_t *ui_mp;

		ui_mp = allocb(ui_sz, BPRI_MED);
		if (! ui_mp) {
			(void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
			    "tl_unitdata:allocb failure:message queued"));
			tl_memrecover(wq, mp, ui_sz);
			return;
		}

		/*
		 * fill in T_UNITDATA_IND contents
		 */
		DB_TYPE(ui_mp) = M_PROTO;
		ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
		udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
		udind->PRIM_type = T_UNITDATA_IND;
		udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
		udind->SRC_length = tep->te_alen;
		addr_startp = ui_mp->b_rptr + udind->SRC_offset;
		bcopy(tep->te_abuf, addr_startp, tep->te_alen);
		udind->OPT_offset =
		    (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
		udind->OPT_length = olen;
		if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {

			/* sender's own options first ... */
			if (oldolen != 0) {
				bcopy((void *)((uintptr_t)udreq + ooff),
				    (void *)((uintptr_t)udind +
				    udind->OPT_offset),
				    oldolen);
			}
			ASSERT(cr != NULL);

			/* ... then the credential option right behind them */
			tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
			    oldolen, cr, cpid,
			    peer_tep->te_flag, peer_tep->te_credp);
		} else {
			/* no credentials wanted: copy the options verbatim */
			bcopy((void *)((uintptr_t)udreq + ooff),
			    (void *)((uintptr_t)udind + udind->OPT_offset),
			    olen);
		}

		/*
		 * relink data blocks from mp to ui_mp
		 * (only the old header block is freed; the data now
		 * belongs to ui_mp)
		 */
		ui_mp->b_cont = mp->b_cont;
		freeb(mp);
		mp = ui_mp;
	}
	/*
	 * send indication message
	 */
	peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
	putnext(peer_tep->te_rq, mp);
}
5268
5269
5270
5271 /*
5272 * Check if a given addr is in use.
5273 * Endpoint ptr returned or NULL if not found.
5274 * The name space is separate for each mode. This implies that
5275 * sockets get their own name space.
5276 */
5277 static tl_endpt_t *
5278 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5279 {
5280 tl_endpt_t *peer_tep = NULL;
5281 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5282 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5283
5284 ASSERT(! IS_SOCKET(tep));
5285
5286 ASSERT(ap != NULL && ap->ta_alen > 0);
5287 ASSERT(ap->ta_zoneid == tep->te_zoneid);
5288 ASSERT(ap->ta_abuf != NULL);
5289 EQUIV(rc == 0, peer_tep != NULL);
5290 IMPLY(rc == 0,
5291 (tep->te_zoneid == peer_tep->te_zoneid) &&
5292 (tep->te_transport == peer_tep->te_transport));
5293
5294 if ((rc == 0) && (peer_tep->te_closing)) {
5295 tl_refrele(peer_tep);
5296 peer_tep = NULL;
5297 }
5298
5299 return (peer_tep);
5300 }
5301
5302 /*
5303 * Find peer for a socket based on unix domain address.
5304 * For implicit addresses our peer can be found by minor number in ai hash. For
5305 * explicit binds we look vnode address at addr_hash.
5306 */
5307 static tl_endpt_t *
5308 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5309 {
5310 tl_endpt_t *peer_tep = NULL;
5311 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5312 tep->te_aihash : tep->te_addrhash;
5313 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5314 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5315
5316 ASSERT(IS_SOCKET(tep));
5317 EQUIV(rc == 0, peer_tep != NULL);
5318 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5319
5320 if (peer_tep != NULL) {
5321 /* Don't attempt to use closing peer. */
5322 if (peer_tep->te_closing)
5323 goto errout;
5324
5325 /*
5326 * Cross-zone unix sockets are permitted, but for Trusted
5327 * Extensions only, the "server" for these must be in the
5328 * global zone.
5329 */
5330 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5331 is_system_labeled() &&
5332 (peer_tep->te_zoneid != GLOBAL_ZONEID))
5333 goto errout;
5334 }
5335
5336 return (peer_tep);
5337
5338 errout:
5339 tl_refrele(peer_tep);
5340 return (NULL);
5341 }
5342
5343 /*
5344 * Generate a free addr and return it in struct pointed by ap
5345 * but allocating space for address buffer.
5346 * The generated address will be at least 4 bytes long and, if req->ta_alen
5347 * exceeds 4 bytes, be req->ta_alen bytes long.
5348 *
5349 * If address is found it will be inserted in the hash.
5350 *
5351 * If req->ta_alen is larger than the default alen (4 bytes) the last
5352 * alen-4 bytes will always be the same as in req.
5353 *
5354 * Return 0 for failure.
5355 * Return non-zero for success.
5356 */
5357 static boolean_t
5358 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5359 {
5360 t_scalar_t alen;
5361 uint32_t loopcnt; /* Limit loop to 2^32 */
5362
5363 ASSERT(tep->te_hash_hndl != NULL);
5364 ASSERT(! IS_SOCKET(tep));
5365
5366 if (tep->te_hash_hndl == NULL)
5367 return (B_FALSE);
5368
5369 /*
5370 * check if default addr is in use
5371 * if it is - bump it and try again
5372 */
5373 if (req == NULL) {
5374 alen = sizeof (uint32_t);
5375 } else {
5376 alen = max(req->ta_alen, sizeof (uint32_t));
5377 ASSERT(tep->te_zoneid == req->ta_zoneid);
5378 }
5379
5380 if (tep->te_alen < alen) {
5381 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5382
5383 /*
5384 * Not enough space in tep->ta_ap to hold the address,
5385 * allocate a bigger space.
5386 */
5387 if (abuf == NULL)
5388 return (B_FALSE);
5389
5390 if (tep->te_alen > 0)
5391 kmem_free(tep->te_abuf, tep->te_alen);
5392
5393 tep->te_alen = alen;
5394 tep->te_abuf = abuf;
5395 }
5396
5397 /* Copy in the address in req */
5398 if (req != NULL) {
5399 ASSERT(alen >= req->ta_alen);
5400 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5401 }
5402
5403 /*
5404 * First try minor number then try default addresses.
5405 */
5406 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5407
5408 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5409 if (mod_hash_insert_reserve(tep->te_addrhash,
5410 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5411 tep->te_hash_hndl) == 0) {
5412 /*
5413 * found free address
5414 */
5415 tep->te_flag |= TL_ADDRHASHED;
5416 tep->te_hash_hndl = NULL;
5417
5418 return (B_TRUE); /* successful return */
5419 }
5420 /*
5421 * Use default address.
5422 */
5423 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5424 atomic_add_32(&tep->te_defaddr, 1);
5425 }
5426
5427 /*
5428 * Failed to find anything.
5429 */
5430 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5431 "tl_get_any_addr:looped 2^32 times"));
5432 return (B_FALSE);
5433 }
5434
5435 /*
5436 * reallocb + set r/w ptrs to reflect size.
5437 */
5438 static mblk_t *
5439 tl_resizemp(mblk_t *mp, ssize_t new_size)
5440 {
5441 if ((mp = reallocb(mp, new_size, 0)) == NULL)
5442 return (NULL);
5443
5444 mp->b_rptr = DB_BASE(mp);
5445 mp->b_wptr = mp->b_rptr + new_size;
5446 return (mp);
5447 }
5448
5449 static void
5450 tl_cl_backenable(tl_endpt_t *tep)
5451 {
5452 list_t *l = &tep->te_flowlist;
5453 tl_endpt_t *elp;
5454
5455 ASSERT(IS_CLTS(tep));
5456
5457 for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5458 ASSERT(tep->te_ser == elp->te_ser);
5459 ASSERT(elp->te_flowq == tep);
5460 if (! elp->te_closing)
5461 TL_QENABLE(elp);
5462 elp->te_flowq = NULL;
5463 list_remove(l, elp);
5464 }
5465 }
5466
5467 /*
5468 * Unconnect endpoints.
5469 */
5470 static void
5471 tl_co_unconnect(tl_endpt_t *tep)
5472 {
5473 tl_endpt_t *peer_tep = tep->te_conp;
5474 tl_endpt_t *srv_tep = tep->te_oconp;
5475 list_t *l;
5476 tl_icon_t *tip;
5477 tl_endpt_t *cl_tep;
5478 mblk_t *d_mp;
5479
5480 ASSERT(IS_COTS(tep));
5481 /*
5482 * If our peer is closing, don't use it.
5483 */
5484 if ((peer_tep != NULL) && peer_tep->te_closing) {
5485 TL_UNCONNECT(tep->te_conp);
5486 peer_tep = NULL;
5487 }
5488 if ((srv_tep != NULL) && srv_tep->te_closing) {
5489 TL_UNCONNECT(tep->te_oconp);
5490 srv_tep = NULL;
5491 }
5492
5493 if (tep->te_nicon > 0) {
5494 l = &tep->te_iconp;
5495 /*
5496 * If incoming requests pending, change state
5497 * of clients on disconnect ind event and send
5498 * discon_ind pdu to modules above them
5499 * for server: all clients get disconnect
5500 */
5501
5502 while (tep->te_nicon > 0) {
5503 tip = list_head(l);
5504 cl_tep = tip->ti_tep;
5505
5506 if (cl_tep == NULL) {
5507 tl_freetip(tep, tip);
5508 continue;
5509 }
5510
5511 if (cl_tep->te_oconp != NULL) {
5512 ASSERT(cl_tep != cl_tep->te_oconp);
5513 TL_UNCONNECT(cl_tep->te_oconp);
5514 }
5515
5516 if (cl_tep->te_closing) {
5517 tl_freetip(tep, tip);
5518 continue;
5519 }
5520
5521 enableok(cl_tep->te_wq);
5522 TL_QENABLE(cl_tep);
5523 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5524 if (d_mp != NULL) {
5525 cl_tep->te_state = TS_IDLE;
5526 putnext(cl_tep->te_rq, d_mp);
5527 } else {
5528 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5529 SL_TRACE|SL_ERROR,
5530 "tl_co_unconnect:icmng: "
5531 "allocb failure"));
5532 }
5533 tl_freetip(tep, tip);
5534 }
5535 } else if (srv_tep != NULL) {
5536 /*
5537 * If outgoing request pending, change state
5538 * of server on discon ind event
5539 */
5540
5541 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5542 IS_COTSORD(srv_tep) &&
5543 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5544 /*
5545 * Queue ordrel_ind for server to be picked up
5546 * when the connection is accepted.
5547 */
5548 d_mp = tl_ordrel_ind_alloc();
5549 } else {
5550 /*
5551 * send discon_ind to server
5552 */
5553 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5554 }
5555 if (d_mp == NULL) {
5556 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5557 SL_TRACE|SL_ERROR,
5558 "tl_co_unconnect:outgoing:allocb failure"));
5559 TL_UNCONNECT(tep->te_oconp);
5560 goto discon_peer;
5561 }
5562
5563 /*
5564 * If this is a socket the T_DISCON_IND is queued with
5565 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5566 * from the list of pending connections.
5567 * Note that when te_oconp is set the peer better have
5568 * a t_connind_t for the client.
5569 */
5570 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5571 /*
5572 * Queue the disconnection message.
5573 */
5574 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5575 } else {
5576 tip = tl_icon_find(srv_tep, tep->te_seqno);
5577 if (tip == NULL) {
5578 freemsg(d_mp);
5579 } else {
5580 ASSERT(tep == tip->ti_tep);
5581 ASSERT(tep->te_ser == srv_tep->te_ser);
5582 /*
5583 * Delete tip from the server list.
5584 */
5585 if (srv_tep->te_nicon == 1) {
5586 srv_tep->te_state =
5587 NEXTSTATE(TE_DISCON_IND2,
5588 srv_tep->te_state);
5589 } else {
5590 srv_tep->te_state =
5591 NEXTSTATE(TE_DISCON_IND3,
5592 srv_tep->te_state);
5593 }
5594 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5595 T_DISCON_IND);
5596 putnext(srv_tep->te_rq, d_mp);
5597 tl_freetip(srv_tep, tip);
5598 }
5599 TL_UNCONNECT(tep->te_oconp);
5600 srv_tep = NULL;
5601 }
5602 } else if (peer_tep != NULL) {
5603 /*
5604 * unconnect existing connection
5605 * If connected, change state of peer on
5606 * discon ind event and send discon ind pdu
5607 * to module above it
5608 */
5609
5610 ASSERT(tep->te_ser == peer_tep->te_ser);
5611 if (IS_COTSORD(peer_tep) &&
5612 (peer_tep->te_state == TS_WIND_ORDREL ||
5613 peer_tep->te_state == TS_DATA_XFER)) {
5614 /*
5615 * send ordrel ind
5616 */
5617 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5618 "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5619 peer_tep->te_state,
5620 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5621 d_mp = tl_ordrel_ind_alloc();
5622 if (! d_mp) {
5623 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5624 SL_TRACE|SL_ERROR,
5625 "tl_co_unconnect:connected:"
5626 "allocb failure"));
5627 /*
5628 * Continue with cleaning up peer as
5629 * this side may go away with the close
5630 */
5631 TL_QENABLE(peer_tep);
5632 goto discon_peer;
5633 }
5634 peer_tep->te_state =
5635 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5636
5637 putnext(peer_tep->te_rq, d_mp);
5638 /*
5639 * Handle flow control case. This will generate
5640 * a t_discon_ind message with reason 0 if there
5641 * is data queued on the write side.
5642 */
5643 TL_QENABLE(peer_tep);
5644 } else if (IS_COTSORD(peer_tep) &&
5645 peer_tep->te_state == TS_WREQ_ORDREL) {
5646 /*
5647 * Sent an ordrel_ind. We send a discon with
5648 * with error 0 to inform that the peer is gone.
5649 */
5650 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5651 SL_TRACE|SL_ERROR,
5652 "tl_co_unconnect: discon in state %d",
5653 tep->te_state));
5654 tl_discon_ind(peer_tep, 0);
5655 } else {
5656 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5657 SL_TRACE|SL_ERROR,
5658 "tl_co_unconnect: state %d", tep->te_state));
5659 tl_discon_ind(peer_tep, ECONNRESET);
5660 }
5661
5662 discon_peer:
5663 /*
5664 * Disconnect cross-pointers only for close
5665 */
5666 if (tep->te_closing) {
5667 peer_tep = tep->te_conp;
5668 TL_REMOVE_PEER(peer_tep->te_conp);
5669 TL_REMOVE_PEER(tep->te_conp);
5670 }
5671 }
5672 }
5673
5674 /*
5675 * Note: The following routine does not recover from allocb()
5676 * failures
5677 * The reason should be from the <sys/errno.h> space.
5678 */
5679 static void
5680 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5681 {
5682 mblk_t *d_mp;
5683
5684 if (tep->te_closing)
5685 return;
5686
5687 /*
5688 * flush the queues.
5689 */
5690 flushq(tep->te_rq, FLUSHDATA);
5691 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5692
5693 /*
5694 * send discon ind
5695 */
5696 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5697 if (! d_mp) {
5698 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5699 "tl_discon_ind:allocb failure"));
5700 return;
5701 }
5702 tep->te_state = TS_IDLE;
5703 putnext(tep->te_rq, d_mp);
5704 }
5705
5706 /*
5707 * Note: The following routine does not recover from allocb()
5708 * failures
5709 * The reason should be from the <sys/errno.h> space.
5710 */
5711 static mblk_t *
5712 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5713 {
5714 mblk_t *mp;
5715 struct T_discon_ind *tdi;
5716
5717 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5718 DB_TYPE(mp) = M_PROTO;
5719 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5720 tdi = (struct T_discon_ind *)mp->b_rptr;
5721 tdi->PRIM_type = T_DISCON_IND;
5722 tdi->DISCON_reason = reason;
5723 tdi->SEQ_number = seqnum;
5724 }
5725 return (mp);
5726 }
5727
5728
5729 /*
5730 * Note: The following routine does not recover from allocb()
5731 * failures
5732 */
5733 static mblk_t *
5734 tl_ordrel_ind_alloc(void)
5735 {
5736 mblk_t *mp;
5737 struct T_ordrel_ind *toi;
5738
5739 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5740 DB_TYPE(mp) = M_PROTO;
5741 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5742 toi = (struct T_ordrel_ind *)mp->b_rptr;
5743 toi->PRIM_type = T_ORDREL_IND;
5744 }
5745 return (mp);
5746 }
5747
5748
5749 /*
5750 * Lookup the seqno in the list of queued connections.
5751 */
5752 static tl_icon_t *
5753 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5754 {
5755 list_t *l = &tep->te_iconp;
5756 tl_icon_t *tip = list_head(l);
5757
5758 ASSERT(seqno != 0);
5759
5760 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5761 ;
5762
5763 return (tip);
5764 }
5765
5766 /*
5767 * Queue data for a given T_CONN_IND while verifying that redundant
5768 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5769 * Used when the originator of the connection closes.
5770 */
5771 static void
5772 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5773 {
5774 tl_icon_t *tip;
5775 mblk_t **mpp, *mp;
5776 int prim, nprim;
5777
5778 if (nmp->b_datap->db_type == M_PROTO)
5779 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5780 else
5781 nprim = -1; /* M_DATA */
5782
5783 tip = tl_icon_find(tep, seqno);
5784 if (tip == NULL) {
5785 freemsg(nmp);
5786 return;
5787 }
5788
5789 ASSERT(tip->ti_seqno != 0);
5790 mpp = &tip->ti_mp;
5791 while (*mpp != NULL) {
5792 mp = *mpp;
5793
5794 if (mp->b_datap->db_type == M_PROTO)
5795 prim = ((union T_primitives *)mp->b_rptr)->type;
5796 else
5797 prim = -1; /* M_DATA */
5798
5799 /*
5800 * Allow nothing after a T_DISCON_IND
5801 */
5802 if (prim == T_DISCON_IND) {
5803 freemsg(nmp);
5804 return;
5805 }
5806 /*
5807 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5808 */
5809 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5810 freemsg(nmp);
5811 return;
5812 }
5813 mpp = &(mp->b_next);
5814 }
5815 *mpp = nmp;
5816 }
5817
5818 /*
5819 * Verify if a certain TPI primitive exists on the connind queue.
5820 * Use prim -1 for M_DATA.
5821 * Return non-zero if found.
5822 */
5823 static boolean_t
5824 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5825 {
5826 tl_icon_t *tip = tl_icon_find(tep, seqno);
5827 boolean_t found = B_FALSE;
5828
5829 if (tip != NULL) {
5830 mblk_t *mp;
5831 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5832 found = (DB_TYPE(mp) == M_PROTO &&
5833 ((union T_primitives *)mp->b_rptr)->type == prim);
5834 }
5835 }
5836 return (found);
5837 }
5838
5839 /*
5840 * Send the b_next mblk chain that has accumulated before the connection
5841 * was accepted. Perform the necessary state transitions.
5842 */
5843 static void
5844 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5845 {
5846 mblk_t *mp;
5847 union T_primitives *primp;
5848
5849 if (tep->te_closing) {
5850 tl_icon_freemsgs(mpp);
5851 return;
5852 }
5853
5854 ASSERT(tep->te_state == TS_DATA_XFER);
5855 ASSERT(tep->te_rq->q_first == NULL);
5856
5857 while ((mp = *mpp) != NULL) {
5858 *mpp = mp->b_next;
5859 mp->b_next = NULL;
5860
5861 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5862 switch (DB_TYPE(mp)) {
5863 default:
5864 freemsg(mp);
5865 break;
5866 case M_DATA:
5867 putnext(tep->te_rq, mp);
5868 break;
5869 case M_PROTO:
5870 primp = (union T_primitives *)mp->b_rptr;
5871 switch (primp->type) {
5872 case T_UNITDATA_IND:
5873 case T_DATA_IND:
5874 case T_OPTDATA_IND:
5875 case T_EXDATA_IND:
5876 putnext(tep->te_rq, mp);
5877 break;
5878 case T_ORDREL_IND:
5879 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5880 tep->te_state);
5881 putnext(tep->te_rq, mp);
5882 break;
5883 case T_DISCON_IND:
5884 tep->te_state = TS_IDLE;
5885 putnext(tep->te_rq, mp);
5886 break;
5887 default:
5888 #ifdef DEBUG
5889 cmn_err(CE_PANIC,
5890 "tl_icon_sendmsgs: unknown primitive");
5891 #endif /* DEBUG */
5892 freemsg(mp);
5893 break;
5894 }
5895 break;
5896 }
5897 }
5898 }
5899
5900 /*
5901 * Free the b_next mblk chain that has accumulated before the connection
5902 * was accepted.
5903 */
5904 static void
5905 tl_icon_freemsgs(mblk_t **mpp)
5906 {
5907 mblk_t *mp;
5908
5909 while ((mp = *mpp) != NULL) {
5910 *mpp = mp->b_next;
5911 mp->b_next = NULL;
5912 freemsg(mp);
5913 }
5914 }
5915
5916 /*
5917 * Send M_ERROR
5918 * Note: assumes caller ensured enough space in mp or enough
5919 * memory available. Does not attempt recovery from allocb()
5920 * failures
5921 */
5922
5923 static void
5924 tl_merror(queue_t *wq, mblk_t *mp, int error)
5925 {
5926 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5927
5928 if (tep->te_closing) {
5929 freemsg(mp);
5930 return;
5931 }
5932
5933 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5934 SL_TRACE|SL_ERROR,
5935 "tl_merror: tep=%p, err=%d", (void *)tep, error));
5936
5937 /*
5938 * flush all messages on queue. we are shutting
5939 * the stream down on fatal error
5940 */
5941 flushq(wq, FLUSHALL);
5942 if (IS_COTS(tep)) {
5943 /* connection oriented - unconnect endpoints */
5944 tl_co_unconnect(tep);
5945 }
5946 if (mp->b_cont) {
5947 freemsg(mp->b_cont);
5948 mp->b_cont = NULL;
5949 }
5950
5951 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5952 freemsg(mp);
5953 mp = allocb(1, BPRI_HI);
5954 if (!mp) {
5955 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5956 SL_TRACE|SL_ERROR,
5957 "tl_merror:M_PROTO: out of memory"));
5958 return;
5959 }
5960 }
5961 if (mp) {
5962 DB_TYPE(mp) = M_ERROR;
5963 mp->b_rptr = DB_BASE(mp);
5964 *mp->b_rptr = (char)error;
5965 mp->b_wptr = mp->b_rptr + sizeof (char);
5966 qreply(wq, mp);
5967 } else {
5968 (void) putnextctl1(tep->te_rq, M_ERROR, error);
5969 }
5970 }
5971
5972 static void
5973 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5974 {
5975 ASSERT(cr != NULL);
5976
5977 if (flag & TL_SETCRED) {
5978 struct opthdr *opt = (struct opthdr *)buf;
5979 tl_credopt_t *tlcred;
5980
5981 opt->level = TL_PROT_LEVEL;
5982 opt->name = TL_OPT_PEER_CRED;
5983 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5984
5985 tlcred = (tl_credopt_t *)(opt + 1);
5986 tlcred->tc_uid = crgetuid(cr);
5987 tlcred->tc_gid = crgetgid(cr);
5988 tlcred->tc_ruid = crgetruid(cr);
5989 tlcred->tc_rgid = crgetrgid(cr);
5990 tlcred->tc_suid = crgetsuid(cr);
5991 tlcred->tc_sgid = crgetsgid(cr);
5992 tlcred->tc_ngroups = crgetngroups(cr);
5993 } else if (flag & TL_SETUCRED) {
5994 struct opthdr *opt = (struct opthdr *)buf;
5995
5996 opt->level = TL_PROT_LEVEL;
5997 opt->name = TL_OPT_PEER_UCRED;
5998 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
5999
6000 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6001 } else {
6002 struct T_opthdr *topt = (struct T_opthdr *)buf;
6003 ASSERT(flag & TL_SOCKUCRED);
6004
6005 topt->level = SOL_SOCKET;
6006 topt->name = SCM_UCRED;
6007 topt->len = ucredminsize(cr) + sizeof (*topt);
6008 topt->status = 0;
6009 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6010 }
6011 }
6012
6013 /* ARGSUSED */
6014 static int
6015 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6016 {
6017 /* no default value processed in protocol specific code currently */
6018 return (-1);
6019 }
6020
6021 /* ARGSUSED */
6022 static int
6023 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6024 {
6025 int len;
6026 tl_endpt_t *tep;
6027 int *valp;
6028
6029 tep = (tl_endpt_t *)wq->q_ptr;
6030
6031 len = 0;
6032
6033 /*
6034 * Assumes: option level and name sanity check done elsewhere
6035 */
6036
6037 switch (level) {
6038 case SOL_SOCKET:
6039 if (! IS_SOCKET(tep))
6040 break;
6041 switch (name) {
6042 case SO_RECVUCRED:
6043 len = sizeof (int);
6044 valp = (int *)ptr;
6045 *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6046 break;
6047 default:
6048 break;
6049 }
6050 break;
6051 case TL_PROT_LEVEL:
6052 switch (name) {
6053 case TL_OPT_PEER_CRED:
6054 case TL_OPT_PEER_UCRED:
6055 /*
6056 * option not supposed to retrieved directly
6057 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6058 * when some internal flags set by other options
6059 * Direct retrieval always designed to fail(ignored)
6060 * for this option.
6061 */
6062 break;
6063 }
6064 }
6065 return (len);
6066 }
6067
6068 /* ARGSUSED */
6069 static int
6070 tl_set_opt(
6071 queue_t *wq,
6072 uint_t mgmt_flags,
6073 int level,
6074 int name,
6075 uint_t inlen,
6076 uchar_t *invalp,
6077 uint_t *outlenp,
6078 uchar_t *outvalp,
6079 void *thisdg_attrs,
6080 cred_t *cr)
6081 {
6082 int error;
6083 tl_endpt_t *tep;
6084
6085 tep = (tl_endpt_t *)wq->q_ptr;
6086
6087 error = 0; /* NOERROR */
6088
6089 /*
6090 * Assumes: option level and name sanity checks done elsewhere
6091 */
6092
6093 switch (level) {
6094 case SOL_SOCKET:
6095 if (! IS_SOCKET(tep)) {
6096 error = EINVAL;
6097 break;
6098 }
6099 /*
6100 * TBD: fill in other AF_UNIX socket options and then stop
6101 * returning error.
6102 */
6103 switch (name) {
6104 case SO_RECVUCRED:
6105 /*
6106 * We only support this for datagram sockets;
6107 * getpeerucred handles the connection oriented
6108 * transports.
6109 */
6110 if (! IS_CLTS(tep)) {
6111 error = EINVAL;
6112 break;
6113 }
6114 if (*(int *)invalp == 0)
6115 tep->te_flag &= ~TL_SOCKUCRED;
6116 else
6117 tep->te_flag |= TL_SOCKUCRED;
6118 break;
6119 default:
6120 error = EINVAL;
6121 break;
6122 }
6123 break;
6124 case TL_PROT_LEVEL:
6125 switch (name) {
6126 case TL_OPT_PEER_CRED:
6127 case TL_OPT_PEER_UCRED:
6128 /*
6129 * option not supposed to be set directly
6130 * Its value in initialized for each endpoint at
6131 * driver open time.
6132 * Direct setting always designed to fail for this
6133 * option.
6134 */
6135 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6136 SL_TRACE|SL_ERROR,
6137 "tl_set_opt: option is not supported"));
6138 error = EPROTO;
6139 break;
6140 }
6141 }
6142 return (error);
6143 }
6144
6145
6146 static void
6147 tl_timer(void *arg)
6148 {
6149 queue_t *wq = arg;
6150 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6151
6152 ASSERT(tep);
6153
6154 tep->te_timoutid = 0;
6155
6156 enableok(wq);
6157 /*
6158 * Note: can call wsrv directly here and save context switch
6159 * Consider change when qtimeout (not timeout) is active
6160 */
6161 qenable(wq);
6162 }
6163
6164 static void
6165 tl_buffer(void *arg)
6166 {
6167 queue_t *wq = arg;
6168 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6169
6170 ASSERT(tep);
6171
6172 tep->te_bufcid = 0;
6173 tep->te_nowsrv = B_FALSE;
6174
6175 enableok(wq);
6176 /*
6177 * Note: can call wsrv directly here and save context switch
6178 * Consider change when qbufcall (not bufcall) is active
6179 */
6180 qenable(wq);
6181 }
6182
6183 static void
6184 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6185 {
6186 tl_endpt_t *tep;
6187
6188 tep = (tl_endpt_t *)wq->q_ptr;
6189
6190 if (tep->te_closing) {
6191 freemsg(mp);
6192 return;
6193 }
6194 noenable(wq);
6195
6196 (void) insq(wq, wq->q_first, mp);
6197
6198 if (tep->te_bufcid || tep->te_timoutid) {
6199 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6200 "tl_memrecover:recover %p pending", (void *)wq));
6201 return;
6202 }
6203
6204 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6205 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6206 drv_usectohz(TL_BUFWAIT));
6207 }
6208 }
6209
6210 static void
6211 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6212 {
6213 ASSERT(tip->ti_seqno != 0);
6214
6215 if (tip->ti_mp != NULL) {
6216 tl_icon_freemsgs(&tip->ti_mp);
6217 tip->ti_mp = NULL;
6218 }
6219 if (tip->ti_tep != NULL) {
6220 tl_refrele(tip->ti_tep);
6221 tip->ti_tep = NULL;
6222 }
6223 list_remove(&tep->te_iconp, tip);
6224 kmem_free(tip, sizeof (tl_icon_t));
6225 tep->te_nicon--;
6226 }
6227
6228 /*
6229 * Remove address from address hash.
6230 */
6231 static void
6232 tl_addr_unbind(tl_endpt_t *tep)
6233 {
6234 tl_endpt_t *elp;
6235
6236 if (tep->te_flag & TL_ADDRHASHED) {
6237 if (IS_SOCKET(tep)) {
6238 (void) mod_hash_remove(tep->te_addrhash,
6239 (mod_hash_key_t)tep->te_vp,
6240 (mod_hash_val_t *)&elp);
6241 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6242 tep->te_magic = SOU_MAGIC_IMPLICIT;
6243 } else {
6244 (void) mod_hash_remove(tep->te_addrhash,
6245 (mod_hash_key_t)&tep->te_ap,
6246 (mod_hash_val_t *)&elp);
6247 (void) kmem_free(tep->te_abuf, tep->te_alen);
6248 tep->te_alen = -1;
6249 tep->te_abuf = NULL;
6250 }
6251 tep->te_flag &= ~TL_ADDRHASHED;
6252 }
6253 }