7127 remove -Wno-missing-braces from Makefile.uts
--- old/usr/src/uts/common/io/tl.c
+++ new/usr/src/uts/common/io/tl.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 27 * Copyright (c) 2012 by Delphix. All rights reserved.
28 28 * Copyright 2015 Joyent, Inc.
29 29 */
30 30
31 31 /*
32 32 * Multithreaded STREAMS Local Transport Provider.
33 33 *
34 34 * OVERVIEW
35 35 * ========
36 36 *
37 37 * This driver provides TLI as well as socket semantics. It provides
38 38 * connectionless, connection oriented, and connection oriented with orderly
39 39 * release transports for TLI and sockets. Each transport type has a separate
40 40 * name space (i.e. it is not possible to connect from a socket to a TLI endpoint) -
41 41 * this removes any name space conflicts when binding to socket style transport
42 42 * addresses.
43 43 *
44 44 * NOTE: There is one exception: Socket ticots and ticotsord transports share
45 45 * the same namespace. In fact, sockets always use ticotsord type transport.
46 46 *
47 47 * The driver mode is specified during open() by the minor number used for
48 48 * open.
49 49 *
50 50 * The sockets in addition have the following semantic differences:
51 51 * No support for passing up credentials (TL_SET[U]CRED).
52 52 *
53 53 * Options are passed through transparently from T_CONN_REQ to T_CONN_IND,
54 54 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
55 55 * T_OPTDATA_IND.
56 56 *
57 57 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
58 58 * a T_CONN_RES is received from the acceptor. This means that a socket
59 59 * connect will complete before the peer has called accept.
60 60 *
61 61 *
62 62 * MULTITHREADING
63 63 * ==============
64 64 *
65 65 * The driver does not use STREAMS protection mechanisms. Instead it uses a
66 66 * generic "serializer" abstraction. Most of the operations are executed behind
67 67 * the serializer and are, essentially single-threaded. All functions executed
68 68 * behind the same serializer are strictly serialized. So if one thread calls
69 69 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
70 70 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
71 71 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1)
72 72 * or bar(mp2, arg1); foo(mp1, arg1). But foo() and bar() will never run at the
73 73 * same time.
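
A minimal sketch of that guarantee (foo() and bar() are hypothetical callbacks, not functions in this driver):

	static void foo(mblk_t *mp, void *arg);		/* hypothetical */
	static void bar(mblk_t *mp, void *arg);		/* hypothetical */

	serializer_enter(ser, (srproc_t *)foo, mp1, arg1);	/* thread A */
	serializer_enter(ser, (srproc_t *)bar, mp2, arg1);	/* thread B */

One of the two orders is chosen and the callbacks run back to back; they never overlap.
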
74 74 *
75 75 * Connectionless transports use a single serializer per transport type (one
76 76 * for TLI and one for sockets). Connection-oriented transports use finer-grained
77 77 * serializers.
78 78 *
79 79 * All COTS-type endpoints start their life with private serializers. During
80 80 * connection request processing the endpoint serializer is switched to the
81 81 * listener's serializer and the rest of T_CONN_REQ processing is done on the
82 82 * listener serializer. During T_CONN_RES processing the eager serializer is
83 83 * switched from listener to acceptor serializer and after that point all
84 84 * processing for eager and acceptor happens on this serializer. To avoid races
85 85 * with endpoint close while its serializer may be changing, closes are blocked
86 86 * while serializers are manipulated.
87 87 *
88 88 * References accounting
89 89 * ---------------------
90 90 *
91 91 * Endpoints are reference counted and freed when the last reference is
92 92 * dropped. Functions within the serializer may access the endpoint state even
93 93 * after the endpoint has closed. The te_closing flag being set on the endpoint
94 94 * indicates that the endpoint has entered its close routine.
95 95 *
96 96 * One reference is held for each opened endpoint instance. The reference
97 97 * counter is incremented when the endpoint is linked to another endpoint and
98 98 * decremented when the link disappears. It is also incremented when the
99 99 * endpoint is found by the hash table lookup. This increment is atomic with the
100 100 * lookup itself and happens while the hash table read lock is held.
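
A sketch of the lookup-plus-refhold pattern this describes; mod_hash_find_cb() is the real modhash entry point, and tl_find_callback() and tl_refrele() appear later in this file:

	tl_endpt_t *peer;

	/*
	 * tl_find_callback() refholds the endpoint while the hash read
	 * lock is still held, so the endpoint can't be freed before the
	 * caller gets its reference.
	 */
	if (mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
	    (mod_hash_val_t *)&peer, tl_find_callback) == 0) {
		/* ... examine peer, even if it was just unhashed ... */
		tl_refrele(peer);
	}
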
101 101 *
102 102 * Close synchronization
103 103 * ---------------------
104 104 *
105 105 * During close the endpoint is marked as closing using the te_closing flag. It is
106 106 * usually enough to check for te_closing flag since all other state changes
107 107 * happen after this flag is set and the close has entered the serializer. Immediately
108 108 * after setting the te_closing flag, tl_close() enters the serializer and waits until
109 109 * the callback finishes. This allows all functions called within the serializer to
110 110 * simply check te_closing without any locks.
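
The check itself needs no lock; a condensed sketch of a hypothetical serialized callback:

	static void
	tl_foo_ser(mblk_t *mp, tl_endpt_t *tep)		/* hypothetical */
	{
		if (tep->te_closing) {
			freemsg(mp);	/* close has started; bail out */
			return;
		}
		/* ... normal processing ... */
	}
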
111 111 *
112 112 * Serializer management.
113 113 * ---------------------
114 114 *
115 115 * For COTS transports serializers are created when the endpoint is constructed
116 116 * and destroyed when the endpoint is destructed. CLTS transports use global
117 117 * serializers - one for sockets and one for TLI.
118 118 *
119 119 * COTS serializers have separate reference counts to deal with several
120 120 * endpoints sharing the same serializer. There is a subtle problem related to
121 121 * the serializer destruction. The serializer should never be destroyed by any
122 122 * function executed inside serializer. This means that close has to wait till
123 123 * all serializer activity for this endpoint is finished before it can drop the
124 124 * last reference on the endpoint (which may as well free the serializer). This
125 125 * is only relevant for COTS transports which manage serializers
126 126 * dynamically. For CLTS transports close may complete without waiting for all
127 127 * serializer activity to finish since serializer is only destroyed at driver
128 128 * detach time.
129 129 *
130 130 * COTS endpoints keep track of the number of outstanding requests on the
131 131 * serializer for the endpoint. The code handling accept() avoids changing
132 132 * the client serializer if the client has any pending messages on the serializer
133 133 * and instead moves the acceptor to the listener's serializer.
134 134 *
135 135 *
136 136 * Use of hash tables
137 137 * ------------------
138 138 *
139 139 * The driver uses modhash hash table implementation. Each transport uses two
140 140 * hash tables - one for finding endpoints by acceptor ID and another one for
141 141 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
142 142 * pair of hash tables since sockets only use TICOTSORD.
143 143 *
144 144 * All hash table lookups increment a reference count for returned endpoints,
145 145 * so we may safely check the endpoint state even when the endpoint is removed
146 146 * from the hash by another thread immediately after it is found.
147 147 *
148 148 *
149 149 * CLOSE processing
150 150 * ================
151 151 *
152 152 * The driver enters serializer twice on close(). The close sequence is the
153 153 * following:
154 154 *
155 155 * 1) Wait until closing is safe (te_closewait becomes zero)
156 156 * This step is needed to prevent close during serializer switches. In most
157 157 * cases (close happening after connection establishment) te_closewait is
158 158 * zero.
159 159 * 2) Set te_closing.
160 160 * 3) Call tl_close_ser() within the serializer and wait for it to complete.
161 161 *
162 162 * tl_close_ser() simply marks the endpoint and wakes up the waiting tl_close().
163 163 * It also needs to clear write-side q_next pointers - this should be done
164 164 * before qprocsoff().
165 165 *
166 166 * This synchronous serializer entry during close is needed to ensure that
167 167 * the queue is valid everywhere inside the serializer.
168 168 *
169 169 * Note that in many cases close will execute tl_close_ser() synchronously,
170 170 * so it will not wait at all.
171 171 *
172 172 * 4) Call qprocsoff().
173 173 * 5) Call tl_close_finish_ser() within the serializer and wait for it to
174 174 * complete (for COTS transports). For CLTS transports there is no wait.
175 175 *
176 176 * tl_close_finish_ser() finishes the close process and wakes up the waiting
177 177 * close if there is any.
178 178 *
179 179 * Note that in most cases close will enter tl_close_finish_ser()
180 180 * synchronously and will not wait at all (the sequence is sketched below).
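
Condensed from the real tl_close() further down in this file, the sequence reads:

	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)			/* step 1 */
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;			/* step 2 */
	tep->te_closewait = 1;
	mutex_exit(&tep->te_closelock);

	/* step 3: enter the serializer and wait for tl_close_ser() */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);					/* step 4 */
	/* step 5: tl_close_finish_ser() is entered the same way */
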
181 181 *
182 182 *
183 183 * Flow Control
184 184 * ============
185 185 *
186 186 * The driver implements both read and write side service routines. No one calls
187 187 * putq() on the read queue. The read side service routine tl_rsrv() is called
188 188 * when the read side stream is back-enabled. It enters serializer synchronously
189 189 * (waits till serializer processing is complete). Within serializer it
190 190 * back-enables all endpoints blocked by the queue for connection-less
191 191 * transports and enables write side service processing for the peer for
192 192 * connection-oriented transports.
193 193 *
194 194 * Read and write side service routines use special mblk-sized space in the
195 195 * endpoint structure to enter perimeter.
196 196 *
197 197 * Write-side flow control
198 198 * -----------------------
199 199 *
200 200 * Write side flow control is a bit tricky. The driver needs to deal with two
201 201 * message queues - the explicit STREAMS message queue maintained by
202 202 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
203 203 * queues should be synchronized to preserve message ordering and should
204 204 * maintain a single order determined by the order in which messages enter
205 205 * tl_wput(). In order to maintain the ordering between these two queues the
206 206 * STREAMS queue is only manipulated within the serializer, so the ordering is
207 207 * provided by the serializer.
208 208 *
209 209 * Functions called from tl_wsrv() may sometimes call putbq(). To
210 210 * immediately stop any further processing of the STREAMS message queues the
211 211 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
212 212 * side service processing stops when the flag is set.
213 213 *
214 214 * The tl_wsrv() function enters serializer synchronously and waits for it to
215 215 * complete. The serializer call-back tl_wsrv_ser() either drains all messages
216 216 * on the STREAMS queue or terminates when it notices the te_nowsrv flag
217 217 * set. Note that the maximum number of messages processed by tl_wsrv_ser() is
218 218 * always bounded by the number of messages on the STREAMS queue at the time
219 219 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
220 220 * queue from another serialized entry which can't happen in parallel. This
221 221 * guarantees that tl_wsrv_ser() completes in bounded time (there is no risk
222 222 * of it draining forever while writer places new messages on the STREAMS
223 223 * queue).
224 224 *
225 225 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
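
The drain loop inside tl_wsrv_ser() therefore has roughly this shape (a sketch with the per-message work abstracted away):

	mblk_t *mp;

	while (!tep->te_nowsrv && (mp = getq(tep->te_wq)) != NULL) {
		/*
		 * Process one message. On failure the callee uses
		 * TL_PUTBQ(), which requeues mp and sets te_nowsrv,
		 * terminating this loop.
		 */
		tl_wput_common_ser(mp, tep);
	}
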
226 226 *
227 227 *
228 228 * Unix Domain Sockets
229 229 * ===================
230 230 *
231 231 * The driver knows the structure of Unix Domain sockets addresses and treats
232 232 * them differently from generic TLI addresses. For sockets implicit binds are
233 233 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
234 234 * instead of using address length of zero. Explicit binds specify
235 235 * SOU_MAGIC_EXPLICIT as magic.
236 236 *
237 237 * For implicit binds we always use the minor number as the soua_vp part of the
238 238 * address and avoid any hash table lookups. This saves two hash table lookups
239 239 * per anonymous bind.
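
Concretely, tl_open() below pre-fills the socket address, so every socket endpoint starts out implicitly bound:

	tep->te_alen = TL_SOUX_ADDRLEN;
	tep->te_abuf = &tep->te_uxaddr;
	tep->te_vp = (void *)(uintptr_t)tep->te_minor;
	tep->te_magic = SOU_MAGIC_IMPLICIT;
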
240 240 *
241 241 * For explicit addresses we hash the vnode pointer instead of hashing the
242 242 * full-scale address+zone+length. Hashing by pointer is more efficient than
243 243 * hashing by the full address.
244 244 *
245 245 * For unix domain sockets te_ap always points to the te_uxaddr part of the
246 246 * tep structure, so it should never be freed.
247 247 *
248 248 * Also for sockets the driver always uses minor number as acceptor id.
249 249 *
250 250 * TPI VIOLATIONS
251 251 * --------------
252 252 *
253 253 * This driver violates TPI in several respects for Unix Domain Sockets:
254 254 *
255 255 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
256 256 * is requested and the endpoint is already in use. There is no point in
257 257 * generating an unused address since this address will be rejected by
258 258 * sockfs anyway. For implicit binds it always generates a new address
259 259 * (sets soua_vp to its minor number).
260 260 *
261 261 * 2) It always uses minor number as acceptor ID and never uses queue
262 262 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
263 263 * message and they do not use the queue pointer.
264 264 *
265 265 * 3) For listener sockets the usual sequence is to issue bind() with zero
266 266 * followed by listen(). The listen() should be issued with non-zero
267 267 * backlog, so sotpi_listen() issues unbind request followed by bind
268 268 * request to the same address but with a non-zero qlen value. Both
269 269 * tl_bind() and tl_unbind() require write lock on the hash table to
270 270 * insert/remove the address. The driver does not remove the address from
271 271 * the hash for endpoints that are bound to the explicit address and have
272 272 * backlog of zero. During T_BIND_REQ processing if the address requested
273 273 * is equal to the address the endpoint already has it updates the backlog
274 274 * without reinserting the address in the hash table. This optimization
275 275 * avoids two hash table updates for each listener created. It also
276 276 * avoids the problem of a "stolen" address when another listener may use
277 277 * the same address between the unbind and bind and suddenly listen() fails
278 278 * because the address is in use even though the bind() succeeded.
279 279 *
280 280 *
281 281 * CONNECTIONLESS TRANSPORTS
282 282 * =========================
283 283 *
284 284 * Connectionless transports all share the same serializer (one for TLI and one
285 285 * for sockets). Functions executing behind the serializer can check or modify
286 286 * the state of any endpoint.
287 287 *
288 288 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
289 289 * te_lastep field. The next time X talks to some address A it checks whether A
290 290 * is the same as Y's address and, if it is, there is no need to look up Y. If the
291 291 * address is different or the state of Y is not appropriate (e.g. closed or not
292 292 * idle) X does a lookup using tl_find_peer() and caches the new address.
293 293 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
294 294 * on the endpoint found.
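
A condensed sketch of the fast path (dst is a hypothetical name for the destination address parsed out of the request; the real code in this file also checks the cached peer's TPI state):

	tl_endpt_t *peer = tep->te_lastep;

	if (peer == NULL || peer->te_closing ||
	    !tl_eqaddr(&peer->te_ap, &dst))
		peer = tl_find_peer(tep, &dst);	/* refholds the result */
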
295 295 *
296 296 * When endpoint Y closes, it doesn't try to remove itself from other
297 297 * endpoints' caches. They will detect that Y is gone and will search for the
298 298 * peer endpoint again.
299 299 *
300 300 * Flow Control Handling.
301 301 * ----------------------
302 302 *
303 303 * Each connectionless endpoint keeps a list of endpoints which are
304 304 * flow-controlled by its queue. It also keeps a pointer to the queue which
305 305 * flow-controls itself. Whenever flow control is released for endpoint X it
306 306 * enables all queues from the list. During close it also back-enables everyone
307 307 * in the list. If X is flow-controlled when it is closing, it removes itself
308 308 * from its peer's list.
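
A sketch of the back-enable step for endpoint X (TL_QENABLE() and the flow-control lists are defined later in this file):

	tl_endpt_t *elp;

	while ((elp = list_head(&tep->te_flowlist)) != NULL) {
		ASSERT(elp->te_flowq == tep);
		list_remove(&tep->te_flowlist, elp);
		elp->te_flowq = NULL;
		TL_QENABLE(elp);
	}
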
309 309 *
310 310 * DATA STRUCTURES
311 311 * ===============
312 312 *
313 313 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
314 314 * endpoint state. For connection-oriented transports it keeps a list
315 315 * of pending connections (tl_icon_t). For connectionless transports it keeps a
316 316 * list of endpoints flow controlled by this one.
317 317 *
318 318 * Each transport type is represented by a per-transport data structure
319 319 * tl_transport_state_t. It contains pointers to the acceptor ID hash and the
320 320 * endpoint address hash tables for each transport. It also contains a pointer
321 321 * to the transport serializer for connectionless transports.
322 322 *
323 323 * Each endpoint keeps a link to its transport structure, so the code can find
324 324 * all per-transport information quickly.
325 325 */
326 326
327 327 #include <sys/types.h>
328 328 #include <sys/inttypes.h>
329 329 #include <sys/stream.h>
330 330 #include <sys/stropts.h>
331 331 #define _SUN_TPI_VERSION 2
332 332 #include <sys/tihdr.h>
333 333 #include <sys/strlog.h>
334 334 #include <sys/debug.h>
335 335 #include <sys/cred.h>
336 336 #include <sys/errno.h>
337 337 #include <sys/kmem.h>
338 338 #include <sys/id_space.h>
339 339 #include <sys/modhash.h>
340 340 #include <sys/mkdev.h>
341 341 #include <sys/tl.h>
342 342 #include <sys/stat.h>
343 343 #include <sys/conf.h>
344 344 #include <sys/modctl.h>
345 345 #include <sys/strsun.h>
346 346 #include <sys/socket.h>
347 347 #include <sys/socketvar.h>
348 348 #include <sys/sysmacros.h>
349 349 #include <sys/xti_xtiopt.h>
350 350 #include <sys/ddi.h>
351 351 #include <sys/sunddi.h>
352 352 #include <sys/zone.h>
353 353 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */
354 354 #include <inet/optcom.h>
355 355 #include <sys/strsubr.h>
356 356 #include <sys/ucred.h>
357 357 #include <sys/suntpi.h>
358 358 #include <sys/list.h>
359 359 #include <sys/serializer.h>
360 360
361 361 /*
362 362 * TBD List
363 363 * 14. Eliminate state changes through table
364 364 * 16. AF_UNIX socket options
365 365 * 17. connect() for ticlts
366 366 * 18. support for "netstat" to show AF_UNIX plus TLI local
367 367 * transport connections
368 368 * 21. sanity check to flushing on sending M_ERROR
369 369 */
370 370
371 371 /*
372 372 * CONSTANT DECLARATIONS
373 373 * --------------------
374 374 */
375 375
376 376 /*
377 377 * Local declarations
378 378 */
379 379 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST]
380 380
381 381 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */
382 382 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */
383 383 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */
384 384 /*
385 385 * Hash tables size.
386 386 */
387 387 #define TL_HASH_SIZE 311
388 388
389 389 /*
390 390 * Definitions for module_info
391 391 */
392 392 #define TL_ID (104) /* module ID number */
393 393 #define TL_NAME "tl" /* module name */
394 394 #define TL_MINPSZ (0) /* min packet size */
395 395 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */
396 396 #define TL_HIWAT (16*1024) /* hi water mark */
397 397 #define TL_LOWAT (256) /* lo water mark */
398 398 /*
399 399 * Definition of minor numbers/modes for new transport provider modes.
400 400 * We view the socket use as a separate mode to get a separate name space.
401 401 */
402 402 #define TL_TICOTS 0 /* connection oriented transport */
403 403 #define TL_TICOTSORD 1 /* COTS w/ orderly release */
404 404 #define TL_TICLTS 2 /* connectionless transport */
405 405 #define TL_UNUSED 3
406 406 #define TL_SOCKET 4 /* Socket */
407 407 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS)
408 408 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD)
409 409 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS)
410 410
411 411 #define TL_MINOR_MASK 0x7
412 412 #define TL_MINOR_START (TL_TICLTS + 1)
413 413
414 414 /*
415 415 * LOCAL MACROS
416 416 */
417 417 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t))
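
For example, with sizeof (t_scalar_t) == 4, T_ALIGN(5) == 8 and T_ALIGN(8) == 8: P2ROUNDUP() rounds its first argument up to the next multiple of its second.
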
418 418
419 419 /*
420 420 * EXTERNAL VARIABLE DECLARATIONS
421 421 * -----------------------------
422 422 */
423 423 /*
424 424 * state table defined in the OS space.c
425 425 */
426 426 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
427 427
428 428 /*
429 429 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
430 430 */
431 431 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
432 432 static int tl_close(queue_t *, int, cred_t *);
433 433 static void tl_wput(queue_t *, mblk_t *);
434 434 static void tl_wsrv(queue_t *);
435 435 static void tl_rsrv(queue_t *);
436 436
437 437 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
438 438 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
439 439 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
440 440
441 441
442 442 /*
443 443 * GLOBAL DATA STRUCTURES AND VARIABLES
444 444 * -----------------------------------
445 445 */
446 446
447 447 /*
448 448 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
449 449 * For now, we only manage the SO_RECVUCRED option but we also have
450 450 * harmless dummy options to make things work with some common code we access.
451 451 */
452 452 opdes_t tl_opt_arr[] = {
453 453 /* The SO_TYPE is needed for the hack below */
454 454 {
455 455 SO_TYPE,
456 456 SOL_SOCKET,
457 457 OA_R,
458 458 OA_R,
459 459 OP_NP,
460 460 0,
461 461 sizeof (t_scalar_t),
462 - 0
462 + { 0 }
463 463 },
464 464 {
465 465 SO_RECVUCRED,
466 466 SOL_SOCKET,
467 467 OA_RW,
468 468 OA_RW,
469 469 OP_NP,
470 470 0,
471 471 sizeof (int),
472 - 0
472 + { 0 }
473 473 }
474 474 };
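
The two 0 -> { 0 } changes above are the substance of this change: the final member of opdes_t is an aggregate (the default-value buffer), so initializing it with a bare scalar relies on implicit inner braces and trips gcc's -Wmissing-braces once that warning is no longer suppressed in Makefile.uts. A generic illustration with a hypothetical struct:

	struct foo { int a; char buf[8]; };

	struct foo w  = { 1, 0 };	/* warns: missing braces */
	struct foo ok = { 1, { 0 } };	/* fully braced; no warning */
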
475 475
476 476 /*
477 477 * Table of all supported levels
478 478 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
479 479 * any supported options so we need this info separately.
480 480 *
481 481 * This is needed only for topmost tpi providers.
482 482 */
483 483 optlevel_t tl_valid_levels_arr[] = {
484 484 XTI_GENERIC,
485 485 SOL_SOCKET,
486 486 TL_PROT_LEVEL
487 487 };
488 488
489 489 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr)
490 490 /*
491 491 * Current upper bound on the amount of space needed to return all options.
492 492 * Additional options with data size of sizeof(long) are handled automatically.
493 493 * Others need to be handled by hand.
494 494 */
495 495 #define TL_MAX_OPT_BUF_LEN \
496 496 ((A_CNT(tl_opt_arr) << 2) + \
497 497 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \
498 498 64 + sizeof (struct T_optmgmt_ack))
499 499
500 500 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr)
501 501
502 502 /*
503 503 * transport addr structure
504 504 */
505 505 typedef struct tl_addr {
506 506 zoneid_t ta_zoneid; /* Zone scope of address */
507 507 t_scalar_t ta_alen; /* length of abuf */
508 508 void *ta_abuf; /* the addr itself */
509 509 } tl_addr_t;
510 510
511 511 /*
512 512 * Refcounted version of serializer.
513 513 */
514 514 typedef struct tl_serializer {
515 515 uint_t ts_refcnt;
516 516 serializer_t *ts_serializer;
517 517 } tl_serializer_t;
518 518
519 519 /*
520 520 * Each transport type has a separate state.
521 521 * Per-transport state.
522 522 */
523 523 typedef struct tl_transport_state {
524 524 char *tr_name;
525 525 minor_t tr_minor;
526 526 uint32_t tr_defaddr;
527 527 mod_hash_t *tr_ai_hash;
528 528 mod_hash_t *tr_addr_hash;
529 529 tl_serializer_t *tr_serializer;
530 530 } tl_transport_state_t;
531 531
532 532 #define TL_DFADDR 0x1000
533 533
534 534 static tl_transport_state_t tl_transports[] = {
535 535 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
536 536 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
537 537 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
538 538 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
539 539 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
540 540 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
541 541 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
542 542 };
543 543
544 544 #define TL_MAXTRANSPORT A_CNT(tl_transports)
545 545
546 546 struct tl_endpt;
547 547 typedef struct tl_endpt tl_endpt_t;
548 548
549 549 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
550 550
551 551 /*
552 552 * Data structure used to represent pending connects.
553 553 * Records enough information so that the connecting peer can close
554 554 * before the connection gets accepted.
555 555 */
556 556 typedef struct tl_icon {
557 557 list_node_t ti_node;
558 558 struct tl_endpt *ti_tep; /* NULL if peer has already closed */
559 559 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */
560 560 t_scalar_t ti_seqno; /* Sequence number */
561 561 } tl_icon_t;
562 562
563 563 typedef struct so_ux_addr soux_addr_t;
564 564 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t)
565 565
566 566 /*
567 567 * Maximum number of unaccepted connection indications allowed per listener.
568 568 */
569 569 #define TL_MAXQLEN 4096
570 570 int tl_maxqlen = TL_MAXQLEN;
571 571
572 572 /*
573 573 * transport endpoint structure
574 574 */
575 575 struct tl_endpt {
576 576 queue_t *te_rq; /* stream read queue */
577 577 queue_t *te_wq; /* stream write queue */
578 578 uint32_t te_refcnt;
579 579 int32_t te_state; /* TPI state of endpoint */
580 580 minor_t te_minor; /* minor number */
581 581 #define te_seqno te_minor
582 582 uint_t te_flag; /* flag field */
583 583 boolean_t te_nowsrv;
584 584 tl_serializer_t *te_ser; /* Serializer to use */
585 585 #define te_serializer te_ser->ts_serializer
586 586
587 587 soux_addr_t te_uxaddr; /* Socket address */
588 588 #define te_magic te_uxaddr.soua_magic
589 589 #define te_vp te_uxaddr.soua_vp
590 590 tl_addr_t te_ap; /* addr bound to this endpt */
591 591 #define te_zoneid te_ap.ta_zoneid
592 592 #define te_alen te_ap.ta_alen
593 593 #define te_abuf te_ap.ta_abuf
594 594
595 595 tl_transport_state_t *te_transport;
596 596 #define te_addrhash te_transport->tr_addr_hash
597 597 #define te_aihash te_transport->tr_ai_hash
598 598 #define te_defaddr te_transport->tr_defaddr
599 599 cred_t *te_credp; /* endpoint user credentials */
600 600 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */
601 601
602 602 /*
603 603 * State specific for connection-oriented and connectionless transports.
604 604 */
605 605 union {
606 606 /* Connection-oriented state. */
607 607 struct {
608 608 t_uscalar_t _te_nicon; /* count of conn requests */
609 609 t_uscalar_t _te_qlen; /* max conn requests */
610 610 tl_endpt_t *_te_oconp; /* conn request pending */
611 611 tl_endpt_t *_te_conp; /* connected endpt */
612 612 #ifndef _ILP32
613 613 void *_te_pad;
614 614 #endif
615 615 list_t _te_iconp; /* list of conn ind. pending */
616 616 } _te_cots_state;
617 617 /* Connection-less state. */
618 618 struct {
619 619 tl_endpt_t *_te_lastep; /* last dest. endpoint */
620 620 tl_endpt_t *_te_flowq; /* flow controlled on whom */
621 621 list_node_t _te_flows; /* lists of connections */
622 622 list_t _te_flowlist; /* Who flowcontrols on me */
623 623 } _te_clts_state;
624 624 } _te_transport_state;
625 625 #define te_nicon _te_transport_state._te_cots_state._te_nicon
626 626 #define te_qlen _te_transport_state._te_cots_state._te_qlen
627 627 #define te_oconp _te_transport_state._te_cots_state._te_oconp
628 628 #define te_conp _te_transport_state._te_cots_state._te_conp
629 629 #define te_iconp _te_transport_state._te_cots_state._te_iconp
630 630 #define te_lastep _te_transport_state._te_clts_state._te_lastep
631 631 #define te_flowq _te_transport_state._te_clts_state._te_flowq
632 632 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist
633 633 #define te_flows _te_transport_state._te_clts_state._te_flows
634 634
635 635 bufcall_id_t te_bufcid; /* outstanding bufcall id */
636 636 timeout_id_t te_timoutid; /* outstanding timeout id */
637 637 pid_t te_cpid; /* cached pid of endpoint */
638 638 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */
639 639 /*
640 640 * Pieces of the endpoint state needed for closing.
641 641 */
642 642 kmutex_t te_closelock;
643 643 kcondvar_t te_closecv;
644 644 uint8_t te_closing; /* The endpoint started closing */
645 645 uint8_t te_closewait; /* Wait in close until zero */
646 646 mblk_t te_closemp; /* for entering serializer on close */
647 647 mblk_t te_rsrvmp; /* for entering serializer on rsrv */
648 648 mblk_t te_wsrvmp; /* for entering serializer on wsrv */
649 649 kmutex_t te_srv_lock;
650 650 kcondvar_t te_srv_cv;
651 651 uint8_t te_rsrv_active; /* Running in tl_rsrv() */
652 652 uint8_t te_wsrv_active; /* Running in tl_wsrv() */
653 653 /*
654 654 * Pieces of the endpoint state needed for serializer transitions.
655 655 */
656 656 kmutex_t te_ser_lock; /* Protects the count below */
657 657 uint_t te_ser_count; /* Number of messages on serializer */
658 658 };
659 659
660 660 /*
661 661 * Flag values. Lower 4 bits specify the transport used.
662 662 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only;
663 663 * they make it easier to identify the endpoint.
664 664 */
665 665 #define TL_LISTENER 0x00010 /* the listener endpoint */
666 666 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */
667 667 #define TL_EAGER 0x00040 /* connecting endpoint */
668 668 #define TL_ACCEPTED 0x00080 /* accepted connection */
669 669 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */
670 670 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */
671 671 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */
672 672 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */
673 673 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */
674 674 /*
675 675 * Boolean checks for the endpoint type.
676 676 */
677 677 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0)
678 678 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0)
679 679 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0)
680 680 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0)
681 681
682 682 /*
683 683 * Certain operations are always used together. These macros reduce the chance
684 684 * of missing a part of a combination.
685 685 */
686 686 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
687 687 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }
688 688
689 689 #define TL_PUTBQ(x, mp) { \
690 690 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \
691 691 (x)->te_nowsrv = B_TRUE; \
692 692 (void) putbq((x)->te_wq, mp); \
693 693 }
694 694
695 695 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
696 696 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }
697 697
698 698 /*
699 699 * STREAMS driver glue data structures.
700 700 */
701 701 static struct module_info tl_minfo = {
702 702 TL_ID, /* mi_idnum */
703 703 TL_NAME, /* mi_idname */
704 704 TL_MINPSZ, /* mi_minpsz */
705 705 TL_MAXPSZ, /* mi_maxpsz */
706 706 TL_HIWAT, /* mi_hiwat */
707 707 TL_LOWAT /* mi_lowat */
708 708 };
709 709
710 710 static struct qinit tl_rinit = {
711 711 NULL, /* qi_putp */
712 712 (int (*)())tl_rsrv, /* qi_srvp */
713 713 tl_open, /* qi_qopen */
714 714 tl_close, /* qi_qclose */
715 715 NULL, /* qi_qadmin */
716 716 &tl_minfo, /* qi_minfo */
717 717 NULL /* qi_mstat */
718 718 };
719 719
720 720 static struct qinit tl_winit = {
721 721 (int (*)())tl_wput, /* qi_putp */
722 722 (int (*)())tl_wsrv, /* qi_srvp */
723 723 NULL, /* qi_qopen */
724 724 NULL, /* qi_qclose */
725 725 NULL, /* qi_qadmin */
726 726 &tl_minfo, /* qi_minfo */
727 727 NULL /* qi_mstat */
728 728 };
729 729
730 730 static struct streamtab tlinfo = {
731 731 &tl_rinit, /* st_rdinit */
732 732 &tl_winit, /* st_wrinit */
733 733 NULL, /* st_muxrinit */
734 734 NULL /* st_muxwrinit */
735 735 };
736 736
737 737 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
738 738 nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
739 739
740 740 static struct modldrv modldrv = {
741 741 &mod_driverops, /* Type of module -- pseudo driver here */
742 742 "TPI Local Transport (tl)",
743 743 &tl_devops, /* driver ops */
744 744 };
745 745
746 746 /*
747 747 * Module linkage information for the kernel.
748 748 */
749 749 static struct modlinkage modlinkage = {
750 750 MODREV_1,
751 - &modldrv,
752 - NULL
751 + { &modldrv, NULL }
753 752 };
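
Same fix here: struct modlinkage ends in an array of linkage pointers (abridged from sys/modctl.h):

	struct modlinkage {
		int	ml_rev;		/* rev of loadable modules system */
		void	*ml_linkage[MODMAXLINK];
	};

so the driver pointer and its terminating NULL now sit inside their own braces.
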
754 753
755 754 /*
756 755 * Templates for response to info request
757 756 * Check sanity of unlimited connect data etc.
758 757 */
759 758
760 759 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO)
761 760 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO)
762 761
763 762 static struct T_info_ack tl_cots_info_ack =
764 763 {
765 764 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */
766 765 T_INFINITE, /* TSDU size */
767 766 T_INFINITE, /* ETSDU size */
768 767 T_INFINITE, /* CDATA_size */
769 768 T_INFINITE, /* DDATA_size */
770 769 T_INFINITE, /* ADDR_size */
771 770 T_INFINITE, /* OPT_size */
772 771 0, /* TIDU_size - fill at run time */
773 772 T_COTS, /* SERV_type */
774 773 -1, /* CURRENT_state */
775 774 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */
776 775 };
777 776
778 777 static struct T_info_ack tl_clts_info_ack =
779 778 {
780 779 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */
781 780 0, /* TSDU_size - fill at run time */
782 781 -2, /* ETSDU_size -2 => not supported */
783 782 -2, /* CDATA_size -2 => not supported */
784 783 -2, /* DDATA_size -2 => not supported */
785 784 -1, /* ADDR_size -1 => infinite */
786 785 -1, /* OPT_size */
787 786 0, /* TIDU_size - fill at run time */
788 787 T_CLTS, /* SERV_type */
789 788 -1, /* CURRENT_state */
790 789 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
791 790 };
792 791
793 792 /*
794 793 * private copy of devinfo pointer used in tl_info
795 794 */
796 795 static dev_info_t *tl_dip;
797 796
798 797 /*
799 798 * Endpoints cache.
800 799 */
801 800 static kmem_cache_t *tl_cache;
802 801 /*
803 802 * Minor number space.
804 803 */
805 804 static id_space_t *tl_minors;
806 805
807 806 /*
808 807 * Default Data Unit size.
809 808 */
810 809 static t_scalar_t tl_tidusz;
811 810
812 811 /*
813 812 * Size of hash tables.
814 813 */
815 814 static size_t tl_hash_size = TL_HASH_SIZE;
816 815
817 816 /*
818 817 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
819 818 * for sockets.
820 819 */
821 820 static int tl_disable_early_connect = 0;
822 821 static int tl_client_closing_when_accepting;
823 822
824 823 static int tl_serializer_noswitch;
825 824
826 825 /*
827 826 * LOCAL FUNCTION PROTOTYPES
828 827 * -------------------------
829 828 */
830 829 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
831 830 static void tl_do_proto(mblk_t *, tl_endpt_t *);
832 831 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
833 832 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
834 833 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
835 834 t_scalar_t);
836 835 static void tl_bind(mblk_t *, tl_endpt_t *);
837 836 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
838 837 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
839 838 static void tl_unbind(mblk_t *, tl_endpt_t *);
840 839 static void tl_optmgmt(queue_t *, mblk_t *);
841 840 static void tl_conn_req(queue_t *, mblk_t *);
842 841 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
843 842 static void tl_conn_res(mblk_t *, tl_endpt_t *);
844 843 static void tl_discon_req(mblk_t *, tl_endpt_t *);
845 844 static void tl_capability_req(mblk_t *, tl_endpt_t *);
846 845 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
847 846 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
848 847 static void tl_info_req(mblk_t *, tl_endpt_t *);
849 848 static void tl_addr_req(mblk_t *, tl_endpt_t *);
850 849 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
851 850 static void tl_data(mblk_t *, tl_endpt_t *);
852 851 static void tl_exdata(mblk_t *, tl_endpt_t *);
853 852 static void tl_ordrel(mblk_t *, tl_endpt_t *);
854 853 static void tl_unitdata(mblk_t *, tl_endpt_t *);
855 854 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
856 855 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
857 856 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
858 857 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
859 858 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
860 859 static void tl_cl_backenable(tl_endpt_t *);
861 860 static void tl_co_unconnect(tl_endpt_t *);
862 861 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
863 862 static void tl_discon_ind(tl_endpt_t *, uint32_t);
864 863 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
865 864 static mblk_t *tl_ordrel_ind_alloc(void);
866 865 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
867 866 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
868 867 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
869 868 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
870 869 static void tl_icon_freemsgs(mblk_t **);
871 870 static void tl_merror(queue_t *, mblk_t *, int);
872 871 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
873 872 static int tl_default_opt(queue_t *, int, int, uchar_t *);
874 873 static int tl_get_opt(queue_t *, int, int, uchar_t *);
875 874 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
876 875 uchar_t *, void *, cred_t *);
877 876 static void tl_memrecover(queue_t *, mblk_t *, size_t);
878 877 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
879 878 static void tl_free(tl_endpt_t *);
880 879 static int tl_constructor(void *, void *, int);
881 880 static void tl_destructor(void *, void *);
882 881 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
883 882 static tl_serializer_t *tl_serializer_alloc(int);
884 883 static void tl_serializer_refhold(tl_serializer_t *);
885 884 static void tl_serializer_refrele(tl_serializer_t *);
886 885 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
887 886 static void tl_serializer_exit(tl_endpt_t *);
888 887 static boolean_t tl_noclose(tl_endpt_t *);
889 888 static void tl_closeok(tl_endpt_t *);
890 889 static void tl_refhold(tl_endpt_t *);
891 890 static void tl_refrele(tl_endpt_t *);
892 891 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
893 892 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
894 893 static void tl_close_ser(mblk_t *, tl_endpt_t *);
895 894 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
896 895 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
897 896 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
898 897 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
899 898 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
900 899 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
901 900 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
902 901 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
903 902 static void tl_addr_unbind(tl_endpt_t *);
904 903
905 904 /*
906 905 * Initialize option database object for TL
907 906 */
908 907
909 908 optdb_obj_t tl_opt_obj = {
910 909 tl_default_opt, /* TL default value function pointer */
911 910 tl_get_opt, /* TL get function pointer */
912 911 tl_set_opt, /* TL set function pointer */
913 912 TL_OPT_ARR_CNT, /* TL option database count of entries */
914 913 tl_opt_arr, /* TL option database */
915 914 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */
916 915 tl_valid_levels_arr /* TL valid level array */
917 916 };
918 917
919 918 /*
920 919 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
921 920 * ---------------------------------------
922 921 */
923 922
924 923 /*
925 924 * Loadable module routines
926 925 */
927 926 int
928 927 _init(void)
929 928 {
930 929 return (mod_install(&modlinkage));
931 930 }
932 931
933 932 int
934 933 _fini(void)
935 934 {
936 935 return (mod_remove(&modlinkage));
937 936 }
938 937
939 938 int
940 939 _info(struct modinfo *modinfop)
941 940 {
942 941 return (mod_info(&modlinkage, modinfop));
943 942 }
944 943
945 944 /*
946 945 * Driver Entry Points and Other routines
947 946 */
948 947 static int
949 948 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
950 949 {
951 950 int i;
952 951 char name[32];
953 952
954 953 /*
955 954 * Resume from a checkpoint state.
956 955 */
957 956 if (cmd == DDI_RESUME)
958 957 return (DDI_SUCCESS);
959 958
960 959 if (cmd != DDI_ATTACH)
961 960 return (DDI_FAILURE);
962 961
963 962 /*
964 963 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that
965 964 * streams message sizes can be unlimited. We use a defined constant
966 965 * instead.
967 966 */
968 967 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
969 968
970 969 /*
971 970 * Create subdevices for each transport.
972 971 */
973 972 for (i = 0; i < TL_UNUSED; i++) {
974 973 if (ddi_create_minor_node(devi,
975 974 tl_transports[i].tr_name,
976 975 S_IFCHR, tl_transports[i].tr_minor,
977 976 DDI_PSEUDO, NULL) == DDI_FAILURE) {
978 977 ddi_remove_minor_node(devi, NULL);
979 978 return (DDI_FAILURE);
980 979 }
981 980 }
982 981
983 982 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
984 983 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
985 984
986 985 if (tl_cache == NULL) {
987 986 ddi_remove_minor_node(devi, NULL);
988 987 return (DDI_FAILURE);
989 988 }
990 989
991 990 tl_minors = id_space_create("tl_minor_space",
992 991 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
993 992
994 993 /*
995 994 * Create hash tables and serializers for each transport
996 995 */
997 996 for (i = 0; i < TL_MAXTRANSPORT; i++) {
998 997 tl_transport_state_t *t = &tl_transports[i];
999 998
1000 999 if (i == TL_UNUSED)
1001 1000 continue;
1002 1001
1003 1002 /* Socket COTSORD shares namespace with COTS */
1004 1003 if (i == TL_SOCK_COTSORD) {
1005 1004 t->tr_ai_hash =
1006 1005 tl_transports[TL_SOCK_COTS].tr_ai_hash;
1007 1006 ASSERT(t->tr_ai_hash != NULL);
1008 1007 t->tr_addr_hash =
1009 1008 tl_transports[TL_SOCK_COTS].tr_addr_hash;
1010 1009 ASSERT(t->tr_addr_hash != NULL);
1011 1010 continue;
1012 1011 }
1013 1012
1014 1013 /*
1015 1014 * Create hash tables.
1016 1015 */
1017 1016 (void) snprintf(name, sizeof (name), "%s_ai_hash",
1018 1017 t->tr_name);
1019 1018 #ifdef _ILP32
1020 1019 if (i & TL_SOCKET)
1021 1020 t->tr_ai_hash =
1022 1021 mod_hash_create_idhash(name, tl_hash_size - 1,
1023 1022 mod_hash_null_valdtor);
1024 1023 else
1025 1024 t->tr_ai_hash =
1026 1025 mod_hash_create_ptrhash(name, tl_hash_size,
1027 1026 mod_hash_null_valdtor, sizeof (queue_t));
1028 1027 #else
1029 1028 t->tr_ai_hash =
1030 1029 mod_hash_create_idhash(name, tl_hash_size - 1,
1031 1030 mod_hash_null_valdtor);
1032 1031 #endif /* _ILP32 */
1033 1032
1034 1033 if (i & TL_SOCKET) {
1035 1034 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1036 1035 t->tr_name);
1037 1036 t->tr_addr_hash = mod_hash_create_ptrhash(name,
1038 1037 tl_hash_size, mod_hash_null_valdtor,
1039 1038 sizeof (uintptr_t));
1040 1039 } else {
1041 1040 (void) snprintf(name, sizeof (name), "%s_addr_hash",
1042 1041 t->tr_name);
1043 1042 t->tr_addr_hash = mod_hash_create_extended(name,
1044 1043 tl_hash_size, mod_hash_null_keydtor,
1045 1044 mod_hash_null_valdtor,
1046 1045 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1047 1046 }
1048 1047
1049 1048 /* Create serializer for connectionless transports. */
1050 1049 if (i & TL_TICLTS)
1051 1050 t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1052 1051 }
1053 1052
1054 1053 tl_dip = devi;
1055 1054
1056 1055 return (DDI_SUCCESS);
1057 1056 }
1058 1057
1059 1058 static int
1060 1059 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1061 1060 {
1062 1061 int i;
1063 1062
1064 1063 if (cmd == DDI_SUSPEND)
1065 1064 return (DDI_SUCCESS);
1066 1065
1067 1066 if (cmd != DDI_DETACH)
1068 1067 return (DDI_FAILURE);
1069 1068
1070 1069 /*
1071 1070 * Destroy arenas and hash tables.
1072 1071 */
1073 1072 for (i = 0; i < TL_MAXTRANSPORT; i++) {
1074 1073 tl_transport_state_t *t = &tl_transports[i];
1075 1074
1076 1075 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1077 1076 continue;
1078 1077
1079 1078 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1080 1079 if (t->tr_serializer != NULL) {
1081 1080 tl_serializer_refrele(t->tr_serializer);
1082 1081 t->tr_serializer = NULL;
1083 1082 }
1084 1083
1085 1084 #ifdef _ILP32
1086 1085 if (i & TL_SOCKET)
1087 1086 mod_hash_destroy_idhash(t->tr_ai_hash);
1088 1087 else
1089 1088 mod_hash_destroy_ptrhash(t->tr_ai_hash);
1090 1089 #else
1091 1090 mod_hash_destroy_idhash(t->tr_ai_hash);
1092 1091 #endif /* _ILP32 */
1093 1092 t->tr_ai_hash = NULL;
1094 1093 if (i & TL_SOCKET)
1095 1094 mod_hash_destroy_ptrhash(t->tr_addr_hash);
1096 1095 else
1097 1096 mod_hash_destroy_hash(t->tr_addr_hash);
1098 1097 t->tr_addr_hash = NULL;
1099 1098 }
1100 1099
1101 1100 kmem_cache_destroy(tl_cache);
1102 1101 tl_cache = NULL;
1103 1102 id_space_destroy(tl_minors);
1104 1103 tl_minors = NULL;
1105 1104 ddi_remove_minor_node(devi, NULL);
1106 1105 return (DDI_SUCCESS);
1107 1106 }
1108 1107
1109 1108 /* ARGSUSED */
1110 1109 static int
1111 1110 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1112 1111 {
1113 1112
1114 1113 int retcode = DDI_FAILURE;
1115 1114
1116 1115 switch (infocmd) {
1117 1116
1118 1117 case DDI_INFO_DEVT2DEVINFO:
1119 1118 if (tl_dip != NULL) {
1120 1119 *result = (void *)tl_dip;
1121 1120 retcode = DDI_SUCCESS;
1122 1121 }
1123 1122 break;
1124 1123
1125 1124 case DDI_INFO_DEVT2INSTANCE:
1126 1125 *result = (void *)0;
1127 1126 retcode = DDI_SUCCESS;
1128 1127 break;
1129 1128
1130 1129 default:
1131 1130 break;
1132 1131 }
1133 1132 return (retcode);
1134 1133 }
1135 1134
1136 1135 /*
1137 1136 * Endpoint reference management.
1138 1137 */
1139 1138 static void
1140 1139 tl_refhold(tl_endpt_t *tep)
1141 1140 {
1142 1141 atomic_inc_32(&tep->te_refcnt);
1143 1142 }
1144 1143
1145 1144 static void
1146 1145 tl_refrele(tl_endpt_t *tep)
1147 1146 {
1148 1147 ASSERT(tep->te_refcnt != 0);
1149 1148
1150 1149 if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
1151 1150 tl_free(tep);
1152 1151 }
1153 1152
1154 1153 /*ARGSUSED*/
1155 1154 static int
1156 1155 tl_constructor(void *buf, void *cdrarg, int kmflags)
1157 1156 {
1158 1157 tl_endpt_t *tep = buf;
1159 1158
1160 1159 bzero(tep, sizeof (tl_endpt_t));
1161 1160 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1162 1161 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1163 1162 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1164 1163 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1165 1164 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1166 1165
1167 1166 return (0);
1168 1167 }
1169 1168
1170 1169 /*ARGSUSED*/
1171 1170 static void
1172 1171 tl_destructor(void *buf, void *cdrarg)
1173 1172 {
1174 1173 tl_endpt_t *tep = buf;
1175 1174
1176 1175 mutex_destroy(&tep->te_closelock);
1177 1176 cv_destroy(&tep->te_closecv);
1178 1177 mutex_destroy(&tep->te_srv_lock);
1179 1178 cv_destroy(&tep->te_srv_cv);
1180 1179 mutex_destroy(&tep->te_ser_lock);
1181 1180 }
1182 1181
1183 1182 static void
1184 1183 tl_free(tl_endpt_t *tep)
1185 1184 {
1186 1185 ASSERT(tep->te_refcnt == 0);
1187 1186 ASSERT(tep->te_transport != NULL);
1188 1187 ASSERT(tep->te_rq == NULL);
1189 1188 ASSERT(tep->te_wq == NULL);
1190 1189 ASSERT(tep->te_ser != NULL);
1191 1190 ASSERT(tep->te_ser_count == 0);
1192 1191 ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1193 1192
1194 1193 if (IS_SOCKET(tep)) {
1195 1194 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1196 1195 ASSERT(tep->te_abuf == &tep->te_uxaddr);
1197 1196 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1198 1197 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1199 1198 } else if (tep->te_abuf != NULL) {
1200 1199 kmem_free(tep->te_abuf, tep->te_alen);
1201 1200 tep->te_alen = -1; /* uninitialized */
1202 1201 tep->te_abuf = NULL;
1203 1202 } else {
1204 1203 ASSERT(tep->te_alen == -1);
1205 1204 }
1206 1205
1207 1206 id_free(tl_minors, tep->te_minor);
1208 1207 ASSERT(tep->te_credp == NULL);
1209 1208
1210 1209 if (tep->te_hash_hndl != NULL)
1211 1210 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1212 1211
1213 1212 if (IS_COTS(tep)) {
1214 1213 TL_REMOVE_PEER(tep->te_conp);
1215 1214 TL_REMOVE_PEER(tep->te_oconp);
1216 1215 tl_serializer_refrele(tep->te_ser);
1217 1216 tep->te_ser = NULL;
1218 1217 ASSERT(tep->te_nicon == 0);
1219 1218 ASSERT(list_head(&tep->te_iconp) == NULL);
1220 1219 } else {
1221 1220 ASSERT(tep->te_lastep == NULL);
1222 1221 ASSERT(list_head(&tep->te_flowlist) == NULL);
1223 1222 ASSERT(tep->te_flowq == NULL);
1224 1223 }
1225 1224
1226 1225 ASSERT(tep->te_bufcid == 0);
1227 1226 ASSERT(tep->te_timoutid == 0);
1228 1227 bzero(&tep->te_ap, sizeof (tep->te_ap));
1229 1228 tep->te_acceptor_id = 0;
1230 1229
1231 1230 ASSERT(tep->te_closewait == 0);
1232 1231 ASSERT(!tep->te_rsrv_active);
1233 1232 ASSERT(!tep->te_wsrv_active);
1234 1233 tep->te_closing = 0;
1235 1234 tep->te_nowsrv = B_FALSE;
1236 1235 tep->te_flag = 0;
1237 1236
1238 1237 kmem_cache_free(tl_cache, tep);
1239 1238 }
1240 1239
1241 1240 /*
1242 1241 * Allocate/free reference-counted wrappers for serializers.
1243 1242 */
1244 1243 static tl_serializer_t *
1245 1244 tl_serializer_alloc(int flags)
1246 1245 {
1247 1246 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1248 1247 serializer_t *ser;
1249 1248
1250 1249 if (s == NULL)
1251 1250 return (NULL);
1252 1251
1253 1252 ser = serializer_create(flags);
1254 1253
1255 1254 if (ser == NULL) {
1256 1255 kmem_free(s, sizeof (tl_serializer_t));
1257 1256 return (NULL);
1258 1257 }
1259 1258
1260 1259 s->ts_refcnt = 1;
1261 1260 s->ts_serializer = ser;
1262 1261 return (s);
1263 1262 }
1264 1263
1265 1264 static void
1266 1265 tl_serializer_refhold(tl_serializer_t *s)
1267 1266 {
1268 1267 atomic_inc_32(&s->ts_refcnt);
1269 1268 }
1270 1269
1271 1270 static void
1272 1271 tl_serializer_refrele(tl_serializer_t *s)
1273 1272 {
1274 1273 if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
1275 1274 serializer_destroy(s->ts_serializer);
1276 1275 kmem_free(s, sizeof (tl_serializer_t));
1277 1276 }
1278 1277 }
1279 1278
1280 1279 /*
1281 1280 * Post a request on the endpoint serializer. For COTS transports keep track of
1282 1281 * the number of pending requests.
1283 1282 */
1284 1283 static void
1285 1284 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1286 1285 {
1287 1286 if (IS_COTS(tep)) {
1288 1287 mutex_enter(&tep->te_ser_lock);
1289 1288 tep->te_ser_count++;
1290 1289 mutex_exit(&tep->te_ser_lock);
1291 1290 }
1292 1291 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1293 1292 }
1294 1293
1295 1294 /*
1296 1295 * Complete processing the request on the serializer. Decrement the counter for
1297 1296 * pending requests for COTS transports.
1298 1297 */
1299 1298 static void
1300 1299 tl_serializer_exit(tl_endpt_t *tep)
1301 1300 {
1302 1301 if (IS_COTS(tep)) {
1303 1302 mutex_enter(&tep->te_ser_lock);
1304 1303 ASSERT(tep->te_ser_count != 0);
1305 1304 tep->te_ser_count--;
1306 1305 mutex_exit(&tep->te_ser_lock);
1307 1306 }
1308 1307 }
1309 1308
1310 1309 /*
1311 1310 * Hash management functions.
1312 1311 */
1313 1312
1314 1313 /*
1315 1314 * Return TRUE if two addresses are equal, false otherwise.
1316 1315 */
1317 1316 static boolean_t
1318 1317 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1319 1318 {
1320 1319 return ((ap1->ta_alen > 0) &&
1321 1320 (ap1->ta_alen == ap2->ta_alen) &&
1322 1321 (ap1->ta_zoneid == ap2->ta_zoneid) &&
1323 1322 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1324 1323 }
1325 1324
1326 1325 /*
1327 1326 * This function is called whenever an endpoint is found in the hash table.
1328 1327 */
1329 1328 /* ARGSUSED0 */
1330 1329 static void
1331 1330 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1332 1331 {
1333 1332 tl_refhold((tl_endpt_t *)val);
1334 1333 }
1335 1334
1336 1335 /*
1337 1336 * Address hash function.
1338 1337 */
1339 1338 /* ARGSUSED */
1340 1339 static uint_t
1341 1340 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1342 1341 {
1343 1342 tl_addr_t *ap = (tl_addr_t *)key;
1344 1343 size_t len = ap->ta_alen;
1345 1344 uchar_t *p = ap->ta_abuf;
1346 1345 uint_t i, g;
1347 1346
1348 1347 ASSERT((len > 0) && (p != NULL));
1349 1348
1350 1349 for (i = ap->ta_zoneid; len-- != 0; p++) {
1351 1350 i = (i << 4) + (*p);
1352 1351 if ((g = (i & 0xf0000000U)) != 0) {
1353 1352 i ^= (g >> 24);
1354 1353 i ^= g;
1355 1354 }
1356 1355 }
1357 1356 return (i);
1358 1357 }
1359 1358
1360 1359 /*
1361 1360 * This function is used by hash lookups. It compares two generic addresses.
1362 1361 */
1363 1362 static int
1364 1363 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1365 1364 {
1366 1365 #ifdef DEBUG
1367 1366 tl_addr_t *ap1 = (tl_addr_t *)key1;
1368 1367 tl_addr_t *ap2 = (tl_addr_t *)key2;
1369 1368
1370 1369 ASSERT(key1 != NULL);
1371 1370 ASSERT(key2 != NULL);
1372 1371
1373 1372 ASSERT(ap1->ta_abuf != NULL);
1374 1373 ASSERT(ap2->ta_abuf != NULL);
1375 1374 ASSERT(ap1->ta_alen > 0);
1376 1375 ASSERT(ap2->ta_alen > 0);
1377 1376 #endif
1378 1377
1379 1378 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1380 1379 }
1381 1380
1382 1381 /*
1383 1382 * Prevent endpoint from closing if possible.
1384 1383 * Return B_TRUE on success, B_FALSE on failure.
1385 1384 */
1386 1385 static boolean_t
1387 1386 tl_noclose(tl_endpt_t *tep)
1388 1387 {
1389 1388 boolean_t rc = B_FALSE;
1390 1389
1391 1390 mutex_enter(&tep->te_closelock);
1392 1391 if (! tep->te_closing) {
1393 1392 ASSERT(tep->te_closewait == 0);
1394 1393 tep->te_closewait++;
1395 1394 rc = B_TRUE;
1396 1395 }
1397 1396 mutex_exit(&tep->te_closelock);
1398 1397 return (rc);
1399 1398 }
1400 1399
1401 1400 /*
1402 1401 * Allow endpoint to close if needed.
1403 1402 */
1404 1403 static void
1405 1404 tl_closeok(tl_endpt_t *tep)
1406 1405 {
1407 1406 ASSERT(tep->te_closewait > 0);
1408 1407 mutex_enter(&tep->te_closelock);
1409 1408 ASSERT(tep->te_closewait == 1);
1410 1409 tep->te_closewait--;
1411 1410 cv_signal(&tep->te_closecv);
1412 1411 mutex_exit(&tep->te_closelock);
1413 1412 }
1414 1413
1415 1414 /*
1416 1415 * STREAMS open entry point.
1417 1416 */
1418 1417 /* ARGSUSED */
1419 1418 static int
1420 1419 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
1421 1420 {
1422 1421 tl_endpt_t *tep;
1423 1422 minor_t minor = getminor(*devp);
1424 1423
1425 1424 /*
1426 1425 * Driver is called directly. Both CLONEOPEN and MODOPEN
1427 1426 * are illegal
1428 1427 */
1429 1428 if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1430 1429 return (ENXIO);
1431 1430
1432 1431 if (rq->q_ptr != NULL)
1433 1432 return (0);
1434 1433
1435 1434 /* Minor number should specify the mode used for the driver. */
1436 1435 if ((minor >= TL_UNUSED))
1437 1436 return (ENXIO);
1438 1437
1439 1438 if (oflag & SO_SOCKSTR) {
1440 1439 minor |= TL_SOCKET;
1441 1440 }
1442 1441
1443 1442 tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1444 1443 tep->te_refcnt = 1;
1445 1444 tep->te_cpid = curproc->p_pid;
1446 1445 rq->q_ptr = WR(rq)->q_ptr = tep;
1447 1446 tep->te_state = TS_UNBND;
1448 1447 tep->te_credp = credp;
1449 1448 crhold(credp);
1450 1449 tep->te_zoneid = getzoneid();
1451 1450
1452 1451 tep->te_flag = minor & TL_MINOR_MASK;
1453 1452 tep->te_transport = &tl_transports[minor];
1454 1453
1455 1454 /* Allocate a unique minor number for this instance. */
1456 1455 tep->te_minor = (minor_t)id_alloc(tl_minors);
1457 1456
1458 1457 /* Reserve hash handle for bind(). */
1459 1458 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1460 1459
1461 1460 /* Transport-specific initialization */
1462 1461 if (IS_COTS(tep)) {
1463 1462 /* Use private serializer */
1464 1463 tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1465 1464
1466 1465 /* Create list for pending connections */
1467 1466 list_create(&tep->te_iconp, sizeof (tl_icon_t),
1468 1467 offsetof(tl_icon_t, ti_node));
1469 1468 tep->te_qlen = 0;
1470 1469 tep->te_nicon = 0;
1471 1470 tep->te_oconp = NULL;
1472 1471 tep->te_conp = NULL;
1473 1472 } else {
1474 1473 /* Use shared serializer */
1475 1474 tep->te_ser = tep->te_transport->tr_serializer;
1476 1475 bzero(&tep->te_flows, sizeof (list_node_t));
1477 1476 /* Create list for flow control */
1478 1477 list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1479 1478 offsetof(tl_endpt_t, te_flows));
1480 1479 tep->te_flowq = NULL;
1481 1480 tep->te_lastep = NULL;
1482 1481
1483 1482 }
1484 1483
1485 1484 /* Initialize endpoint address */
1486 1485 if (IS_SOCKET(tep)) {
1487 1486 /* Socket-specific address handling. */
1488 1487 tep->te_alen = TL_SOUX_ADDRLEN;
1489 1488 tep->te_abuf = &tep->te_uxaddr;
1490 1489 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1491 1490 tep->te_magic = SOU_MAGIC_IMPLICIT;
1492 1491 } else {
1493 1492 tep->te_alen = -1;
1494 1493 tep->te_abuf = NULL;
1495 1494 }
1496 1495
1497 1496 /* clone the driver */
1498 1497 *devp = makedevice(getmajor(*devp), tep->te_minor);
1499 1498
1500 1499 tep->te_rq = rq;
1501 1500 tep->te_wq = WR(rq);
1502 1501
1503 1502 #ifdef _ILP32
1504 1503 if (IS_SOCKET(tep))
1505 1504 tep->te_acceptor_id = tep->te_minor;
1506 1505 else
1507 1506 tep->te_acceptor_id = (t_uscalar_t)rq;
1508 1507 #else
1509 1508 tep->te_acceptor_id = tep->te_minor;
1510 1509 #endif /* _ILP32 */
1511 1510
1512 1511
1513 1512 qprocson(rq);
1514 1513
1515 1514 /*
1516 1515 * Insert acceptor ID in the hash. The AI hash always sleeps on
1517 1516 * insertion so insertion can't fail.
1518 1517 */
1519 1518 (void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1520 1519 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1521 1520 (mod_hash_val_t)tep);
1522 1521
1523 1522 return (0);
1524 1523 }
1525 1524
1526 1525 /* ARGSUSED1 */
1527 1526 static int
1528 1527 tl_close(queue_t *rq, int flag, cred_t *credp)
1529 1528 {
1530 1529 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1531 1530 tl_endpt_t *elp = NULL;
1532 1531 queue_t *wq = tep->te_wq;
1533 1532 int rc;
1534 1533
1535 1534 ASSERT(wq == WR(rq));
1536 1535
1537 1536 /*
1538 1537 * Remove the endpoint from acceptor hash.
1539 1538 */
1540 1539 rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1541 1540 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1542 1541 (mod_hash_val_t *)&elp);
1543 1542 ASSERT(rc == 0 && tep == elp);
1544 1543 if ((rc != 0) || (tep != elp)) {
1545 1544 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1546 1545 SL_TRACE|SL_ERROR,
1547 1546 "tl_close:inconsistency in AI hash"));
1548 1547 }
1549 1548
1550 1549 /*
1551 1550 * Wait till close is safe, then mark endpoint as closing.
1552 1551 */
1553 1552 mutex_enter(&tep->te_closelock);
1554 1553 while (tep->te_closewait)
1555 1554 cv_wait(&tep->te_closecv, &tep->te_closelock);
1556 1555 tep->te_closing = B_TRUE;
1557 1556 /*
1558 1557 * Will wait for the serializer part of the close to finish, so set
1559 1558 * te_closewait now.
1560 1559 */
1561 1560 tep->te_closewait = 1;
1562 1561 tep->te_nowsrv = B_FALSE;
1563 1562 mutex_exit(&tep->te_closelock);
1564 1563
1565 1564 /*
1566 1565 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1567 1566 * It is safe because close will wait for tl_close_ser to finish.
1568 1567 */
1569 1568 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1570 1569
1571 1570 /*
1572 1571 * Wait for the first phase of close to complete before qprocsoff().
1573 1572 */
1574 1573 mutex_enter(&tep->te_closelock);
1575 1574 while (tep->te_closewait)
1576 1575 cv_wait(&tep->te_closecv, &tep->te_closelock);
1577 1576 mutex_exit(&tep->te_closelock);
1578 1577
1579 1578 qprocsoff(rq);
1580 1579
1581 1580 if (tep->te_bufcid) {
1582 1581 qunbufcall(rq, tep->te_bufcid);
1583 1582 tep->te_bufcid = 0;
1584 1583 }
1585 1584 if (tep->te_timoutid) {
1586 1585 (void) quntimeout(rq, tep->te_timoutid);
1587 1586 tep->te_timoutid = 0;
1588 1587 }
1589 1588
1590 1589 /*
1591 1590 * Finish close behind serializer.
1592 1591 *
1593 1592  * For a CLTS endpoint, increase the refcount and continue close
1594 1593  * processing with serializer protection. This processing may happen
1595 1594  * asynchronously with the completion of tl_close().
1596 1595  *
1597 1596  * For a COTS endpoint, wait before destroying tep, since the serializer
1598 1597  * may go away together with tep and we need to destroy the serializer
1599 1598  * outside of the serializer context.
1600 1599 */
1601 1600 ASSERT(tep->te_closewait == 0);
1602 1601 if (IS_COTS(tep))
1603 1602 tep->te_closewait = 1;
1604 1603 else
1605 1604 tl_refhold(tep);
1606 1605
1607 1606 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1608 1607
1609 1608 /*
1610 1609 * For connection-oriented transports wait for all serializer activity
1611 1610 * to settle down.
1612 1611 */
1613 1612 if (IS_COTS(tep)) {
1614 1613 mutex_enter(&tep->te_closelock);
1615 1614 while (tep->te_closewait)
1616 1615 cv_wait(&tep->te_closecv, &tep->te_closelock);
1617 1616 mutex_exit(&tep->te_closelock);
1618 1617 }
1619 1618
1620 1619 crfree(tep->te_credp);
1621 1620 tep->te_credp = NULL;
1622 1621 tep->te_wq = NULL;
1623 1622 tl_refrele(tep);
1624 1623 /*
1625 1624 * tep is likely to be destroyed now, so can't reference it any more.
1626 1625 */
1627 1626
1628 1627 rq->q_ptr = wq->q_ptr = NULL;
1629 1628 return (0);
1630 1629 }
1631 1630
1632 1631 /*
1633 1632 * First phase of close processing done behind the serializer.
1634 1633 *
1635 1634 * Do not drop the reference in the end - tl_close() wants this reference to
1636 1635 * stay.
1637 1636 */
1638 1637 /* ARGSUSED0 */
1639 1638 static void
1640 1639 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1641 1640 {
1642 1641 ASSERT(tep->te_closing);
1643 1642 ASSERT(tep->te_closewait == 1);
1644 1643 ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1645 1644
1646 1645 tep->te_flag |= TL_CLOSE_SER;
1647 1646
1648 1647 /*
1649 1648 * Drain out all messages on queue except for TL_TICOTS where the
1650 1649 * abortive release semantics permit discarding of data on close
1651 1650 */
1652 1651 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1653 1652 tl_wsrv_ser(NULL, tep);
1654 1653 }
1655 1654
1656 1655 /* Remove address from hash table. */
1657 1656 tl_addr_unbind(tep);
1658 1657 /*
1659 1658 * qprocsoff() gets confused when q->q_next is not NULL on the write
1660 1659 * queue of the driver, so clear these before qprocsoff() is called.
1661 1660 * Also clear q_next for the peer since this queue is going away.
1662 1661 */
1663 1662 if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1664 1663 tl_endpt_t *peer_tep = tep->te_conp;
1665 1664
1666 1665 tep->te_wq->q_next = NULL;
1667 1666 if ((peer_tep != NULL) && !peer_tep->te_closing)
1668 1667 peer_tep->te_wq->q_next = NULL;
1669 1668 }
1670 1669
1671 1670 tep->te_rq = NULL;
1672 1671
1673 1672 /* wake up tl_close() */
1674 1673 tl_closeok(tep);
1675 1674 tl_serializer_exit(tep);
1676 1675 }
1677 1676
1678 1677 /*
1679 1678  * Second phase of tl_close(). Should wake up tl_close() for COTS mode
1680 1679  * and drop the reference for CLTS.
1681 1680  *
1682 1681  * Called from the serializer.
1683 1682 */
1684 1683 /* ARGSUSED0 */
1685 1684 static void
1686 1685 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1687 1686 {
1688 1687 ASSERT(tep->te_closing);
1689 1688 IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1690 1689 IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1691 1690
1692 1691 tep->te_state = -1; /* Uninitialized */
1693 1692 if (IS_COTS(tep)) {
1694 1693 tl_co_unconnect(tep);
1695 1694 } else {
1696 1695 /* Connectionless specific cleanup */
1697 1696 TL_REMOVE_PEER(tep->te_lastep);
1698 1697 /*
1699 1698 * Backenable anybody that is flow controlled waiting for
1700 1699 * this endpoint.
1701 1700 */
1702 1701 tl_cl_backenable(tep);
1703 1702 if (tep->te_flowq != NULL) {
1704 1703 list_remove(&(tep->te_flowq->te_flowlist), tep);
1705 1704 tep->te_flowq = NULL;
1706 1705 }
1707 1706 }
1708 1707
1709 1708 tl_serializer_exit(tep);
1710 1709 if (IS_COTS(tep))
1711 1710 tl_closeok(tep);
1712 1711 else
1713 1712 tl_refrele(tep);
1714 1713 }
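
Taken together, tl_close(), tl_close_ser() and tl_close_finish_ser() form a
fixed two-phase handshake. A condensed sketch of the ordering, as implemented
above:

	/*
	 *   tl_close() thread                  serializer
	 *   -----------------                  ----------
	 *   wait until te_closewait == 0
	 *   te_closing = B_TRUE, te_closewait = 1
	 *   enter tl_close_ser()         -->   drain queue, tl_addr_unbind(),
	 *                                      tl_closeok() wakes tl_close()
	 *   qprocsoff(), qunbufcall(),
	 *   quntimeout()
	 *   COTS: te_closewait = 1
	 *   CLTS: tl_refhold()
	 *   enter tl_close_finish_ser()  -->   per-type cleanup, then
	 *                                      COTS: tl_closeok()
	 *                                      CLTS: tl_refrele()
	 *   COTS: wait until te_closewait == 0
	 *   crfree(), tl_refrele()
	 */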
1715 1714
1716 1715 /*
1717 1716 * STREAMS write-side put procedure.
1718 1717 * Enter serializer for most of the processing.
1719 1718 *
1720 1719 * The T_CONN_REQ is processed outside of serializer.
1721 1720 */
1722 1721 static void
1723 1722 tl_wput(queue_t *wq, mblk_t *mp)
1724 1723 {
1725 1724 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1726 1725 ssize_t msz = MBLKL(mp);
1727 1726 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
1728 1727 tlproc_t *tl_proc = NULL;
1729 1728
1730 1729 switch (DB_TYPE(mp)) {
1731 1730 case M_DATA:
1732 1731 /* Only valid for connection-oriented transports */
1733 1732 if (IS_CLTS(tep)) {
1734 1733 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1735 1734 SL_TRACE|SL_ERROR,
1736 1735 "tl_wput:M_DATA invalid for ticlts driver"));
1737 1736 tl_merror(wq, mp, EPROTO);
1738 1737 return;
1739 1738 }
1740 1739 tl_proc = tl_wput_data_ser;
1741 1740 break;
1742 1741
1743 1742 case M_IOCTL:
1744 1743 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1745 1744 case TL_IOC_CREDOPT:
1746 1745 /* FALLTHROUGH */
1747 1746 case TL_IOC_UCREDOPT:
1748 1747 /*
1749 1748 * Serialize endpoint state change.
1750 1749 */
1751 1750 tl_proc = tl_do_ioctl_ser;
1752 1751 break;
1753 1752
1754 1753 default:
1755 1754 miocnak(wq, mp, 0, EINVAL);
1756 1755 return;
1757 1756 }
1758 1757 break;
1759 1758
1760 1759 case M_FLUSH:
1761 1760 /*
1762 1761 * do canonical M_FLUSH processing
1763 1762 */
1764 1763 if (*mp->b_rptr & FLUSHW) {
1765 1764 flushq(wq, FLUSHALL);
1766 1765 *mp->b_rptr &= ~FLUSHW;
1767 1766 }
1768 1767 if (*mp->b_rptr & FLUSHR) {
1769 1768 flushq(RD(wq), FLUSHALL);
1770 1769 qreply(wq, mp);
1771 1770 } else {
1772 1771 freemsg(mp);
1773 1772 }
1774 1773 return;
1775 1774
1776 1775 case M_PROTO:
1777 1776 if (msz < sizeof (prim->type)) {
1778 1777 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1779 1778 SL_TRACE|SL_ERROR,
1780 1779 "tl_wput:M_PROTO data too short"));
1781 1780 tl_merror(wq, mp, EPROTO);
1782 1781 return;
1783 1782 }
1784 1783 switch (prim->type) {
1785 1784 case T_OPTMGMT_REQ:
1786 1785 case T_SVR4_OPTMGMT_REQ:
1787 1786 /*
1788 1787 * Process TPI option management requests immediately
1789 1788 * in put procedure regardless of in-order processing
1790 1789 * of already queued messages.
1791 1790 			 * (Note: This driver supports the AF_UNIX socket
1792 1791 			 * implementation. Without this processing,
1793 1792 			 * setsockopt() on a socket endpoint would block on
1794 1793 			 * flow-controlled endpoints, which it should not. This
1795 1794 			 * is required for successful execution of the VSU
1796 1795 			 * socket tests and is consistent with BSD socket
1797 1796			 * behavior).
1797 1796 */
1798 1797 tl_optmgmt(wq, mp);
1799 1798 return;
1800 1799 case O_T_BIND_REQ:
1801 1800 case T_BIND_REQ:
1802 1801 tl_proc = tl_bind_ser;
1803 1802 break;
1804 1803 case T_CONN_REQ:
1805 1804 if (IS_CLTS(tep)) {
1806 1805 tl_merror(wq, mp, EPROTO);
1807 1806 return;
1808 1807 }
1809 1808 tl_conn_req(wq, mp);
1810 1809 return;
1811 1810 case T_DATA_REQ:
1812 1811 case T_OPTDATA_REQ:
1813 1812 case T_EXDATA_REQ:
1814 1813 case T_ORDREL_REQ:
1815 1814 tl_proc = tl_putq_ser;
1816 1815 break;
1817 1816 case T_UNITDATA_REQ:
1818 1817 if (IS_COTS(tep) ||
1819 1818 (msz < sizeof (struct T_unitdata_req))) {
1820 1819 tl_merror(wq, mp, EPROTO);
1821 1820 return;
1822 1821 }
1823 1822 if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1824 1823 tl_proc = tl_unitdata_ser;
1825 1824 } else {
1826 1825 tl_proc = tl_putq_ser;
1827 1826 }
1828 1827 break;
1829 1828 default:
1830 1829 /*
1831 1830 * process in service procedure if message already
1832 1831 * queued (maintain in-order processing)
1833 1832 */
1834 1833 if (wq->q_first != NULL) {
1835 1834 tl_proc = tl_putq_ser;
1836 1835 } else {
1837 1836 tl_proc = tl_wput_ser;
1838 1837 }
1839 1838 break;
1840 1839 }
1841 1840 break;
1842 1841
1843 1842 case M_PCPROTO:
1844 1843 /*
1845 1844 * Check that the message has enough data to figure out TPI
1846 1845 * primitive.
1847 1846 */
1848 1847 if (msz < sizeof (prim->type)) {
1849 1848 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1850 1849 SL_TRACE|SL_ERROR,
1851 1850 			    "tl_wput:M_PCPROTO data too short"));
1852 1851 tl_merror(wq, mp, EPROTO);
1853 1852 return;
1854 1853 }
1855 1854 switch (prim->type) {
1856 1855 case T_CAPABILITY_REQ:
1857 1856 tl_capability_req(mp, tep);
1858 1857 return;
1859 1858 case T_INFO_REQ:
1860 1859 tl_proc = tl_info_req_ser;
1861 1860 break;
1862 1861 case T_ADDR_REQ:
1863 1862 tl_proc = tl_addr_req_ser;
1864 1863 break;
1865 1864
1866 1865 default:
1867 1866 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1868 1867 SL_TRACE|SL_ERROR,
1869 1868 "tl_wput:unknown TPI msg primitive"));
1870 1869 tl_merror(wq, mp, EPROTO);
1871 1870 return;
1872 1871 }
1873 1872 break;
1874 1873 default:
1875 1874 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1876 1875 "tl_wput:default:unexpected Streams message"));
1877 1876 freemsg(mp);
1878 1877 return;
1879 1878 }
1880 1879
1881 1880 /*
1882 1881 * Continue processing via serializer.
1883 1882 */
1884 1883 ASSERT(tl_proc != NULL);
1885 1884 tl_refhold(tep);
1886 1885 tl_serializer_enter(tep, tl_proc, mp);
1887 1886 }
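
In summary, tl_wput() routes messages as follows (a condensed map of the
switch above; "in place" means the message is handled without entering the
serializer):

	/*
	 *   M_DATA                        -> tl_wput_data_ser (COTS only)
	 *   M_IOCTL TL_IOC_[U]CREDOPT     -> tl_do_ioctl_ser
	 *   M_FLUSH                       -> canonical flush, in place
	 *   M_PROTO T_[SVR4_]OPTMGMT_REQ  -> tl_optmgmt, in place
	 *   M_PROTO [O_]T_BIND_REQ        -> tl_bind_ser
	 *   M_PROTO T_CONN_REQ            -> tl_conn_req, outside serializer
	 *   M_PROTO data/ordrel requests  -> tl_putq_ser
	 *   M_PROTO T_UNITDATA_REQ        -> tl_unitdata_ser or tl_putq_ser
	 *   M_PROTO other primitives      -> tl_wput_ser or tl_putq_ser
	 *   M_PCPROTO T_CAPABILITY_REQ    -> tl_capability_req, in place
	 *   M_PCPROTO T_INFO_REQ          -> tl_info_req_ser
	 *   M_PCPROTO T_ADDR_REQ          -> tl_addr_req_ser
	 *   anything else                 -> tl_merror()/freemsg()
	 */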
1888 1887
1889 1888 /*
1890 1889 * Place message on the queue while preserving order.
1891 1890 */
1892 1891 static void
1893 1892 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1894 1893 {
1895 1894 if (tep->te_closing) {
1896 1895 tl_wput_ser(mp, tep);
1897 1896 } else {
1898 1897 TL_PUTQ(tep, mp);
1899 1898 tl_serializer_exit(tep);
1900 1899 tl_refrele(tep);
1901 1900 }
1902 1901
1903 1902 }
1904 1903
1905 1904 static void
1906 1905 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1907 1906 {
1908 1907 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1909 1908
1910 1909 switch (DB_TYPE(mp)) {
1911 1910 case M_DATA:
1912 1911 tl_data(mp, tep);
1913 1912 break;
1914 1913 case M_PROTO:
1915 1914 tl_do_proto(mp, tep);
1916 1915 break;
1917 1916 default:
1918 1917 freemsg(mp);
1919 1918 break;
1920 1919 }
1921 1920 }
1922 1921
1923 1922 /*
1924 1923 * Write side put procedure called from serializer.
1925 1924 */
1926 1925 static void
1927 1926 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1928 1927 {
1929 1928 tl_wput_common_ser(mp, tep);
1930 1929 tl_serializer_exit(tep);
1931 1930 tl_refrele(tep);
1932 1931 }
1933 1932
1934 1933 /*
1935 1934 * M_DATA processing. Called from serializer.
1936 1935 */
1937 1936 static void
1938 1937 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1939 1938 {
1940 1939 tl_endpt_t *peer_tep = tep->te_conp;
1941 1940 queue_t *peer_rq;
1942 1941
1943 1942 ASSERT(DB_TYPE(mp) == M_DATA);
1944 1943 ASSERT(IS_COTS(tep));
1945 1944
1946 1945 	IMPLY(peer_tep, tep->te_ser == peer_tep->te_ser);
1947 1946
1948 1947 /*
1949 1948 * fastpath for data. Ignore flow control if tep is closing.
1950 1949 */
1951 1950 if ((peer_tep != NULL) &&
1952 1951 !peer_tep->te_closing &&
1953 1952 ((tep->te_state == TS_DATA_XFER) ||
1954 1953 (tep->te_state == TS_WREQ_ORDREL)) &&
1955 1954 (tep->te_wq != NULL) &&
1956 1955 (tep->te_wq->q_first == NULL) &&
1957 1956 ((peer_tep->te_state == TS_DATA_XFER) ||
1958 1957 (peer_tep->te_state == TS_WREQ_ORDREL)) &&
1959 1958 ((peer_rq = peer_tep->te_rq) != NULL) &&
1960 1959 (canputnext(peer_rq) || tep->te_closing)) {
1961 1960 putnext(peer_rq, mp);
1962 1961 } else if (tep->te_closing) {
1963 1962 /*
1964 1963 * It is possible that by the time we got here tep started to
1965 1964 * close. If the write queue is not empty, and the state is
1966 1965 * TS_DATA_XFER the data should be delivered in order, so we
1967 1966 * call putq() instead of freeing the data.
1968 1967 */
1969 1968 if ((tep->te_wq != NULL) &&
1970 1969 ((tep->te_state == TS_DATA_XFER) ||
1971 1970 (tep->te_state == TS_WREQ_ORDREL))) {
1972 1971 TL_PUTQ(tep, mp);
1973 1972 } else {
1974 1973 freemsg(mp);
1975 1974 }
1976 1975 } else {
1977 1976 TL_PUTQ(tep, mp);
1978 1977 }
1979 1978
1980 1979 tl_serializer_exit(tep);
1981 1980 tl_refrele(tep);
1982 1981 }
1983 1982
1984 1983 /*
1985 1984 * Write side service routine.
1986 1985 *
1987 1986 * All actual processing happens within serializer which is entered
1988 1987 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1989 1988 * messages that need processing may have arrived, so tl_wsrv repeats until
1990 1989  * the queue is empty or te_nowsrv is set.
1991 1990 */
1992 1991 static void
1993 1992 tl_wsrv(queue_t *wq)
1994 1993 {
1995 1994 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1996 1995
1997 1996 while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1998 1997 mutex_enter(&tep->te_srv_lock);
1999 1998 ASSERT(tep->te_wsrv_active == B_FALSE);
2000 1999 tep->te_wsrv_active = B_TRUE;
2001 2000 mutex_exit(&tep->te_srv_lock);
2002 2001
2003 2002 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2004 2003
2005 2004 /*
2006 2005 * Wait for serializer job to complete.
2007 2006 */
2008 2007 mutex_enter(&tep->te_srv_lock);
2009 2008 while (tep->te_wsrv_active) {
2010 2009 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2011 2010 }
2012 2011 cv_signal(&tep->te_srv_cv);
2013 2012 mutex_exit(&tep->te_srv_lock);
2014 2013 }
2015 2014 }
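
The synchronous hand-off tl_wsrv() performs against tl_wsrv_ser() is a
self-contained pattern. A userland analogue, as a minimal sketch using POSIX
threads (handoff_t and the function names are hypothetical, not part of the
driver; the fields must first be initialized with pthread_mutex_init() and
pthread_cond_init()):

	#include <pthread.h>

	typedef struct {
		pthread_mutex_t	lock;	/* plays te_srv_lock */
		pthread_cond_t	cv;	/* plays te_srv_cv */
		int		active;	/* plays te_wsrv_active */
	} handoff_t;

	/* The tl_wsrv() side: mark the job active, sleep until it is done. */
	void
	submit_and_wait(handoff_t *h)
	{
		pthread_mutex_lock(&h->lock);
		h->active = 1;
		/* ... hand the job to the worker thread here ... */
		while (h->active)
			pthread_cond_wait(&h->cv, &h->lock);
		pthread_mutex_unlock(&h->lock);
	}

	/* The tl_wsrv_ser() side: finish the job and wake the submitter. */
	void
	complete(handoff_t *h)
	{
		pthread_mutex_lock(&h->lock);
		h->active = 0;
		pthread_cond_signal(&h->cv);
		pthread_mutex_unlock(&h->lock);
	}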
2016 2015
2017 2016 /*
2018 2017 * Serialized write side processing of the STREAMS queue.
2019 2018  * May be called either from tl_wsrv() or from tl_close_ser(), in which case
2020 2019  * ser_mp is NULL.
2021 2020 */
2022 2021 static void
2023 2022 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2024 2023 {
2025 2024 mblk_t *mp;
2026 2025 queue_t *wq = tep->te_wq;
2027 2026
2028 2027 ASSERT(wq != NULL);
2029 2028 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2030 2029 tl_wput_common_ser(mp, tep);
2031 2030 }
2032 2031
2033 2032 /*
2034 2033 * Wakeup service routine unless called from close.
2035 2034 * If ser_mp is specified, the caller is tl_wsrv().
2036 2035 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2037 2036 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2038 2037 * be no matching tl_serializer_exit() in this case.
2039 2038 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2040 2039 * waiting on te_srv_cv.
2041 2040 */
2042 2041 if (ser_mp != NULL) {
2043 2042 /*
2044 2043 * We are called from tl_wsrv.
2045 2044 */
2046 2045 mutex_enter(&tep->te_srv_lock);
2047 2046 ASSERT(tep->te_wsrv_active);
2048 2047 tep->te_wsrv_active = B_FALSE;
2049 2048 cv_signal(&tep->te_srv_cv);
2050 2049 mutex_exit(&tep->te_srv_lock);
2051 2050 tl_serializer_exit(tep);
2052 2051 }
2053 2052 }
2054 2053
2055 2054 /*
2056 2055 * Called when the stream is backenabled. Enter serializer and qenable everyone
2057 2056 * flow controlled by tep.
2058 2057 *
2059 2058  * NOTE: The service routine must enter the serializer synchronously;
2060 2059  * otherwise two instances of tl_rsrv() could run concurrently, reusing the
2061 2060  * same rsrv mblk.
2062 2061 */
2063 2062 static void
2064 2063 tl_rsrv(queue_t *rq)
2065 2064 {
2066 2065 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2067 2066
2068 2067 ASSERT(rq->q_first == NULL);
2069 2068 ASSERT(tep->te_rsrv_active == 0);
2070 2069
2071 2070 tep->te_rsrv_active = B_TRUE;
2072 2071 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2073 2072 /*
2074 2073 * Wait for serializer job to complete.
2075 2074 */
2076 2075 mutex_enter(&tep->te_srv_lock);
2077 2076 while (tep->te_rsrv_active) {
2078 2077 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2079 2078 }
2080 2079 cv_signal(&tep->te_srv_cv);
2081 2080 mutex_exit(&tep->te_srv_lock);
2082 2081 }
2083 2082
2084 2083 /* ARGSUSED */
2085 2084 static void
2086 2085 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2087 2086 {
2088 2087 tl_endpt_t *peer_tep;
2089 2088
2090 2089 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2091 2090 tl_cl_backenable(tep);
2092 2091 } else if (
2093 2092 IS_COTS(tep) &&
2094 2093 ((peer_tep = tep->te_conp) != NULL) &&
2095 2094 !peer_tep->te_closing &&
2096 2095 ((tep->te_state == TS_DATA_XFER) ||
2097 2096 (tep->te_state == TS_WIND_ORDREL)||
2098 2097 (tep->te_state == TS_WREQ_ORDREL))) {
2099 2098 TL_QENABLE(peer_tep);
2100 2099 }
2101 2100
2102 2101 /*
2103 2102 * Wakeup read side service routine.
2104 2103 */
2105 2104 mutex_enter(&tep->te_srv_lock);
2106 2105 ASSERT(tep->te_rsrv_active);
2107 2106 tep->te_rsrv_active = B_FALSE;
2108 2107 cv_signal(&tep->te_srv_cv);
2109 2108 mutex_exit(&tep->te_srv_lock);
2110 2109 tl_serializer_exit(tep);
2111 2110 }
2112 2111
2113 2112 /*
2114 2113 * process M_PROTO messages. Always called from serializer.
2115 2114 */
2116 2115 static void
2117 2116 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2118 2117 {
2119 2118 ssize_t msz = MBLKL(mp);
2120 2119 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
2121 2120
2122 2121 /* Message size was validated by tl_wput(). */
2123 2122 ASSERT(msz >= sizeof (prim->type));
2124 2123
2125 2124 switch (prim->type) {
2126 2125 case T_UNBIND_REQ:
2127 2126 tl_unbind(mp, tep);
2128 2127 break;
2129 2128
2130 2129 case T_ADDR_REQ:
2131 2130 tl_addr_req(mp, tep);
2132 2131 break;
2133 2132
2134 2133 case O_T_CONN_RES:
2135 2134 case T_CONN_RES:
2136 2135 if (IS_CLTS(tep)) {
2137 2136 tl_merror(tep->te_wq, mp, EPROTO);
2138 2137 break;
2139 2138 }
2140 2139 tl_conn_res(mp, tep);
2141 2140 break;
2142 2141
2143 2142 case T_DISCON_REQ:
2144 2143 if (IS_CLTS(tep)) {
2145 2144 tl_merror(tep->te_wq, mp, EPROTO);
2146 2145 break;
2147 2146 }
2148 2147 tl_discon_req(mp, tep);
2149 2148 break;
2150 2149
2151 2150 case T_DATA_REQ:
2152 2151 if (IS_CLTS(tep)) {
2153 2152 tl_merror(tep->te_wq, mp, EPROTO);
2154 2153 break;
2155 2154 }
2156 2155 tl_data(mp, tep);
2157 2156 break;
2158 2157
2159 2158 case T_OPTDATA_REQ:
2160 2159 if (IS_CLTS(tep)) {
2161 2160 tl_merror(tep->te_wq, mp, EPROTO);
2162 2161 break;
2163 2162 }
2164 2163 tl_data(mp, tep);
2165 2164 break;
2166 2165
2167 2166 case T_EXDATA_REQ:
2168 2167 if (IS_CLTS(tep)) {
2169 2168 tl_merror(tep->te_wq, mp, EPROTO);
2170 2169 break;
2171 2170 }
2172 2171 tl_exdata(mp, tep);
2173 2172 break;
2174 2173
2175 2174 case T_ORDREL_REQ:
2176 2175 if (! IS_COTSORD(tep)) {
2177 2176 tl_merror(tep->te_wq, mp, EPROTO);
2178 2177 break;
2179 2178 }
2180 2179 tl_ordrel(mp, tep);
2181 2180 break;
2182 2181
2183 2182 case T_UNITDATA_REQ:
2184 2183 if (IS_COTS(tep)) {
2185 2184 tl_merror(tep->te_wq, mp, EPROTO);
2186 2185 break;
2187 2186 }
2188 2187 tl_unitdata(mp, tep);
2189 2188 break;
2190 2189
2191 2190 default:
2192 2191 tl_merror(tep->te_wq, mp, EPROTO);
2193 2192 break;
2194 2193 }
2195 2194 }
2196 2195
2197 2196 /*
2198 2197 * Process ioctl from serializer.
2199 2198 * This is a wrapper around tl_do_ioctl().
2200 2199 */
2201 2200 static void
2202 2201 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2203 2202 {
2204 2203 if (! tep->te_closing)
2205 2204 tl_do_ioctl(mp, tep);
2206 2205 else
2207 2206 freemsg(mp);
2208 2207
2209 2208 tl_serializer_exit(tep);
2210 2209 tl_refrele(tep);
2211 2210 }
2212 2211
2213 2212 static void
2214 2213 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2215 2214 {
2216 2215 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2217 2216 int cmd = iocbp->ioc_cmd;
2218 2217 queue_t *wq = tep->te_wq;
2219 2218 int error;
2220 2219 int thisopt, otheropt;
2221 2220
2222 2221 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2223 2222
2224 2223 switch (cmd) {
2225 2224 case TL_IOC_CREDOPT:
2226 2225 if (cmd == TL_IOC_CREDOPT) {
2227 2226 thisopt = TL_SETCRED;
2228 2227 otheropt = TL_SETUCRED;
2229 2228 } else {
2230 2229 /* FALLTHROUGH */
2231 2230 case TL_IOC_UCREDOPT:
2232 2231 thisopt = TL_SETUCRED;
2233 2232 otheropt = TL_SETCRED;
2234 2233 }
2235 2234 /*
2236 2235 * The credentials passing does not apply to sockets.
2237 2236 * Only one of the cred options can be set at a given time.
2238 2237 */
2239 2238 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2240 2239 miocnak(wq, mp, 0, EINVAL);
2241 2240 return;
2242 2241 }
2243 2242
2244 2243 /*
2245 2244 * Turn on generation of credential options for
2246 2245 		 * T_conn_req, T_conn_con, T_unitdata_ind.
2247 2246 */
2248 2247 error = miocpullup(mp, sizeof (uint32_t));
2249 2248 if (error != 0) {
2250 2249 miocnak(wq, mp, 0, error);
2251 2250 return;
2252 2251 }
2253 2252 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2254 2253 miocnak(wq, mp, 0, EINVAL);
2255 2254 return;
2256 2255 }
2257 2256
2258 2257 if (*(uint32_t *)mp->b_cont->b_rptr)
2259 2258 tep->te_flag |= thisopt;
2260 2259 else
2261 2260 tep->te_flag &= ~thisopt;
2262 2261
2263 2262 miocack(wq, mp, 0, 0);
2264 2263 break;
2265 2264
2266 2265 default:
2267 2266 /* Should not be here */
2268 2267 miocnak(wq, mp, 0, EINVAL);
2269 2268 break;
2270 2269 }
2271 2270 }
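
From userland, these two commands travel as STREAMS I_STR ioctls carrying the
uint32_t payload that the code above pulls up. A hedged sketch, assuming
TL_IOC_UCREDOPT comes from <sys/tl.h> and fd is an open non-socket endpoint
(e.g. /dev/ticotsord); tl_set_ucredopt is this sketch's own name:

	#include <sys/types.h>
	#include <sys/tl.h>
	#include <stropts.h>

	/* Enable (on != 0) or disable ucred option generation. */
	int
	tl_set_ucredopt(int fd, uint32_t on)
	{
		struct strioctl ic;

		ic.ic_cmd = TL_IOC_UCREDOPT;
		ic.ic_timout = 0;	/* default STREAMS ioctl timeout */
		ic.ic_len = sizeof (on);
		ic.ic_dp = (char *)&on;

		/* NAK'd with EINVAL on sockets or if TL_SETCRED is set. */
		return (ioctl(fd, I_STR, &ic));
	}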
2272 2271
2273 2272
2274 2273 /*
2275 2274 * send T_ERROR_ACK
2276 2275 * Note: assumes enough memory or caller passed big enough mp
2277 2276 * - no recovery from allocb failures
2278 2277 */
2279 2278
2280 2279 static void
2281 2280 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2282 2281 t_scalar_t unix_err, t_scalar_t type)
2283 2282 {
2284 2283 struct T_error_ack *err_ack;
2285 2284 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2286 2285 M_PCPROTO, T_ERROR_ACK);
2287 2286
2288 2287 if (ackmp == NULL) {
2289 2288 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2290 2289 "tl_error_ack:out of mblk memory"));
2291 2290 tl_merror(wq, NULL, ENOSR);
2292 2291 return;
2293 2292 }
2294 2293 err_ack = (struct T_error_ack *)ackmp->b_rptr;
2295 2294 err_ack->ERROR_prim = type;
2296 2295 err_ack->TLI_error = tli_err;
2297 2296 err_ack->UNIX_error = unix_err;
2298 2297
2299 2298 /*
2300 2299 * send error ack message
2301 2300 */
2302 2301 qreply(wq, ackmp);
2303 2302 }
2304 2303
2305 2304
2306 2305
2307 2306 /*
2308 2307 * send T_OK_ACK
2309 2308 * Note: assumes enough memory or caller passed big enough mp
2310 2309 * - no recovery from allocb failures
2311 2310 */
2312 2311 static void
2313 2312 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2314 2313 {
2315 2314 struct T_ok_ack *ok_ack;
2316 2315 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2317 2316 M_PCPROTO, T_OK_ACK);
2318 2317
2319 2318 if (ackmp == NULL) {
2320 2319 tl_merror(wq, NULL, ENOMEM);
2321 2320 return;
2322 2321 }
2323 2322
2324 2323 ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2325 2324 ok_ack->CORRECT_prim = type;
2326 2325
2327 2326 (void) qreply(wq, ackmp);
2328 2327 }
2329 2328
2330 2329 /*
2331 2330 * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2332 2331 * This is a wrapper around tl_bind().
2333 2332 */
2334 2333 static void
2335 2334 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2336 2335 {
2337 2336 if (! tep->te_closing)
2338 2337 tl_bind(mp, tep);
2339 2338 else
2340 2339 freemsg(mp);
2341 2340
2342 2341 tl_serializer_exit(tep);
2343 2342 tl_refrele(tep);
2344 2343 }
2345 2344
2346 2345 /*
2347 2346 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2348 2347  * Assumes that the endpoint is in the unbound state.
2349 2348 */
2350 2349 static void
2351 2350 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2352 2351 {
2353 2352 queue_t *wq = tep->te_wq;
2354 2353 struct T_bind_ack *b_ack;
2355 2354 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2356 2355 mblk_t *ackmp, *bamp;
2357 2356 soux_addr_t ux_addr;
2358 2357 t_uscalar_t qlen = 0;
2359 2358 t_scalar_t alen, aoff;
2360 2359 tl_addr_t addr_req;
2361 2360 void *addr_startp;
2362 2361 ssize_t msz = MBLKL(mp), basize;
2363 2362 t_scalar_t tli_err = 0, unix_err = 0;
2364 2363 t_scalar_t save_prim_type = bind->PRIM_type;
2365 2364 t_scalar_t save_state = tep->te_state;
2366 2365
2367 2366 if (tep->te_state != TS_UNBND) {
2368 2367 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2369 2368 SL_TRACE|SL_ERROR,
2370 2369 "tl_wput:bind_request:out of state, state=%d",
2371 2370 tep->te_state));
2372 2371 tli_err = TOUTSTATE;
2373 2372 goto error;
2374 2373 }
2375 2374
2376 2375 if (msz < sizeof (struct T_bind_req)) {
2377 2376 tli_err = TSYSERR; unix_err = EINVAL;
2378 2377 goto error;
2379 2378 }
2380 2379
2381 2380 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2382 2381
2383 2382 ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2384 2383 (bind->PRIM_type == T_BIND_REQ));
2385 2384
2386 2385 alen = bind->ADDR_length;
2387 2386 aoff = bind->ADDR_offset;
2388 2387
2389 2388 /* negotiate max conn req pending */
2390 2389 if (IS_COTS(tep)) {
2391 2390 qlen = bind->CONIND_number;
2392 2391 if (qlen > tl_maxqlen)
2393 2392 qlen = tl_maxqlen;
2394 2393 }
2395 2394
2396 2395 /*
2397 2396 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2398 2397 * and bound again.
2399 2398 */
2400 2399 if ((tep->te_hash_hndl == NULL) &&
2401 2400 ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2402 2401 mod_hash_reserve_nosleep(tep->te_addrhash,
2403 2402 &tep->te_hash_hndl) != 0) {
2404 2403 tli_err = TSYSERR; unix_err = ENOSR;
2405 2404 goto error;
2406 2405 }
2407 2406
2408 2407 /*
2409 2408 * Verify address correctness.
2410 2409 */
2411 2410 if (IS_SOCKET(tep)) {
2412 2411 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2413 2412
2414 2413 if ((alen != TL_SOUX_ADDRLEN) ||
2415 2414 (aoff < 0) ||
2416 2415 (aoff + alen > msz)) {
2417 2416 (void) (STRLOG(TL_ID, tep->te_minor,
2418 2417 1, SL_TRACE|SL_ERROR,
2419 2418 "tl_bind: invalid socket addr"));
2420 2419 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2421 2420 tli_err = TSYSERR; unix_err = EINVAL;
2422 2421 goto error;
2423 2422 }
2424 2423 /* Copy address from message to local buffer. */
2425 2424 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2426 2425 /*
2427 2426 * Check that we got correct address from sockets
2428 2427 */
2429 2428 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2430 2429 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2431 2430 (void) (STRLOG(TL_ID, tep->te_minor,
2432 2431 1, SL_TRACE|SL_ERROR,
2433 2432 "tl_bind: invalid socket magic"));
2434 2433 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2435 2434 tli_err = TSYSERR; unix_err = EINVAL;
2436 2435 goto error;
2437 2436 }
2438 2437 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2439 2438 (ux_addr.soua_vp != NULL)) {
2440 2439 (void) (STRLOG(TL_ID, tep->te_minor,
2441 2440 1, SL_TRACE|SL_ERROR,
2442 2441 "tl_bind: implicit addr non-empty"));
2443 2442 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2444 2443 tli_err = TSYSERR; unix_err = EINVAL;
2445 2444 goto error;
2446 2445 }
2447 2446 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2448 2447 (ux_addr.soua_vp == NULL)) {
2449 2448 (void) (STRLOG(TL_ID, tep->te_minor,
2450 2449 1, SL_TRACE|SL_ERROR,
2451 2450 "tl_bind: explicit addr empty"));
2452 2451 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2453 2452 tli_err = TSYSERR; unix_err = EINVAL;
2454 2453 goto error;
2455 2454 }
2456 2455 } else {
2457 2456 if ((alen > 0) && ((aoff < 0) ||
2458 2457 ((ssize_t)(aoff + alen) > msz) ||
2459 2458 ((aoff + alen) < 0))) {
2460 2459 (void) (STRLOG(TL_ID, tep->te_minor,
2461 2460 1, SL_TRACE|SL_ERROR,
2462 2461 "tl_bind: invalid message"));
2463 2462 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2464 2463 tli_err = TSYSERR; unix_err = EINVAL;
2465 2464 goto error;
2466 2465 }
2467 2466 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2468 2467 (void) (STRLOG(TL_ID, tep->te_minor,
2469 2468 1, SL_TRACE|SL_ERROR,
2470 2469 "tl_bind: bad addr in message"));
2471 2470 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2472 2471 tli_err = TBADADDR;
2473 2472 goto error;
2474 2473 }
2475 2474 #ifdef DEBUG
2476 2475 /*
2477 2476 * Mild form of ASSERT()ion to detect broken TPI apps.
2478 2477 * if (! assertion)
2479 2478 * log warning;
2480 2479 */
2481 2480 if (! ((alen == 0 && aoff == 0) ||
2482 2481 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2483 2482 (void) (STRLOG(TL_ID, tep->te_minor,
2484 2483 3, SL_TRACE|SL_ERROR,
2485 2484 "tl_bind: addr overlaps TPI message"));
2486 2485 }
2487 2486 #endif
2488 2487 }
2489 2488
2490 2489 /*
2491 2490 * Bind the address provided or allocate one if requested.
2492 2491 * Allow rebinds with a new qlen value.
2493 2492 */
2494 2493 if (IS_SOCKET(tep)) {
2495 2494 /*
2496 2495 		 * For anonymous requests te_ap is already set up properly,
2497 2496 		 * so the minor number is used as the address.
2498 2497 		 * For explicit requests we need to check whether the address
2499 2498 		 * is already in use.
2500 2499 */
2501 2500 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2502 2501 int rc;
2503 2502
2504 2503 if (tep->te_flag & TL_ADDRHASHED) {
2505 2504 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2506 2505 if (tep->te_vp == ux_addr.soua_vp)
2507 2506 goto skip_addr_bind;
2508 2507 else /* Rebind to a new address. */
2509 2508 tl_addr_unbind(tep);
2510 2509 }
2511 2510 /*
2512 2511 * Insert address in the hash if it is not already
2513 2512 * there. Since we use preallocated handle, the insert
2514 2513 * can fail only if the key is already present.
2515 2514 */
2516 2515 rc = mod_hash_insert_reserve(tep->te_addrhash,
2517 2516 (mod_hash_key_t)ux_addr.soua_vp,
2518 2517 (mod_hash_val_t)tep, tep->te_hash_hndl);
2519 2518
2520 2519 if (rc != 0) {
2521 2520 ASSERT(rc == MH_ERR_DUPLICATE);
2522 2521 /*
2523 2522 * Violate O_T_BIND_REQ semantics and fail with
2524 2523 * TADDRBUSY - sockets will not use any address
2525 2524 * other than supplied one for explicit binds.
2526 2525 */
2527 2526 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2528 2527 SL_TRACE|SL_ERROR,
2529 2528 "tl_bind:requested addr %p is busy",
2530 2529 ux_addr.soua_vp));
2531 2530 tli_err = TADDRBUSY; unix_err = 0;
2532 2531 goto error;
2533 2532 }
2534 2533 tep->te_uxaddr = ux_addr;
2535 2534 tep->te_flag |= TL_ADDRHASHED;
2536 2535 tep->te_hash_hndl = NULL;
2537 2536 }
2538 2537 } else if (alen == 0) {
2539 2538 /*
2540 2539 * assign any free address
2541 2540 */
2542 2541 if (! tl_get_any_addr(tep, NULL)) {
2543 2542 (void) (STRLOG(TL_ID, tep->te_minor,
2544 2543 1, SL_TRACE|SL_ERROR,
2545 2544 "tl_bind:failed to get buffer for any "
2546 2545 "address"));
2547 2546 tli_err = TSYSERR; unix_err = ENOSR;
2548 2547 goto error;
2549 2548 }
2550 2549 } else {
2551 2550 addr_req.ta_alen = alen;
2552 2551 addr_req.ta_abuf = (mp->b_rptr + aoff);
2553 2552 addr_req.ta_zoneid = tep->te_zoneid;
2554 2553
2555 2554 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2556 2555 if (tep->te_abuf == NULL) {
2557 2556 tli_err = TSYSERR; unix_err = ENOSR;
2558 2557 goto error;
2559 2558 }
2560 2559 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2561 2560 tep->te_alen = alen;
2562 2561
2563 2562 if (mod_hash_insert_reserve(tep->te_addrhash,
2564 2563 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2565 2564 tep->te_hash_hndl) != 0) {
2566 2565 if (save_prim_type == T_BIND_REQ) {
2567 2566 /*
2568 2567 * The bind semantics for this primitive
2569 2568 * require a failure if the exact address
2570 2569 * requested is busy
2571 2570 */
2572 2571 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2573 2572 SL_TRACE|SL_ERROR,
2574 2573 "tl_bind:requested addr is busy"));
2575 2574 tli_err = TADDRBUSY; unix_err = 0;
2576 2575 goto error;
2577 2576 }
2578 2577
2579 2578 /*
2580 2579 		 * O_T_BIND_REQ semantics say that if the requested
2581 2580 		 * address is busy, bind to any available free address.
2582 2581 */
2583 2582 if (! tl_get_any_addr(tep, &addr_req)) {
2584 2583 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2585 2584 SL_TRACE|SL_ERROR,
2586 2585 "tl_bind:unable to get any addr buf"));
2587 2586 tli_err = TSYSERR; unix_err = ENOMEM;
2588 2587 goto error;
2589 2588 }
2590 2589 } else {
2591 2590 tep->te_flag |= TL_ADDRHASHED;
2592 2591 tep->te_hash_hndl = NULL;
2593 2592 }
2594 2593 }
2595 2594
2596 2595 ASSERT(tep->te_alen >= 0);
2597 2596
2598 2597 skip_addr_bind:
2599 2598 /*
2600 2599 * prepare T_BIND_ACK TPI message
2601 2600 */
2602 2601 basize = sizeof (struct T_bind_ack) + tep->te_alen;
2603 2602 bamp = reallocb(mp, basize, 0);
2604 2603 if (bamp == NULL) {
2605 2604 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2606 2605 "tl_wput:tl_bind: allocb failed"));
2607 2606 /*
2608 2607 * roll back state changes
2609 2608 */
2610 2609 tl_addr_unbind(tep);
2611 2610 tep->te_state = TS_UNBND;
2612 2611 tl_memrecover(wq, mp, basize);
2613 2612 return;
2614 2613 }
2615 2614
2616 2615 DB_TYPE(bamp) = M_PCPROTO;
2617 2616 bamp->b_wptr = bamp->b_rptr + basize;
2618 2617 b_ack = (struct T_bind_ack *)bamp->b_rptr;
2619 2618 b_ack->PRIM_type = T_BIND_ACK;
2620 2619 b_ack->CONIND_number = qlen;
2621 2620 b_ack->ADDR_length = tep->te_alen;
2622 2621 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2623 2622 addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2624 2623 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2625 2624
2626 2625 if (IS_COTS(tep)) {
2627 2626 tep->te_qlen = qlen;
2628 2627 if (qlen > 0)
2629 2628 tep->te_flag |= TL_LISTENER;
2630 2629 }
2631 2630
2632 2631 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2633 2632 /*
2634 2633 * send T_BIND_ACK message
2635 2634 */
2636 2635 (void) qreply(wq, bamp);
2637 2636 return;
2638 2637
2639 2638 error:
2640 2639 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2641 2640 if (ackmp == NULL) {
2642 2641 /*
2643 2642 * roll back state changes
2644 2643 */
2645 2644 tep->te_state = save_state;
2646 2645 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2647 2646 return;
2648 2647 }
2649 2648 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2650 2649 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2651 2650 }
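
For reference, the request tl_bind() parses can be produced from userland with
a raw putmsg(). A minimal sketch, bypassing libnsl's t_bind() purely for
illustration (tpi_bind and the 64-byte address limit are this sketch's own;
error handling is elided):

	#include <sys/tihdr.h>
	#include <stropts.h>
	#include <string.h>

	/* Send a T_BIND_REQ with alen bytes of address abutting the header. */
	int
	tpi_bind(int fd, const void *addr, t_scalar_t alen, t_uscalar_t qlen)
	{
		union {
			struct T_bind_req br;
			char buf[sizeof (struct T_bind_req) + 64];
		} m;
		struct strbuf ctl;

		m.br.PRIM_type = T_BIND_REQ;
		m.br.ADDR_length = alen;
		m.br.ADDR_offset = (t_scalar_t)sizeof (m.br);
		m.br.CONIND_number = qlen;	/* clamped to tl_maxqlen */
		(void) memcpy(m.buf + sizeof (m.br), addr, (size_t)alen);

		ctl.len = (int)(sizeof (m.br) + alen);
		ctl.buf = m.buf;

		/* The T_BIND_ACK or T_ERROR_ACK is read back with getmsg(). */
		return (putmsg(fd, &ctl, NULL, 0));
	}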
2652 2651
2653 2652 /*
2654 2653 * Process T_UNBIND_REQ.
2655 2654 * Called from serializer.
2656 2655 */
2657 2656 static void
2658 2657 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2659 2658 {
2660 2659 queue_t *wq;
2661 2660 mblk_t *ackmp;
2662 2661
2663 2662 if (tep->te_closing) {
2664 2663 freemsg(mp);
2665 2664 return;
2666 2665 }
2667 2666
2668 2667 wq = tep->te_wq;
2669 2668
2670 2669 /*
2671 2670 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2672 2671 * ==> allocate for T_ERROR_ACK (known max)
2673 2672 */
2674 2673 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2675 2674 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2676 2675 return;
2677 2676 }
2678 2677 /*
2679 2678 * memory resources committed
2680 2679 	 * Note: no message validation. A T_UNBIND_REQ message is the
2681 2680 	 * same size as the PRIM_type field, so it was already verified earlier.
2682 2681 */
2683 2682
2684 2683 /*
2685 2684 * validate state
2686 2685 */
2687 2686 if (tep->te_state != TS_IDLE) {
2688 2687 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2689 2688 SL_TRACE|SL_ERROR,
2690 2689 "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2691 2690 tep->te_state));
2692 2691 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2693 2692 return;
2694 2693 }
2695 2694 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2696 2695
2697 2696 /*
2698 2697 * TPI says on T_UNBIND_REQ:
2699 2698 	 * send up an M_FLUSH to flush both
2700 2699 * read and write queues
2701 2700 */
2702 2701 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2703 2702
2704 2703 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2705 2704 tep->te_magic != SOU_MAGIC_EXPLICIT) {
2706 2705
2707 2706 /*
2708 2707 * Sockets use bind with qlen==0 followed by bind() to
2709 2708 * the same address with qlen > 0 for listeners.
2710 2709 * We allow rebind with a new qlen value.
2711 2710 */
2712 2711 tl_addr_unbind(tep);
2713 2712 }
2714 2713
2715 2714 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2716 2715 /*
2717 2716 * send T_OK_ACK
2718 2717 */
2719 2718 tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2720 2719 }
2721 2720
2722 2721
2723 2722 /*
2724 2723 * Option management code from drv/ip is used here
2725 2724 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2726 2725 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2727 2726 * However, that is what we want as that option is 'unorthodox'
2728 2727 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND
2729 2728 * and not in T_SVR4_OPTMGMT_REQ/ACK
2730 2729 * Note2: use of optcom_req means this routine is an exception to
2731 2730 * recovery from allocb() failures.
2732 2731 */
2733 2732
2734 2733 static void
2735 2734 tl_optmgmt(queue_t *wq, mblk_t *mp)
2736 2735 {
2737 2736 tl_endpt_t *tep;
2738 2737 mblk_t *ackmp;
2739 2738 union T_primitives *prim;
2740 2739 cred_t *cr;
2741 2740
2742 2741 tep = (tl_endpt_t *)wq->q_ptr;
2743 2742 prim = (union T_primitives *)mp->b_rptr;
2744 2743
2745 2744 /*
2746 2745 * All Solaris components should pass a db_credp
2747 2746 * for this TPI message, hence we ASSERT.
2748 2747 * But in case there is some other M_PROTO that looks
2749 2748 * like a TPI message sent by some other kernel
2750 2749 * component, we check and return an error.
2751 2750 */
2752 2751 cr = msg_getcred(mp, NULL);
2753 2752 ASSERT(cr != NULL);
2754 2753 if (cr == NULL) {
2755 2754 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2756 2755 return;
2757 2756 }
2758 2757
2759 2758 /* all states OK for AF_UNIX options ? */
2760 2759 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2761 2760 prim->type == T_SVR4_OPTMGMT_REQ) {
2762 2761 /*
2763 2762 * Broken TLI semantics that options can only be managed
2764 2763 * in TS_IDLE state. Needed for Sparc ABI test suite that
2765 2764 * tests this TLI (mis)feature using this device driver.
2766 2765 */
2767 2766 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2768 2767 SL_TRACE|SL_ERROR,
2769 2768 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2770 2769 tep->te_state));
2771 2770 /*
2772 2771 * preallocate memory for T_ERROR_ACK
2773 2772 */
2774 2773 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2775 2774 if (! ackmp) {
2776 2775 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2777 2776 return;
2778 2777 }
2779 2778
2780 2779 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2781 2780 freemsg(mp);
2782 2781 return;
2783 2782 }
2784 2783
2785 2784 /*
2786 2785 * call common option management routine from drv/ip
2787 2786 */
2788 2787 if (prim->type == T_SVR4_OPTMGMT_REQ) {
2789 2788 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2790 2789 } else {
2791 2790 ASSERT(prim->type == T_OPTMGMT_REQ);
2792 2791 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2793 2792 }
2794 2793 }
2795 2794
2796 2795 /*
2797 2796  * Handle T_conn_req - the driver part of connect().
2798 2797 * If TL_SET[U]CRED generate the credentials options.
2799 2798 * If this is a socket pass through options unmodified.
2800 2799 * For sockets generate the T_CONN_CON here instead of
2801 2800 * waiting for the T_CONN_RES.
2802 2801 */
2803 2802 static void
2804 2803 tl_conn_req(queue_t *wq, mblk_t *mp)
2805 2804 {
2806 2805 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2807 2806 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
2808 2807 ssize_t msz = MBLKL(mp);
2809 2808 t_scalar_t alen, aoff, olen, ooff, err = 0;
2810 2809 tl_endpt_t *peer_tep = NULL;
2811 2810 mblk_t *ackmp;
2812 2811 mblk_t *dimp;
2813 2812 struct T_discon_ind *di;
2814 2813 soux_addr_t ux_addr;
2815 2814 tl_addr_t dst;
2816 2815
2817 2816 ASSERT(IS_COTS(tep));
2818 2817
2819 2818 if (tep->te_closing) {
2820 2819 freemsg(mp);
2821 2820 return;
2822 2821 }
2823 2822
2824 2823 /*
2825 2824 * preallocate memory for:
2826 2825 * 1. max of T_ERROR_ACK and T_OK_ACK
2827 2826 * ==> known max T_ERROR_ACK
2828 2827 * 2. max of T_DISCON_IND and T_CONN_IND
2829 2828 */
2830 2829 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2831 2830 if (! ackmp) {
2832 2831 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2833 2832 return;
2834 2833 }
2835 2834 /*
2836 2835 * memory committed for T_OK_ACK/T_ERROR_ACK now
2837 2836 * will be committed for T_DISCON_IND/T_CONN_IND later
2838 2837 */
2839 2838
2840 2839 if (tep->te_state != TS_IDLE) {
2841 2840 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2842 2841 SL_TRACE|SL_ERROR,
2843 2842 "tl_wput:T_CONN_REQ:out of state, state=%d",
2844 2843 tep->te_state));
2845 2844 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2846 2845 freemsg(mp);
2847 2846 return;
2848 2847 }
2849 2848
2850 2849 /*
2851 2850 * validate the message
2852 2851 * Note: dereference fields in struct inside message only
2853 2852 * after validating the message length.
2854 2853 */
2855 2854 if (msz < sizeof (struct T_conn_req)) {
2856 2855 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2857 2856 "tl_conn_req:invalid message length"));
2858 2857 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2859 2858 freemsg(mp);
2860 2859 return;
2861 2860 }
2862 2861 alen = creq->DEST_length;
2863 2862 aoff = creq->DEST_offset;
2864 2863 olen = creq->OPT_length;
2865 2864 ooff = creq->OPT_offset;
2866 2865 if (olen == 0)
2867 2866 ooff = 0;
2868 2867
2869 2868 if (IS_SOCKET(tep)) {
2870 2869 if ((alen != TL_SOUX_ADDRLEN) ||
2871 2870 (aoff < 0) ||
2872 2871 (aoff + alen > msz) ||
2873 2872 (alen > msz - sizeof (struct T_conn_req))) {
2874 2873 (void) (STRLOG(TL_ID, tep->te_minor,
2875 2874 1, SL_TRACE|SL_ERROR,
2876 2875 "tl_conn_req: invalid socket addr"));
2877 2876 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2878 2877 freemsg(mp);
2879 2878 return;
2880 2879 }
2881 2880 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2882 2881 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2883 2882 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2884 2883 (void) (STRLOG(TL_ID, tep->te_minor,
2885 2884 1, SL_TRACE|SL_ERROR,
2886 2885 "tl_conn_req: invalid socket magic"));
2887 2886 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2888 2887 freemsg(mp);
2889 2888 return;
2890 2889 }
2891 2890 } else {
2892 2891 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2893 2892 (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2894 2893 ooff + olen < 0)) ||
2895 2894 olen < 0 || ooff < 0) {
2896 2895 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2897 2896 SL_TRACE|SL_ERROR,
2898 2897 "tl_conn_req:invalid message"));
2899 2898 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2900 2899 freemsg(mp);
2901 2900 return;
2902 2901 }
2903 2902
2904 2903 if (alen <= 0 || aoff < 0 ||
2905 2904 (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2906 2905 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2907 2906 SL_TRACE|SL_ERROR,
2908 2907 "tl_conn_req:bad addr in message, "
2909 2908 "alen=%d, msz=%ld",
2910 2909 alen, msz));
2911 2910 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2912 2911 freemsg(mp);
2913 2912 return;
2914 2913 }
2915 2914 #ifdef DEBUG
2916 2915 /*
2917 2916 * Mild form of ASSERT()ion to detect broken TPI apps.
2918 2917 * if (! assertion)
2919 2918 * log warning;
2920 2919 */
2921 2920 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2922 2921 (void) (STRLOG(TL_ID, tep->te_minor, 3,
2923 2922 SL_TRACE|SL_ERROR,
2924 2923 "tl_conn_req: addr overlaps TPI message"));
2925 2924 }
2926 2925 #endif
2927 2926 if (olen) {
2928 2927 			 * No options in a connect request are
2929 2928 			 * supported by this provider, except for sockets.
2930 2929 * supported in this provider except for sockets.
2931 2930 */
2932 2931 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2933 2932 SL_TRACE|SL_ERROR,
2934 2933 "tl_conn_req:options not supported "
2935 2934 "in message"));
2936 2935 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2937 2936 freemsg(mp);
2938 2937 return;
2939 2938 }
2940 2939 }
2941 2940
2942 2941 /*
2943 2942 * Prevent tep from closing on us.
2944 2943 */
2945 2944 if (! tl_noclose(tep)) {
2946 2945 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2947 2946 "tl_conn_req:endpoint is closing"));
2948 2947 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2949 2948 freemsg(mp);
2950 2949 return;
2951 2950 }
2952 2951
2953 2952 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2954 2953 /*
2955 2954 * get endpoint to connect to
2956 2955 * check that peer with DEST addr is bound to addr
2957 2956 * and has CONIND_number > 0
2958 2957 */
2959 2958 dst.ta_alen = alen;
2960 2959 dst.ta_abuf = mp->b_rptr + aoff;
2961 2960 dst.ta_zoneid = tep->te_zoneid;
2962 2961
2963 2962 /*
2964 2963 * Verify if remote addr is in use
2965 2964 */
2966 2965 peer_tep = (IS_SOCKET(tep) ?
2967 2966 tl_sock_find_peer(tep, &ux_addr) :
2968 2967 tl_find_peer(tep, &dst));
2969 2968
2970 2969 if (peer_tep == NULL) {
2971 2970 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2972 2971 "tl_conn_req:no one at connect address"));
2973 2972 err = ECONNREFUSED;
2974 2973 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
2975 2974 /*
2976 2975 		 * Validate that the number of incoming connections is
2977 2976 		 * not at capacity on the destination endpoint.
2978 2977 */
2979 2978 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2980 2979 "tl_conn_req: qlen overflow connection refused"));
2981 2980 err = ECONNREFUSED;
2982 2981 }
2983 2982
2984 2983 /*
2985 2984 * Send T_DISCON_IND in case of error
2986 2985 */
2987 2986 if (err != 0) {
2988 2987 if (peer_tep != NULL)
2989 2988 tl_refrele(peer_tep);
2990 2989 /* We are still expected to send T_OK_ACK */
2991 2990 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2992 2991 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2993 2992 tl_closeok(tep);
2994 2993 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2995 2994 M_PROTO, T_DISCON_IND);
2996 2995 if (dimp == NULL) {
2997 2996 tl_merror(wq, NULL, ENOSR);
2998 2997 return;
2999 2998 }
3000 2999 di = (struct T_discon_ind *)dimp->b_rptr;
3001 3000 di->DISCON_reason = err;
3002 3001 di->SEQ_number = BADSEQNUM;
3003 3002
3004 3003 tep->te_state = TS_IDLE;
3005 3004 /*
3006 3005 * send T_DISCON_IND message
3007 3006 */
3008 3007 putnext(tep->te_rq, dimp);
3009 3008 return;
3010 3009 }
3011 3010
3012 3011 ASSERT(IS_COTS(peer_tep));
3013 3012
3014 3013 /*
3015 3014 * Found the listener. At this point processing will continue on
3016 3015 * listener serializer. Close of the endpoint should be blocked while we
3017 3016 * switch serializers.
3018 3017 */
3019 3018 tl_serializer_refhold(peer_tep->te_ser);
3020 3019 tl_serializer_refrele(tep->te_ser);
3021 3020 tep->te_ser = peer_tep->te_ser;
3022 3021 ASSERT(tep->te_oconp == NULL);
3023 3022 tep->te_oconp = peer_tep;
3024 3023
3025 3024 /*
3026 3025 * It is safe to close now. Close may continue on listener serializer.
3027 3026 */
3028 3027 tl_closeok(tep);
3029 3028
3030 3029 /*
3031 3030 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3032 3031 * data, so we link mp to ackmp.
3033 3032 */
3034 3033 ackmp->b_cont = mp;
3035 3034 mp = ackmp;
3036 3035
3037 3036 tl_refhold(tep);
3038 3037 tl_serializer_enter(tep, tl_conn_req_ser, mp);
3039 3038 }
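
The serializer switch just performed can be pictured as follows (a sketch of
the hand-off; afterwards both endpoints are driven by the listener's
serializer):

	/*
	 *   eager (tep)                          listener (peer_tep)
	 *   -----------                          -------------------
	 *   tl_serializer_refhold(peer's ser)
	 *   tl_serializer_refrele(own ser)
	 *   te_ser = peer_tep->te_ser            te_ser unchanged
	 *   te_oconp = peer_tep
	 *
	 *   tl_conn_req_ser() and all later connection processing for this
	 *   pair now run single-threaded behind the listener's serializer.
	 */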
3040 3039
3041 3040 /*
3042 3041 * Finish T_CONN_REQ processing on listener serializer.
3043 3042 */
3044 3043 static void
3045 3044 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3046 3045 {
3047 3046 queue_t *wq;
3048 3047 tl_endpt_t *peer_tep = tep->te_oconp;
3049 3048 mblk_t *confmp, *cimp, *indmp;
3050 3049 void *opts = NULL;
3051 3050 mblk_t *ackmp = mp;
3052 3051 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3053 3052 struct T_conn_ind *ci;
3054 3053 tl_icon_t *tip;
3055 3054 void *addr_startp;
3056 3055 t_scalar_t olen = creq->OPT_length;
3057 3056 t_scalar_t ooff = creq->OPT_offset;
3058 3057 size_t ci_msz;
3059 3058 size_t size;
3060 3059 cred_t *cr = NULL;
3061 3060 pid_t cpid;
3062 3061
3063 3062 if (tep->te_closing) {
3064 3063 TL_UNCONNECT(tep->te_oconp);
3065 3064 tl_serializer_exit(tep);
3066 3065 tl_refrele(tep);
3067 3066 freemsg(mp);
3068 3067 return;
3069 3068 }
3070 3069
3071 3070 wq = tep->te_wq;
3072 3071 tep->te_flag |= TL_EAGER;
3073 3072
3074 3073 /*
3075 3074 * Extract preallocated ackmp from mp.
3076 3075 */
3077 3076 mp = mp->b_cont;
3078 3077 ackmp->b_cont = NULL;
3079 3078
3080 3079 if (olen == 0)
3081 3080 ooff = 0;
3082 3081
3083 3082 if (peer_tep->te_closing ||
3084 3083 !((peer_tep->te_state == TS_IDLE) ||
3085 3084 (peer_tep->te_state == TS_WRES_CIND))) {
3086 3085 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3087 3086 "tl_conn_req:peer in bad state (%d)",
3088 3087 peer_tep->te_state));
3089 3088 TL_UNCONNECT(tep->te_oconp);
3090 3089 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3091 3090 freemsg(ackmp);
3092 3091 tl_serializer_exit(tep);
3093 3092 tl_refrele(tep);
3094 3093 return;
3095 3094 }
3096 3095
3097 3096 /*
3098 3097 * preallocate now for T_DISCON_IND or T_CONN_IND
3099 3098 */
3100 3099 /*
3101 3100 * calculate length of T_CONN_IND message
3102 3101 */
3103 3102 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3104 3103 cr = msg_getcred(mp, &cpid);
3105 3104 ASSERT(cr != NULL);
3106 3105 if (peer_tep->te_flag & TL_SETCRED) {
3107 3106 ooff = 0;
3108 3107 olen = (t_scalar_t) sizeof (struct opthdr) +
3109 3108 OPTLEN(sizeof (tl_credopt_t));
3110 3109 /* 1 option only */
3111 3110 } else {
3112 3111 ooff = 0;
3113 3112 olen = (t_scalar_t)sizeof (struct opthdr) +
3114 3113 OPTLEN(ucredminsize(cr));
3115 3114 /* 1 option only */
3116 3115 }
3117 3116 }
3118 3117 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3119 3118 ci_msz = T_ALIGN(ci_msz) + olen;
3120 3119 size = max(ci_msz, sizeof (struct T_discon_ind));
3121 3120
3122 3121 /*
3123 3122 * Save options from mp - we'll need them for T_CONN_IND.
3124 3123 */
3125 3124 if (ooff != 0) {
3126 3125 opts = kmem_alloc(olen, KM_NOSLEEP);
3127 3126 if (opts == NULL) {
3128 3127 /*
3129 3128 * roll back state changes
3130 3129 */
3131 3130 tep->te_state = TS_IDLE;
3132 3131 tl_memrecover(wq, mp, size);
3133 3132 freemsg(ackmp);
3134 3133 TL_UNCONNECT(tep->te_oconp);
3135 3134 tl_serializer_exit(tep);
3136 3135 tl_refrele(tep);
3137 3136 return;
3138 3137 }
3139 3138 /* Copy options to a temp buffer */
3140 3139 bcopy(mp->b_rptr + ooff, opts, olen);
3141 3140 }
3142 3141
3143 3142 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3144 3143 /*
3145 3144 * Generate a T_CONN_CON that has the identical address
3146 3145 * (and options) as the T_CONN_REQ.
3147 3146 * NOTE: assumes that the T_conn_req and T_conn_con structures
3148 3147 * are isomorphic.
3149 3148 */
3150 3149 confmp = copyb(mp);
3151 3150 if (! confmp) {
3152 3151 /*
3153 3152 * roll back state changes
3154 3153 */
3155 3154 tep->te_state = TS_IDLE;
3156 3155 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3157 3156 freemsg(ackmp);
3158 3157 if (opts != NULL)
3159 3158 kmem_free(opts, olen);
3160 3159 TL_UNCONNECT(tep->te_oconp);
3161 3160 tl_serializer_exit(tep);
3162 3161 tl_refrele(tep);
3163 3162 return;
3164 3163 }
3165 3164 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3166 3165 T_CONN_CON;
3167 3166 } else {
3168 3167 confmp = NULL;
3169 3168 }
3170 3169 if ((indmp = reallocb(mp, size, 0)) == NULL) {
3171 3170 /*
3172 3171 * roll back state changes
3173 3172 */
3174 3173 tep->te_state = TS_IDLE;
3175 3174 tl_memrecover(wq, mp, size);
3176 3175 freemsg(ackmp);
3177 3176 if (opts != NULL)
3178 3177 kmem_free(opts, olen);
3179 3178 freemsg(confmp);
3180 3179 TL_UNCONNECT(tep->te_oconp);
3181 3180 tl_serializer_exit(tep);
3182 3181 tl_refrele(tep);
3183 3182 return;
3184 3183 }
3185 3184
3186 3185 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3187 3186 if (tip == NULL) {
3188 3187 /*
3189 3188 * roll back state changes
3190 3189 */
3191 3190 tep->te_state = TS_IDLE;
3192 3191 tl_memrecover(wq, indmp, sizeof (*tip));
3193 3192 freemsg(ackmp);
3194 3193 if (opts != NULL)
3195 3194 kmem_free(opts, olen);
3196 3195 freemsg(confmp);
3197 3196 TL_UNCONNECT(tep->te_oconp);
3198 3197 tl_serializer_exit(tep);
3199 3198 tl_refrele(tep);
3200 3199 return;
3201 3200 }
3202 3201 tip->ti_mp = NULL;
3203 3202
3204 3203 /*
3205 3204 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3206 3205 * and tl_icon_t cell.
3207 3206 */
3208 3207
3209 3208 /*
3210 3209 * ack validity of request and send the peer credential in the ACK.
3211 3210 */
3212 3211 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3213 3212
3214 3213 if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3215 3214 confmp != NULL) {
3216 3215 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3217 3216 }
3218 3217
3219 3218 tl_ok_ack(wq, ackmp, T_CONN_REQ);
3220 3219
3221 3220 /*
3222 3221 * prepare message to send T_CONN_IND
3223 3222 */
3224 3223 /*
3225 3224 * allocate the message - original data blocks retained
3226 3225 * in the returned mblk
3227 3226 */
3228 3227 cimp = tl_resizemp(indmp, size);
3229 3228 if (! cimp) {
3230 3229 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3231 3230 "tl_conn_req:con_ind:allocb failure"));
3232 3231 tl_merror(wq, indmp, ENOMEM);
3233 3232 TL_UNCONNECT(tep->te_oconp);
3234 3233 tl_serializer_exit(tep);
3235 3234 tl_refrele(tep);
3236 3235 if (opts != NULL)
3237 3236 kmem_free(opts, olen);
3238 3237 freemsg(confmp);
3239 3238 ASSERT(tip->ti_mp == NULL);
3240 3239 kmem_free(tip, sizeof (*tip));
3241 3240 return;
3242 3241 }
3243 3242
3244 3243 DB_TYPE(cimp) = M_PROTO;
3245 3244 ci = (struct T_conn_ind *)cimp->b_rptr;
3246 3245 ci->PRIM_type = T_CONN_IND;
3247 3246 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3248 3247 ci->SRC_length = tep->te_alen;
3249 3248 ci->SEQ_number = tep->te_seqno;
3250 3249
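	/*
	 * Layout built below (offsets from cimp->b_rptr): the source
	 * address immediately follows the T_conn_ind header at SRC_offset,
	 * and options, if any, start at the next T_ALIGN()ed offset past
	 * the address.  A receiver would locate the fields roughly as
	 * follows (a sketch, not code from this file):
	 *
	 *	ci = (struct T_conn_ind *)mp->b_rptr;
	 *	src = mp->b_rptr + ci->SRC_offset;
	 *	opt = (ci->OPT_length != 0) ?
	 *	    mp->b_rptr + ci->OPT_offset : NULL;
	 */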
3251 3250 addr_startp = cimp->b_rptr + ci->SRC_offset;
3252 3251 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3253 3252 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3254 3253
3255 3254 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3256 3255 ci->SRC_length);
3257 3256 ci->OPT_length = olen; /* because only 1 option */
3258 3257 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3259 3258 cr, cpid,
3260 3259 peer_tep->te_flag, peer_tep->te_credp);
3261 3260 } else if (ooff != 0) {
3262 3261 /* Copy option from T_CONN_REQ */
3263 3262 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3264 3263 ci->SRC_length);
3265 3264 ci->OPT_length = olen;
3266 3265 ASSERT(opts != NULL);
3267 3266 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3268 3267 } else {
3269 3268 ci->OPT_offset = 0;
3270 3269 ci->OPT_length = 0;
3271 3270 }
3272 3271 if (opts != NULL)
3273 3272 kmem_free(opts, olen);
3274 3273
3275 3274 /*
3276 3275 * register connection request with server peer
3277 3276 * append to list of incoming connections
3278 3277 * increment references for both peer_tep and tep: peer_tep is placed on
3279 3278	 * te_oconp and tep is placed on the listener's queue.
3280 3279 */
3281 3280 tip->ti_tep = tep;
3282 3281 tip->ti_seqno = tep->te_seqno;
3283 3282 list_insert_tail(&peer_tep->te_iconp, tip);
3284 3283 peer_tep->te_nicon++;
3285 3284
3286 3285 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3287 3286 /*
3288 3287 * send the T_CONN_IND message
3289 3288 */
3290 3289 putnext(peer_tep->te_rq, cimp);
3291 3290
3292 3291 /*
3293 3292 * Send a T_CONN_CON message for sockets.
3294 3293 * Disable the queues until we have reached the correct state!
3295 3294 */
3296 3295 if (confmp != NULL) {
3297 3296 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3298 3297 noenable(wq);
3299 3298 putnext(tep->te_rq, confmp);
3300 3299 }
3301 3300 /*
3302 3301	 * Now we would need to take a tep reference because tep is placed
3303 3302	 * on the server's list of pending connections, and to drop that
3304 3303	 * reference before exiting the serializer. The two operations
3305 3304	 * cancel each other out, so we don't touch the reference at all.
3306 3305 */
3307 3306 ASSERT(tep->te_refcnt >= 2);
3308 3307 ASSERT(peer_tep->te_refcnt >= 2);
3309 3308 tl_serializer_exit(tep);
3310 3309 }
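
/*
 * Note on the pattern above: tl_conn_req preallocates everything it may
 * need (ackmp, confmp, indmp and the tl_icon_t cell) before committing
 * any state change, so every failure path can roll te_state back to
 * TS_IDLE and free exactly what was already allocated.
 */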
3311 3310
3312 3311
3313 3312
3314 3313 /*
3315 3314  * Handle T_conn_res on listener stream. Called on listener serializer.
3318 3317  * No one accesses acceptor at this point, so it is safe to modify acceptor.
3319 3318  * Switch eager serializer to acceptor's.
3320 3319  *
3321 3320  * If TL_SET[U]CRED generate the credentials options.
3322 3321  * For sockets tl_conn_req has already generated the T_CONN_CON.
3323 3322 */
3324 3323 static void
3325 3324 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3326 3325 {
3327 3326 queue_t *wq;
3328 3327 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr;
3329 3328 ssize_t msz = MBLKL(mp);
3330 3329 t_scalar_t olen, ooff, err = 0;
3331 3330 t_scalar_t prim = cres->PRIM_type;
3332 3331 uchar_t *addr_startp;
3333 3332 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL;
3334 3333 tl_icon_t *tip;
3335 3334 size_t size;
3336 3335 mblk_t *ackmp, *respmp;
3337 3336 mblk_t *dimp, *ccmp = NULL;
3338 3337 struct T_discon_ind *di;
3339 3338 struct T_conn_con *cc;
3340 3339 boolean_t client_noclose_set = B_FALSE;
3341 3340 boolean_t switch_client_serializer = B_TRUE;
3342 3341
3343 3342 ASSERT(IS_COTS(tep));
3344 3343
3345 3344 if (tep->te_closing) {
3346 3345 freemsg(mp);
3347 3346 return;
3348 3347 }
3349 3348
3350 3349 wq = tep->te_wq;
3351 3350
3352 3351 /*
3353 3352 * preallocate memory for:
3354 3353 * 1. max of T_ERROR_ACK and T_OK_ACK
3355 3354 * ==> known max T_ERROR_ACK
3356 3355 * 2. max of T_DISCON_IND and T_CONN_CON
3357 3356 */
3358 3357 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3359 3358 if (! ackmp) {
3360 3359 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3361 3360 return;
3362 3361 }
3363 3362 /*
3364 3363 * memory committed for T_OK_ACK/T_ERROR_ACK now
3365 3364 * will be committed for T_DISCON_IND/T_CONN_CON later
3366 3365 */
3367 3366
3368 3367
3369 3368 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3370 3369
3371 3370 /*
3372 3371 * validate state
3373 3372 */
3374 3373 if (tep->te_state != TS_WRES_CIND) {
3375 3374 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3376 3375 SL_TRACE|SL_ERROR,
3377 3376 "tl_wput:T_CONN_RES:out of state, state=%d",
3378 3377 tep->te_state));
3379 3378 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3380 3379 freemsg(mp);
3381 3380 return;
3382 3381 }
3383 3382
3384 3383 /*
3385 3384 * validate the message
3386 3385 * Note: dereference fields in struct inside message only
3387 3386 * after validating the message length.
3388 3387 */
3389 3388 if (msz < sizeof (struct T_conn_res)) {
3390 3389 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3391 3390 "tl_conn_res:invalid message length"));
3392 3391 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3393 3392 freemsg(mp);
3394 3393 return;
3395 3394 }
3396 3395 olen = cres->OPT_length;
3397 3396 ooff = cres->OPT_offset;
3398 3397 if (((olen > 0) && ((ooff + olen) > msz))) {
3399 3398 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3400 3399 "tl_conn_res:invalid message"));
3401 3400 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3402 3401 freemsg(mp);
3403 3402 return;
3404 3403 }
3405 3404 if (olen) {
3406 3405 /*
3407 3406		 * no options in a connect res
3408 3407		 * are supported by this provider
3409 3408 */
3410 3409 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3411 3410 "tl_conn_res:options not supported in message"));
3412 3411 tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3413 3412 freemsg(mp);
3414 3413 return;
3415 3414 }
3416 3415
3417 3416 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3418 3417 ASSERT(tep->te_state == TS_WACK_CRES);
3419 3418
3420 3419 if (cres->SEQ_number < TL_MINOR_START &&
3421 3420 cres->SEQ_number >= BADSEQNUM) {
3422 3421 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3423 3422 "tl_conn_res:remote endpoint sequence number bad"));
3424 3423 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3425 3424 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3426 3425 freemsg(mp);
3427 3426 return;
3428 3427 }
3429 3428
3430 3429 /*
3431 3430 * find accepting endpoint. Will have extra reference if found.
3432 3431 */
3433 3432 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3434 3433 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3435 3434 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3436 3435 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3437 3436 "tl_conn_res:bad accepting endpoint"));
3438 3437 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3439 3438 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3440 3439 freemsg(mp);
3441 3440 return;
3442 3441 }
3443 3442
3444 3443 /*
3445 3444 * Prevent acceptor from closing.
3446 3445 */
3447 3446 if (! tl_noclose(acc_ep)) {
3448 3447 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3449 3448 "tl_conn_res:bad accepting endpoint"));
3450 3449 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3451 3450 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3452 3451 tl_refrele(acc_ep);
3453 3452 freemsg(mp);
3454 3453 return;
3455 3454 }
3456 3455
3457 3456 acc_ep->te_flag |= TL_ACCEPTOR;
3458 3457
3459 3458 /*
3460 3459	 * validate that the accepting endpoint, if different from the
3461 3460	 * listening one, has an address bound => state is TS_IDLE
3462 3461 * TROUBLE in XPG4 !!?
3463 3462 */
3464 3463 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3465 3464 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3466 3465		    "tl_conn_res:accepting endpoint has no address bound, "
3467 3466 "state=%d", acc_ep->te_state));
3468 3467 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3469 3468 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3470 3469 freemsg(mp);
3471 3470 tl_closeok(acc_ep);
3472 3471 tl_refrele(acc_ep);
3473 3472 return;
3474 3473 }
3475 3474
3476 3475 /*
3477 3476	 * validate that if the accepting endpoint is the same as the
3478 3477	 * listening one, no other incoming connection is on the queue
3479 3478 */
3480 3479
3481 3480 if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3482 3481 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3483 3482 "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3484 3483 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3485 3484 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3486 3485 freemsg(mp);
3487 3486 tl_closeok(acc_ep);
3488 3487 tl_refrele(acc_ep);
3489 3488 return;
3490 3489 }
3491 3490
3492 3491 /*
3493 3492	 * Mark for deletion the entry corresponding to the client on the
3494 3493	 * listener's list of pending connections: search the list to see
3495 3494	 * whether the client is one of the connections recorded against
3496 3495	 * this listener.
3497 3496 */
3498 3497 tip = tl_icon_find(tep, cres->SEQ_number);
3499 3498 if (tip == NULL) {
3500 3499 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3501 3500 "tl_conn_res:no client in listener list"));
3502 3501 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3503 3502 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3504 3503 freemsg(mp);
3505 3504 tl_closeok(acc_ep);
3506 3505 tl_refrele(acc_ep);
3507 3506 return;
3508 3507 }
3509 3508
3510 3509 /*
3511 3510 * If ti_tep is NULL the client has already closed. In this case
3512 3511 * the code below will avoid any action on the client side
3513 3512 * but complete the server and acceptor state transitions.
3514 3513 */
3515 3514 ASSERT(tip->ti_tep == NULL ||
3516 3515 tip->ti_tep->te_seqno == cres->SEQ_number);
3517 3516 cl_ep = tip->ti_tep;
3518 3517
3519 3518 /*
3520 3519	 * If the client is present, it is switched from the listener's
3521 3520	 * serializer to the acceptor's. We should block client closes
3522 3521	 * while the serializers are being switched.
3523 3522 *
3524 3523 * It is possible that the client is present but is currently being
3525 3524 * closed. There are two possible cases:
3526 3525 *
3527 3526 * 1) The client has already entered tl_close_finish_ser() and sent
3528 3527 * T_ORDREL_IND. In this case we can just ignore the client (but we
3529 3528 * still need to send all messages from tip->ti_mp to the acceptor).
3530 3529 *
3531 3530 * 2) The client started the close but has not entered
3532 3531 * tl_close_finish_ser() yet. In this case, the client is already
3533 3532 * proceeding asynchronously on the listener's serializer, so we're
3534 3533 * forced to change the acceptor to use the listener's serializer to
3535 3534 * ensure that any operations on the acceptor are serialized with
3536 3535 * respect to the close that's in-progress.
3537 3536 */
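	/*
	 * In short: a client that already sent its T_ORDREL_IND is dropped
	 * (cl_ep is set to NULL below), while a client still mid-close
	 * keeps running on the listener's serializer and
	 * switch_client_serializer is cleared so that the acceptor is
	 * moved onto that serializer instead.
	 */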
3538 3537 if (cl_ep != NULL) {
3539 3538 if (tl_noclose(cl_ep)) {
3540 3539 client_noclose_set = B_TRUE;
3541 3540 } else {
3542 3541 /*
3543 3542			 * Client is closing. If it has sent the
3544 3543			 * T_ORDREL_IND, we can simply ignore it - otherwise,
3545 3544			 * we have to let the client continue until it is
3546 3545			 * sent.
3547 3546 *
3548 3547 * If we do continue using the client, acceptor will
3549 3548 * switch to client's serializer which is used by client
3550 3549 * for its close.
3551 3550 */
3552 3551 tl_client_closing_when_accepting++;
3553 3552 switch_client_serializer = B_FALSE;
3554 3553 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3555 3554 cl_ep->te_state == -1)
3556 3555 cl_ep = NULL;
3557 3556 }
3558 3557 }
3559 3558
3560 3559 if (cl_ep != NULL) {
3561 3560 /*
3562 3561 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3563 3562 * (latter for sockets only)
3564 3563 */
3565 3564 if (cl_ep->te_state != TS_WCON_CREQ &&
3566 3565 (cl_ep->te_state != TS_DATA_XFER &&
3567 3566 IS_SOCKET(cl_ep))) {
3568 3567 err = ECONNREFUSED;
3569 3568 /*
3570 3569 * T_DISCON_IND sent later after committing memory
3571 3570 * and acking validity of request
3572 3571 */
3573 3572 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3574 3573 "tl_conn_res:peer in bad state"));
3575 3574 }
3576 3575
3577 3576 /*
3578 3577		 * preallocate now for T_DISCON_IND or T_CONN_CON
3579 3578 * ack validity of request (T_OK_ACK) after memory committed
3580 3579 */
3581 3580
3582 3581 if (err)
3583 3582 size = sizeof (struct T_discon_ind);
3584 3583 else {
3585 3584 /*
3586 3585 * calculate length of T_CONN_CON message
3587 3586 */
3588 3587 olen = 0;
3589 3588 if (cl_ep->te_flag & TL_SETCRED) {
3590 3589 olen = (t_scalar_t)sizeof (struct opthdr) +
3591 3590 OPTLEN(sizeof (tl_credopt_t));
3592 3591 } else if (cl_ep->te_flag & TL_SETUCRED) {
3593 3592 olen = (t_scalar_t)sizeof (struct opthdr) +
3594 3593 OPTLEN(ucredminsize(acc_ep->te_credp));
3595 3594 }
3596 3595 size = T_ALIGN(sizeof (struct T_conn_con) +
3597 3596 acc_ep->te_alen) + olen;
3598 3597 }
3599 3598 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3600 3599 /*
3601 3600 * roll back state changes
3602 3601 */
3603 3602 tep->te_state = TS_WRES_CIND;
3604 3603 tl_memrecover(wq, mp, size);
3605 3604 freemsg(ackmp);
3606 3605 if (client_noclose_set)
3607 3606 tl_closeok(cl_ep);
3608 3607 tl_closeok(acc_ep);
3609 3608 tl_refrele(acc_ep);
3610 3609 return;
3611 3610 }
3612 3611 mp = NULL;
3613 3612 }
3614 3613
3615 3614 /*
3616 3615 * Now ack validity of request
3617 3616 */
3618 3617 if (tep->te_nicon == 1) {
3619 3618 if (tep == acc_ep)
3620 3619 tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3621 3620 else
3622 3621 tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3623 3622 } else
3624 3623 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3625 3624
3626 3625 /*
3627 3626 * send T_DISCON_IND now if client state validation failed earlier
3628 3627 */
3629 3628 if (err) {
3630 3629 tl_ok_ack(wq, ackmp, prim);
3631 3630 /*
3632 3631 * flush the queues - why always ?
3633 3632 */
3634 3633 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3635 3634
3636 3635 dimp = tl_resizemp(respmp, size);
3637 3636 if (! dimp) {
3638 3637 (void) (STRLOG(TL_ID, tep->te_minor, 3,
3639 3638 SL_TRACE|SL_ERROR,
3640 3639			    "tl_conn_res:discon_ind:allocb failure"));
3641 3640 tl_merror(wq, respmp, ENOMEM);
3642 3641 tl_closeok(acc_ep);
3643 3642 if (client_noclose_set)
3644 3643 tl_closeok(cl_ep);
3645 3644 tl_refrele(acc_ep);
3646 3645 return;
3647 3646 }
3648 3647 if (dimp->b_cont) {
3649 3648 /* no user data in provider generated discon ind */
3650 3649 freemsg(dimp->b_cont);
3651 3650 dimp->b_cont = NULL;
3652 3651 }
3653 3652
3654 3653 DB_TYPE(dimp) = M_PROTO;
3655 3654 di = (struct T_discon_ind *)dimp->b_rptr;
3656 3655 di->PRIM_type = T_DISCON_IND;
3657 3656 di->DISCON_reason = err;
3658 3657 di->SEQ_number = BADSEQNUM;
3659 3658
3660 3659 tep->te_state = TS_IDLE;
3661 3660 /*
3662 3661 * send T_DISCON_IND message
3663 3662 */
3664 3663 putnext(acc_ep->te_rq, dimp);
3665 3664 if (client_noclose_set)
3666 3665 tl_closeok(cl_ep);
3667 3666 tl_closeok(acc_ep);
3668 3667 tl_refrele(acc_ep);
3669 3668 return;
3670 3669 }
3671 3670
3672 3671 /*
3673 3672 * now start connecting the accepting endpoint
3674 3673 */
3675 3674 if (tep != acc_ep)
3676 3675 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3677 3676
3678 3677 if (cl_ep == NULL) {
3679 3678 /*
3680 3679 * The client has already closed. Send up any queued messages
3681 3680 * and change the state accordingly.
3682 3681 */
3683 3682 tl_ok_ack(wq, ackmp, prim);
3684 3683 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3685 3684
3686 3685 /*
3687 3686 * remove endpoint from incoming connection
3688 3687 * delete client from list of incoming connections
3689 3688 */
3690 3689 tl_freetip(tep, tip);
3691 3690 freemsg(mp);
3692 3691 tl_closeok(acc_ep);
3693 3692 tl_refrele(acc_ep);
3694 3693 return;
3695 3694 } else if (tip->ti_mp != NULL) {
3696 3695 /*
3697 3696 * The client could have queued a T_DISCON_IND which needs
3698 3697 * to be sent up.
3699 3698		 * Note that t_discon_req cannot operate the same way as
3700 3699 * t_data_req since it is not possible for it to putbq
3701 3700 * the message and return -1 due to the use of qwriter.
3702 3701 */
3703 3702 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3704 3703 }
3705 3704
3706 3705 /*
3707 3706 * prepare connect confirm T_CONN_CON message
3708 3707 */
3709 3708
3710 3709 /*
3711 3710 * allocate the message - original data blocks
3712 3711 * retained in the returned mblk
3713 3712 */
3714 3713 if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3715 3714 ccmp = tl_resizemp(respmp, size);
3716 3715 if (ccmp == NULL) {
3717 3716 tl_ok_ack(wq, ackmp, prim);
3718 3717 (void) (STRLOG(TL_ID, tep->te_minor, 3,
3719 3718 SL_TRACE|SL_ERROR,
3720 3719 "tl_conn_res:conn_con:allocb failure"));
3721 3720 tl_merror(wq, respmp, ENOMEM);
3722 3721 tl_closeok(acc_ep);
3723 3722 if (client_noclose_set)
3724 3723 tl_closeok(cl_ep);
3725 3724 tl_refrele(acc_ep);
3726 3725 return;
3727 3726 }
3728 3727
3729 3728 DB_TYPE(ccmp) = M_PROTO;
3730 3729 cc = (struct T_conn_con *)ccmp->b_rptr;
3731 3730 cc->PRIM_type = T_CONN_CON;
3732 3731 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3733 3732 cc->RES_length = acc_ep->te_alen;
3734 3733 addr_startp = ccmp->b_rptr + cc->RES_offset;
3735 3734 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3736 3735 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3737 3736 cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3738 3737 cc->RES_length);
3739 3738 cc->OPT_length = olen;
3740 3739 tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3741 3740 acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3742 3741 cl_ep->te_credp);
3743 3742 } else {
3744 3743 cc->OPT_offset = 0;
3745 3744 cc->OPT_length = 0;
3746 3745 }
3747 3746 /*
3748 3747 * Forward the credential in the packet so it can be picked up
3749 3748 * at the higher layers for more complete credential processing
3750 3749 */
3751 3750 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3752 3751 } else {
3753 3752 freemsg(respmp);
3754 3753 respmp = NULL;
3755 3754 }
3756 3755
3757 3756 /*
3758 3757 * make connection linking
3759 3758 * accepting and client endpoints
3760 3759 * No need to increment references:
3761 3760 * on client: it should already have one from tip->ti_tep linkage.
3762 3761	 * on acceptor: it should already have one from the table lookup.
3763 3762 *
3764 3763 * At this point both client and acceptor can't close. Set client
3765 3764 * serializer to acceptor's.
3766 3765 */
3767 3766 ASSERT(cl_ep->te_refcnt >= 2);
3768 3767 ASSERT(acc_ep->te_refcnt >= 2);
3769 3768 ASSERT(cl_ep->te_conp == NULL);
3770 3769 ASSERT(acc_ep->te_conp == NULL);
3771 3770 cl_ep->te_conp = acc_ep;
3772 3771 acc_ep->te_conp = cl_ep;
3773 3772 ASSERT(cl_ep->te_ser == tep->te_ser);
3774 3773 if (switch_client_serializer) {
3775 3774 mutex_enter(&cl_ep->te_ser_lock);
3776 3775 if (cl_ep->te_ser_count > 0) {
3777 3776 switch_client_serializer = B_FALSE;
3778 3777 tl_serializer_noswitch++;
3779 3778 } else {
3780 3779 /*
3781 3780 * Move client to the acceptor's serializer.
3782 3781 */
3783 3782 tl_serializer_refhold(acc_ep->te_ser);
3784 3783 tl_serializer_refrele(cl_ep->te_ser);
3785 3784 cl_ep->te_ser = acc_ep->te_ser;
3786 3785 }
3787 3786 mutex_exit(&cl_ep->te_ser_lock);
3788 3787 }
3789 3788 if (!switch_client_serializer) {
3790 3789 /*
3791 3790		 * It is not possible to switch the client to the acceptor's.
3792 3791 * Move acceptor to client's serializer (which is the same as
3793 3792 * listener's).
3794 3793 */
3795 3794 tl_serializer_refhold(cl_ep->te_ser);
3796 3795 tl_serializer_refrele(acc_ep->te_ser);
3797 3796 acc_ep->te_ser = cl_ep->te_ser;
3798 3797 }
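
	/*
	 * Whichever direction the switch went, client and acceptor now
	 * share one serializer, so further operations on the connected
	 * pair are serialized with respect to each other.
	 */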
3799 3798
3800 3799 TL_REMOVE_PEER(cl_ep->te_oconp);
3801 3800 TL_REMOVE_PEER(acc_ep->te_oconp);
3802 3801
3803 3802 /*
3804 3803 * remove endpoint from incoming connection
3805 3804 * delete client from list of incoming connections
3806 3805 */
3807 3806 tip->ti_tep = NULL;
3808 3807 tl_freetip(tep, tip);
3809 3808 tl_ok_ack(wq, ackmp, prim);
3810 3809
3811 3810 /*
3812 3811 * data blocks already linked in reallocb()
3813 3812 */
3814 3813
3815 3814 /*
3816 3815 * link queues so that I_SENDFD will work
3817 3816 */
3818 3817 if (! IS_SOCKET(tep)) {
3819 3818 acc_ep->te_wq->q_next = cl_ep->te_rq;
3820 3819 cl_ep->te_wq->q_next = acc_ep->te_rq;
3821 3820 }
3822 3821
3823 3822 /*
3824 3823 * send T_CONN_CON up on client side unless it was already
3825 3824	 * done (for a socket). In case any data or ordrel req has been
3826 3825	 * queued, make sure that the service procedure runs.
3827 3826 */
3828 3827 if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3829 3828 enableok(cl_ep->te_wq);
3830 3829 TL_QENABLE(cl_ep);
3831 3830 if (ccmp != NULL)
3832 3831 freemsg(ccmp);
3833 3832 } else {
3834 3833 /*
3835 3834 * change client state on TE_CONN_CON event
3836 3835 */
3837 3836 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3838 3837 putnext(cl_ep->te_rq, ccmp);
3839 3838 }
3840 3839
3841 3840	/* Mark both endpoints as accepted */
3842 3841 cl_ep->te_flag |= TL_ACCEPTED;
3843 3842 acc_ep->te_flag |= TL_ACCEPTED;
3844 3843
3845 3844 /*
3846 3845 * Allow client and acceptor to close.
3847 3846 */
3848 3847 tl_closeok(acc_ep);
3849 3848 if (client_noclose_set)
3850 3849 tl_closeok(cl_ep);
3851 3850 }
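
/*
 * For reference, the exchange tl_conn_res implements corresponds to the
 * user-level TLI accept sequence (a sketch of the usual calls, not code
 * from this file):
 *
 *	listener:	t_listen(lfd, &call);	   (driver sent T_CONN_IND)
 *	listener:	t_accept(lfd, afd, &call); (T_CONN_RES; ACCEPTOR_id
 *						    names the endpoint of afd)
 *	client:		t_connect() completes when T_CONN_CON arrives (for
 *			sockets tl_conn_req generated it early).
 */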
3852 3851
3853 3852
3854 3853
3855 3854
3856 3855 static void
3857 3856 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3858 3857 {
3859 3858 queue_t *wq;
3860 3859 struct T_discon_req *dr;
3861 3860 ssize_t msz;
3862 3861 tl_endpt_t *peer_tep = tep->te_conp;
3863 3862 tl_endpt_t *srv_tep = tep->te_oconp;
3864 3863 tl_icon_t *tip;
3865 3864 size_t size;
3866 3865 mblk_t *ackmp, *dimp, *respmp;
3867 3866 struct T_discon_ind *di;
3868 3867 t_scalar_t save_state, new_state;
3869 3868
3870 3869 if (tep->te_closing) {
3871 3870 freemsg(mp);
3872 3871 return;
3873 3872 }
3874 3873
3875 3874 if ((peer_tep != NULL) && peer_tep->te_closing) {
3876 3875 TL_UNCONNECT(tep->te_conp);
3877 3876 peer_tep = NULL;
3878 3877 }
3879 3878 if ((srv_tep != NULL) && srv_tep->te_closing) {
3880 3879 TL_UNCONNECT(tep->te_oconp);
3881 3880 srv_tep = NULL;
3882 3881 }
3883 3882
3884 3883 wq = tep->te_wq;
3885 3884
3886 3885 /*
3887 3886 * preallocate memory for:
3888 3887 * 1. max of T_ERROR_ACK and T_OK_ACK
3889 3888 * ==> known max T_ERROR_ACK
3890 3889 * 2. for T_DISCON_IND
3891 3890 */
3892 3891 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3893 3892 if (! ackmp) {
3894 3893 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3895 3894 return;
3896 3895 }
3897 3896 /*
3898 3897 * memory committed for T_OK_ACK/T_ERROR_ACK now
3899 3898 * will be committed for T_DISCON_IND later
3900 3899 */
3901 3900
3902 3901 dr = (struct T_discon_req *)mp->b_rptr;
3903 3902 msz = MBLKL(mp);
3904 3903
3905 3904 /*
3906 3905 * validate the state
3907 3906 */
3908 3907 save_state = new_state = tep->te_state;
3909 3908 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3910 3909 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3911 3910 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3912 3911 SL_TRACE|SL_ERROR,
3913 3912 "tl_wput:T_DISCON_REQ:out of state, state=%d",
3914 3913 tep->te_state));
3915 3914 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3916 3915 freemsg(mp);
3917 3916 return;
3918 3917 }
3919 3918 /*
3920 3919 * Defer committing the state change until it is determined if
3921 3920 * the message will be queued with the tl_icon or not.
3922 3921 */
3923 3922 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3924 3923
3925 3924 /* validate the message */
3926 3925 if (msz < sizeof (struct T_discon_req)) {
3927 3926 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3928 3927 "tl_discon_req:invalid message"));
3929 3928 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3930 3929 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3931 3930 freemsg(mp);
3932 3931 return;
3933 3932 }
3934 3933
3935 3934 /*
3936 3935 * if server, then validate that client exists
3937 3936 * by connection sequence number etc.
3938 3937 */
3939 3938 if (tep->te_nicon > 0) { /* server */
3940 3939
3941 3940 /*
3942 3941 * search server list for disconnect client
3943 3942 */
3944 3943 tip = tl_icon_find(tep, dr->SEQ_number);
3945 3944 if (tip == NULL) {
3946 3945 (void) (STRLOG(TL_ID, tep->te_minor, 2,
3947 3946 SL_TRACE|SL_ERROR,
3948 3947 "tl_discon_req:no disconnect endpoint"));
3949 3948 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3950 3949 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3951 3950 freemsg(mp);
3952 3951 return;
3953 3952 }
3954 3953 /*
3955 3954 * If ti_tep is NULL the client has already closed. In this case
3956 3955 * the code below will avoid any action on the client side.
3957 3956 */
3958 3957
3959 3958 IMPLY(tip->ti_tep != NULL,
3960 3959 tip->ti_tep->te_seqno == dr->SEQ_number);
3961 3960 peer_tep = tip->ti_tep;
3962 3961 }
3963 3962
3964 3963 /*
3965 3964 * preallocate now for T_DISCON_IND
3966 3965 * ack validity of request (T_OK_ACK) after memory committed
3967 3966 */
3968 3967 size = sizeof (struct T_discon_ind);
3969 3968 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3970 3969 tl_memrecover(wq, mp, size);
3971 3970 freemsg(ackmp);
3972 3971 return;
3973 3972 }
3974 3973
3975 3974 /*
3976 3975 * prepare message to ack validity of request
3977 3976 */
3978 3977 if (tep->te_nicon == 0)
3979 3978 new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3980 3979 else
3981 3980 if (tep->te_nicon == 1)
3982 3981 new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3983 3982 else
3984 3983 new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3985 3984
3986 3985 /*
3987 3986 * Flushing queues according to TPI. Using the old state.
3988 3987 */
3989 3988 if ((tep->te_nicon <= 1) &&
3990 3989 ((save_state == TS_DATA_XFER) ||
3991 3990 (save_state == TS_WIND_ORDREL) ||
3992 3991 (save_state == TS_WREQ_ORDREL)))
3993 3992 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3994 3993
3995 3994 /* send T_OK_ACK up */
3996 3995 tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3997 3996
3998 3997 /*
3999 3998 * now do disconnect business
4000 3999 */
4001 4000 if (tep->te_nicon > 0) { /* listener */
4002 4001 if (peer_tep != NULL && !peer_tep->te_closing) {
4003 4002 /*
4004 4003			 * disconnect an incoming connect request pending on tep
4005 4004 */
4006 4005 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4007 4006 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4008 4007 SL_TRACE|SL_ERROR,
4009 4008 "tl_discon_req: reallocb failed"));
4010 4009 tep->te_state = new_state;
4011 4010 tl_merror(wq, respmp, ENOMEM);
4012 4011 return;
4013 4012 }
4014 4013 di = (struct T_discon_ind *)dimp->b_rptr;
4015 4014 di->SEQ_number = BADSEQNUM;
4016 4015 save_state = peer_tep->te_state;
4017 4016 peer_tep->te_state = TS_IDLE;
4018 4017
4019 4018 TL_REMOVE_PEER(peer_tep->te_oconp);
4020 4019 enableok(peer_tep->te_wq);
4021 4020 TL_QENABLE(peer_tep);
4022 4021 } else {
4023 4022 freemsg(respmp);
4024 4023 dimp = NULL;
4025 4024 }
4026 4025
4027 4026 /*
4028 4027 * remove endpoint from incoming connection list
4029 4028 * - remove disconnect client from list on server
4030 4029 */
4031 4030 tl_freetip(tep, tip);
4032 4031 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4033 4032 /*
4034 4033 * disconnect an outgoing request pending from tep
4035 4034 */
4036 4035
4037 4036 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4038 4037 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4039 4038 SL_TRACE|SL_ERROR,
4040 4039 "tl_discon_req: reallocb failed"));
4041 4040 tep->te_state = new_state;
4042 4041 tl_merror(wq, respmp, ENOMEM);
4043 4042 return;
4044 4043 }
4045 4044 di = (struct T_discon_ind *)dimp->b_rptr;
4046 4045 DB_TYPE(dimp) = M_PROTO;
4047 4046 di->PRIM_type = T_DISCON_IND;
4048 4047 di->DISCON_reason = ECONNRESET;
4049 4048 di->SEQ_number = tep->te_seqno;
4050 4049
4051 4050 /*
4052 4051 * If this is a socket the T_DISCON_IND is queued with
4053 4052 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4054 4053 * from the list of pending connections.
4055 4054		 * Note that when te_oconp is set, the peer must have
4056 4055		 * a t_connind_t for the client.
4057 4056 */
4058 4057 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4059 4058 /*
4060 4059 * No need to check that
4061 4060 * ti_tep == NULL since the T_DISCON_IND
4062 4061 * takes precedence over other queued
4063 4062 * messages.
4064 4063 */
4065 4064 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4066 4065 peer_tep = NULL;
4067 4066 dimp = NULL;
4068 4067 /*
4069 4068 * Can't clear te_oconp since tl_co_unconnect needs
4070 4069 * it as a hint not to free the tep.
4071 4070 * Keep the state unchanged since tl_conn_res inspects
4072 4071 * it.
4073 4072 */
4074 4073 new_state = tep->te_state;
4075 4074 } else {
4076 4075 /* Found - delete it */
4077 4076 tip = tl_icon_find(peer_tep, tep->te_seqno);
4078 4077 if (tip != NULL) {
4079 4078 ASSERT(tep == tip->ti_tep);
4080 4079 save_state = peer_tep->te_state;
4081 4080 if (peer_tep->te_nicon == 1)
4082 4081 peer_tep->te_state =
4083 4082 NEXTSTATE(TE_DISCON_IND2,
4084 4083 peer_tep->te_state);
4085 4084 else
4086 4085 peer_tep->te_state =
4087 4086 NEXTSTATE(TE_DISCON_IND3,
4088 4087 peer_tep->te_state);
4089 4088 tl_freetip(peer_tep, tip);
4090 4089 }
4091 4090 ASSERT(tep->te_oconp != NULL);
4092 4091 TL_UNCONNECT(tep->te_oconp);
4093 4092 }
4094 4093 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4095 4094 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4096 4095 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4097 4096 SL_TRACE|SL_ERROR,
4098 4097 "tl_discon_req: reallocb failed"));
4099 4098 tep->te_state = new_state;
4100 4099 tl_merror(wq, respmp, ENOMEM);
4101 4100 return;
4102 4101 }
4103 4102 di = (struct T_discon_ind *)dimp->b_rptr;
4104 4103 di->SEQ_number = BADSEQNUM;
4105 4104
4106 4105 save_state = peer_tep->te_state;
4107 4106 peer_tep->te_state = TS_IDLE;
4108 4107 } else {
4109 4108 /* Not connected */
4110 4109 tep->te_state = new_state;
4111 4110 freemsg(respmp);
4112 4111 return;
4113 4112 }
4114 4113
4115 4114 /* Commit state changes */
4116 4115 tep->te_state = new_state;
4117 4116
4118 4117 if (peer_tep == NULL) {
4119 4118 ASSERT(dimp == NULL);
4120 4119 goto done;
4121 4120 }
4122 4121 /*
4123 4122 * Flush queues on peer before sending up
4124 4123 * T_DISCON_IND according to TPI
4125 4124 */
4126 4125
4127 4126 if ((save_state == TS_DATA_XFER) ||
4128 4127 (save_state == TS_WIND_ORDREL) ||
4129 4128 (save_state == TS_WREQ_ORDREL))
4130 4129 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4131 4130
4132 4131 DB_TYPE(dimp) = M_PROTO;
4133 4132 di->PRIM_type = T_DISCON_IND;
4134 4133 di->DISCON_reason = ECONNRESET;
4135 4134
4136 4135 /*
4137 4136 * data blocks already linked into dimp by reallocb()
4138 4137 */
4139 4138 /*
4140 4139 * send indication message to peer user module
4141 4140 */
4142 4141 ASSERT(dimp != NULL);
4143 4142 putnext(peer_tep->te_rq, dimp);
4144 4143 done:
4145 4144 if (tep->te_conp) { /* disconnect pointers if connected */
4146 4145 ASSERT(! peer_tep->te_closing);
4147 4146
4148 4147 /*
4149 4148 * Messages may be queued on peer's write queue
4150 4149 * waiting to be processed by its write service
4151 4150 * procedure. Before the pointer to the peer transport
4152 4151 * structure is set to NULL, qenable the peer's write
4153 4152 * queue so that the queued up messages are processed.
4154 4153 */
4155 4154 if ((save_state == TS_DATA_XFER) ||
4156 4155 (save_state == TS_WIND_ORDREL) ||
4157 4156 (save_state == TS_WREQ_ORDREL))
4158 4157 TL_QENABLE(peer_tep);
4159 4158 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4160 4159 TL_UNCONNECT(peer_tep->te_conp);
4161 4160 if (! IS_SOCKET(tep)) {
4162 4161 /*
4163 4162 * unlink the streams
4164 4163 */
4165 4164 tep->te_wq->q_next = NULL;
4166 4165 peer_tep->te_wq->q_next = NULL;
4167 4166 }
4168 4167 TL_UNCONNECT(tep->te_conp);
4169 4168 }
4170 4169 }
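
/*
 * tl_discon_req thus handles three distinct cases: a listener dropping a
 * pending T_CONN_IND (te_nicon > 0), a connecting client withdrawing a
 * request still queued on the server (te_oconp != NULL), and a fully
 * connected pair (te_conp != NULL).  In each case the surviving peer, if
 * any, receives a T_DISCON_IND with DISCON_reason ECONNRESET; for an
 * early-connected socket client the indication is queued with the
 * T_CONN_IND instead of being sent directly.
 */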
4171 4170
4172 4171 static void
4173 4172 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4174 4173 {
4175 4174 if (!tep->te_closing)
4176 4175 tl_addr_req(mp, tep);
4177 4176 else
4178 4177 freemsg(mp);
4179 4178
4180 4179 tl_serializer_exit(tep);
4181 4180 tl_refrele(tep);
4182 4181 }
4183 4182
4184 4183 static void
4185 4184 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4186 4185 {
4187 4186 queue_t *wq;
4188 4187 size_t ack_sz;
4189 4188 mblk_t *ackmp;
4190 4189 struct T_addr_ack *taa;
4191 4190
4192 4191 if (tep->te_closing) {
4193 4192 freemsg(mp);
4194 4193 return;
4195 4194 }
4196 4195
4197 4196 wq = tep->te_wq;
4198 4197
4199 4198 /*
4200 4199 * Note: T_ADDR_REQ message has only PRIM_type field
4201 4200 * so it is already validated earlier.
4202 4201 */
4203 4202
4204 4203 if (IS_CLTS(tep) ||
4205 4204 (tep->te_state > TS_WREQ_ORDREL) ||
4206 4205 (tep->te_state < TS_DATA_XFER)) {
4207 4206 /*
4208 4207		 * Either connectionless, or connection oriented but not in a
4209 4208		 * connected data-transfer or half-closed state.
4210 4209 */
4211 4210 ack_sz = sizeof (struct T_addr_ack);
4212 4211 if (tep->te_state >= TS_IDLE)
4213 4212 /* is bound */
4214 4213 ack_sz += tep->te_alen;
4215 4214 ackmp = reallocb(mp, ack_sz, 0);
4216 4215 if (ackmp == NULL) {
4217 4216 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4218 4217 SL_TRACE|SL_ERROR,
4219 4218 "tl_addr_req: reallocb failed"));
4220 4219 tl_memrecover(wq, mp, ack_sz);
4221 4220 return;
4222 4221 }
4223 4222
4224 4223 taa = (struct T_addr_ack *)ackmp->b_rptr;
4225 4224
4226 4225 bzero(taa, sizeof (struct T_addr_ack));
4227 4226
4228 4227 taa->PRIM_type = T_ADDR_ACK;
4229 4228 ackmp->b_datap->db_type = M_PCPROTO;
4230 4229 ackmp->b_wptr = (uchar_t *)&taa[1];
4231 4230
4232 4231 if (tep->te_state >= TS_IDLE) {
4233 4232 /* endpoint is bound */
4234 4233 taa->LOCADDR_length = tep->te_alen;
4235 4234 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4236 4235
4237 4236 bcopy(tep->te_abuf, ackmp->b_wptr,
4238 4237 tep->te_alen);
4239 4238 ackmp->b_wptr += tep->te_alen;
4240 4239 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4241 4240 }
4242 4241
4243 4242 (void) qreply(wq, ackmp);
4244 4243 } else {
4245 4244 ASSERT(tep->te_state == TS_DATA_XFER ||
4246 4245 tep->te_state == TS_WIND_ORDREL ||
4247 4246 tep->te_state == TS_WREQ_ORDREL);
4248 4247 /* connection oriented in data transfer */
4249 4248 tl_connected_cots_addr_req(mp, tep);
4250 4249 }
4251 4250 }
4252 4251
4253 4252
4254 4253 static void
4255 4254 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4256 4255 {
4257 4256 tl_endpt_t *peer_tep = tep->te_conp;
4258 4257 size_t ack_sz;
4259 4258 mblk_t *ackmp;
4260 4259 struct T_addr_ack *taa;
4261 4260 uchar_t *addr_startp;
4262 4261
4263 4262 if (tep->te_closing) {
4264 4263 freemsg(mp);
4265 4264 return;
4266 4265 }
4267 4266
4268 4267 if (peer_tep == NULL || peer_tep->te_closing) {
4269 4268 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4270 4269 return;
4271 4270 }
4272 4271
4273 4272 ASSERT(tep->te_state >= TS_IDLE);
4274 4273
4275 4274 ack_sz = sizeof (struct T_addr_ack);
4276 4275 ack_sz += T_ALIGN(tep->te_alen);
4277 4276 ack_sz += peer_tep->te_alen;
4278 4277
4279 4278 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4280 4279 if (ackmp == NULL) {
4281 4280 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4282 4281 "tl_connected_cots_addr_req: reallocb failed"));
4283 4282 tl_memrecover(tep->te_wq, mp, ack_sz);
4284 4283 return;
4285 4284 }
4286 4285
4287 4286 taa = (struct T_addr_ack *)ackmp->b_rptr;
4288 4287
4289 4288 /* endpoint is bound */
4290 4289 taa->LOCADDR_length = tep->te_alen;
4291 4290 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4292 4291
4293 4292 addr_startp = (uchar_t *)&taa[1];
4294 4293
4295 4294 bcopy(tep->te_abuf, addr_startp,
4296 4295 tep->te_alen);
4297 4296
4298 4297 taa->REMADDR_length = peer_tep->te_alen;
4299 4298 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4300 4299 taa->LOCADDR_length);
4301 4300 addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4302 4301 bcopy(peer_tep->te_abuf, addr_startp,
4303 4302 peer_tep->te_alen);
4304 4303 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4305 4304 taa->REMADDR_offset + peer_tep->te_alen;
4306 4305 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4307 4306
4308 4307 putnext(tep->te_rq, ackmp);
4309 4308 }
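
/*
 * Resulting T_ADDR_ACK layout (offsets from b_rptr, matching the code
 * above):
 *
 *	LOCADDR_offset = sizeof (struct T_addr_ack)
 *	LOCADDR_length = tep->te_alen		(local address)
 *	REMADDR_offset = T_ALIGN(LOCADDR_offset + LOCADDR_length)
 *	REMADDR_length = peer_tep->te_alen	(remote address)
 */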
4310 4309
4311 4310 static void
4312 4311 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4313 4312 {
4314 4313 if (IS_CLTS(tep)) {
4315 4314 *ia = tl_clts_info_ack;
4316 4315		ia->TSDU_size = tl_tidusz;	/* TSDU and TIDU size are the same */
4317 4316 } else {
4318 4317 *ia = tl_cots_info_ack;
4319 4318 if (IS_COTSORD(tep))
4320 4319 ia->SERV_type = T_COTS_ORD;
4321 4320 }
4322 4321 ia->TIDU_size = tl_tidusz;
4323 4322 ia->CURRENT_state = tep->te_state;
4324 4323 }
4325 4324
4326 4325 /*
4327 4326 * This routine responds to T_CAPABILITY_REQ messages. It is called by
4328 4327 * tl_wput.
4329 4328 */
4330 4329 static void
4331 4330 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4332 4331 {
4333 4332 mblk_t *ackmp;
4334 4333 t_uscalar_t cap_bits1;
4335 4334 struct T_capability_ack *tcap;
4336 4335
4337 4336 if (tep->te_closing) {
4338 4337 freemsg(mp);
4339 4338 return;
4340 4339 }
4341 4340
4342 4341 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4343 4342
4344 4343 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4345 4344 M_PCPROTO, T_CAPABILITY_ACK);
4346 4345 if (ackmp == NULL) {
4347 4346 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4348 4347 "tl_capability_req: reallocb failed"));
4349 4348 tl_memrecover(tep->te_wq, mp,
4350 4349 sizeof (struct T_capability_ack));
4351 4350 return;
4352 4351 }
4353 4352
4354 4353 tcap = (struct T_capability_ack *)ackmp->b_rptr;
4355 4354 tcap->CAP_bits1 = 0;
4356 4355
4357 4356 if (cap_bits1 & TC1_INFO) {
4358 4357 tl_copy_info(&tcap->INFO_ack, tep);
4359 4358 tcap->CAP_bits1 |= TC1_INFO;
4360 4359 }
4361 4360
4362 4361 if (cap_bits1 & TC1_ACCEPTOR_ID) {
4363 4362 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4364 4363 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4365 4364 }
4366 4365
4367 4366 putnext(tep->te_rq, ackmp);
4368 4367 }
4369 4368
4370 4369 static void
4371 4370 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4372 4371 {
4373 4372 if (! tep->te_closing)
4374 4373 tl_info_req(mp, tep);
4375 4374 else
4376 4375 freemsg(mp);
4377 4376
4378 4377 tl_serializer_exit(tep);
4379 4378 tl_refrele(tep);
4380 4379 }
4381 4380
4382 4381 static void
4383 4382 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4384 4383 {
4385 4384 mblk_t *ackmp;
4386 4385
4387 4386 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4388 4387 M_PCPROTO, T_INFO_ACK);
4389 4388 if (ackmp == NULL) {
4390 4389 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4391 4390 "tl_info_req: reallocb failed"));
4392 4391 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4393 4392 return;
4394 4393 }
4395 4394
4396 4395 /*
4397 4396 * fill in T_INFO_ACK contents
4398 4397 */
4399 4398 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4400 4399
4401 4400 /*
4402 4401 * send ack message
4403 4402 */
4404 4403 putnext(tep->te_rq, ackmp);
4405 4404 }
4406 4405
4407 4406 /*
4408 4407 * Handle M_DATA, T_data_req and T_optdata_req.
4409 4408  * If this is a socket, pass through T_optdata_req options unmodified.
4410 4409 */
4411 4410 static void
4412 4411 tl_data(mblk_t *mp, tl_endpt_t *tep)
4413 4412 {
4414 4413 queue_t *wq = tep->te_wq;
4415 4414 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4416 4415 ssize_t msz = MBLKL(mp);
4417 4416 tl_endpt_t *peer_tep;
4418 4417 queue_t *peer_rq;
4419 4418 boolean_t closing = tep->te_closing;
4420 4419
4421 4420 if (IS_CLTS(tep)) {
4422 4421 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4423 4422 SL_TRACE|SL_ERROR,
4424 4423 "tl_wput:clts:unattached M_DATA"));
4425 4424 if (!closing) {
4426 4425 tl_merror(wq, mp, EPROTO);
4427 4426 } else {
4428 4427 freemsg(mp);
4429 4428 }
4430 4429 return;
4431 4430 }
4432 4431
4433 4432 /*
4434 4433 * If the endpoint is closing it should still forward any data to the
4435 4434 * peer (if it has one). If it is not allowed to forward it can just
4436 4435 * free the message.
4437 4436 */
4438 4437 if (closing &&
4439 4438 (tep->te_state != TS_DATA_XFER) &&
4440 4439 (tep->te_state != TS_WREQ_ORDREL)) {
4441 4440 freemsg(mp);
4442 4441 return;
4443 4442 }
4444 4443
4445 4444 if (DB_TYPE(mp) == M_PROTO) {
4446 4445 if (prim->type == T_DATA_REQ &&
4447 4446 msz < sizeof (struct T_data_req)) {
4448 4447 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4449 4448 SL_TRACE|SL_ERROR,
4450 4449 "tl_data:T_DATA_REQ:invalid message"));
4451 4450 if (!closing) {
4452 4451 tl_merror(wq, mp, EPROTO);
4453 4452 } else {
4454 4453 freemsg(mp);
4455 4454 }
4456 4455 return;
4457 4456 } else if (prim->type == T_OPTDATA_REQ &&
4458 4457 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4459 4458 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4460 4459 SL_TRACE|SL_ERROR,
4461 4460 "tl_data:T_OPTDATA_REQ:invalid message"));
4462 4461 if (!closing) {
4463 4462 tl_merror(wq, mp, EPROTO);
4464 4463 } else {
4465 4464 freemsg(mp);
4466 4465 }
4467 4466 return;
4468 4467 }
4469 4468 }
4470 4469
4471 4470 /*
4472 4471 * connection oriented provider
4473 4472 */
4474 4473 switch (tep->te_state) {
4475 4474 case TS_IDLE:
4476 4475 /*
4477 4476 * Other end not here - do nothing.
4478 4477 */
4479 4478 freemsg(mp);
4480 4479 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4481 4480 "tl_data:cots with endpoint idle"));
4482 4481 return;
4483 4482
4484 4483 case TS_DATA_XFER:
4485 4484 /* valid states */
4486 4485 if (tep->te_conp != NULL)
4487 4486 break;
4488 4487
4489 4488 if (tep->te_oconp == NULL) {
4490 4489 if (!closing) {
4491 4490 tl_merror(wq, mp, EPROTO);
4492 4491 } else {
4493 4492 freemsg(mp);
4494 4493 }
4495 4494 return;
4496 4495 }
4497 4496 /*
4498 4497		 * For a socket the T_CONN_CON is sent early, so
4499 4498		 * the peer might not yet have accepted the connection.
4500 4499		 * If we are closing, queue the packet with the T_CONN_IND.
4501 4500 * Otherwise defer processing the packet until the peer
4502 4501 * accepts the connection.
4503 4502 * Note that the queue is noenabled when we go into this
4504 4503 * state.
4505 4504 */
4506 4505 if (!closing) {
4507 4506 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4508 4507 SL_TRACE|SL_ERROR,
4509 4508 "tl_data: ocon"));
4510 4509 TL_PUTBQ(tep, mp);
4511 4510 return;
4512 4511 }
4513 4512 if (DB_TYPE(mp) == M_PROTO) {
4514 4513 if (msz < sizeof (t_scalar_t)) {
4515 4514 freemsg(mp);
4516 4515 return;
4517 4516 }
4518 4517 /* reuse message block - just change REQ to IND */
4519 4518 if (prim->type == T_DATA_REQ)
4520 4519 prim->type = T_DATA_IND;
4521 4520 else
4522 4521 prim->type = T_OPTDATA_IND;
4523 4522 }
4524 4523 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4525 4524 return;
4526 4525
4527 4526 case TS_WREQ_ORDREL:
4528 4527 if (tep->te_conp == NULL) {
4529 4528 /*
4530 4529 * Other end closed - generate discon_ind
4531 4530 * with reason 0 to cause an EPIPE but no
4532 4531 * read side error on AF_UNIX sockets.
4533 4532 */
4534 4533 freemsg(mp);
4535 4534 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4536 4535 SL_TRACE|SL_ERROR,
4537 4536 "tl_data: WREQ_ORDREL and no peer"));
4538 4537 tl_discon_ind(tep, 0);
4539 4538 return;
4540 4539 }
4541 4540 break;
4542 4541
4543 4542 default:
4544 4543 /* invalid state for event TE_DATA_REQ */
4545 4544 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4546 4545 "tl_data:cots:out of state"));
4547 4546 tl_merror(wq, mp, EPROTO);
4548 4547 return;
4549 4548 }
4550 4549 /*
4551 4550 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4552 4551 * (State stays same on this event)
4553 4552 */
4554 4553
4555 4554 /*
4556 4555 * get connected endpoint
4557 4556 */
4558 4557 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4559 4558 freemsg(mp);
4560 4559 /* Peer closed */
4561 4560 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4562 4561 "tl_data: peer gone"));
4563 4562 return;
4564 4563 }
4565 4564
4566 4565 ASSERT(tep->te_serializer == peer_tep->te_serializer);
4567 4566 peer_rq = peer_tep->te_rq;
4568 4567
4569 4568 /*
4570 4569 * Put it back if flow controlled
4571 4570	 * Note: the number of messages already queued when we are closing
4572 4571	 * is bounded, so we can ignore flow control.
4573 4572 */
4574 4573 if (!canputnext(peer_rq) && !closing) {
4575 4574 TL_PUTBQ(tep, mp);
4576 4575 return;
4577 4576 }
4578 4577
4579 4578 /*
4580 4579 * validate peer state
4581 4580 */
4582 4581 switch (peer_tep->te_state) {
4583 4582 case TS_DATA_XFER:
4584 4583 case TS_WIND_ORDREL:
4585 4584 /* valid states */
4586 4585 break;
4587 4586 default:
4588 4587 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4589 4588 "tl_data:rx side:invalid state"));
4590 4589 tl_merror(peer_tep->te_wq, mp, EPROTO);
4591 4590 return;
4592 4591 }
4593 4592 if (DB_TYPE(mp) == M_PROTO) {
4594 4593 /* reuse message block - just change REQ to IND */
4595 4594 if (prim->type == T_DATA_REQ)
4596 4595 prim->type = T_DATA_IND;
4597 4596 else
4598 4597 prim->type = T_OPTDATA_IND;
4599 4598 }
4600 4599 /*
4601 4600 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4602 4601 * (peer state stays same on this event)
4603 4602 */
4604 4603 /*
4605 4604 * send data to connected peer
4606 4605 */
4607 4606 putnext(peer_rq, mp);
4608 4607 }
4609 4608
4610 4609
4611 4610
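/*
 * tl_exdata() and tl_ordrel() below follow the same shape as tl_data():
 * validate the local state, queue the message on te_oconp when a socket
 * peer has not yet accepted, push back with TL_PUTBQ() under flow
 * control, then rewrite the REQ primitive into the matching IND and
 * putnext() it to the connected peer.
 */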
4612 4611 static void
4613 4612 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4614 4613 {
4615 4614 queue_t *wq = tep->te_wq;
4616 4615 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4617 4616 ssize_t msz = MBLKL(mp);
4618 4617 tl_endpt_t *peer_tep;
4619 4618 queue_t *peer_rq;
4620 4619 boolean_t closing = tep->te_closing;
4621 4620
4622 4621 if (msz < sizeof (struct T_exdata_req)) {
4623 4622 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4624 4623 "tl_exdata:invalid message"));
4625 4624 if (!closing) {
4626 4625 tl_merror(wq, mp, EPROTO);
4627 4626 } else {
4628 4627 freemsg(mp);
4629 4628 }
4630 4629 return;
4631 4630 }
4632 4631
4633 4632 /*
4634 4633 * If the endpoint is closing it should still forward any data to the
4635 4634 * peer (if it has one). If it is not allowed to forward it can just
4636 4635 * free the message.
4637 4636 */
4638 4637 if (closing &&
4639 4638 (tep->te_state != TS_DATA_XFER) &&
4640 4639 (tep->te_state != TS_WREQ_ORDREL)) {
4641 4640 freemsg(mp);
4642 4641 return;
4643 4642 }
4644 4643
4645 4644 /*
4646 4645 * validate state
4647 4646 */
4648 4647 switch (tep->te_state) {
4649 4648 case TS_IDLE:
4650 4649 /*
4651 4650 * Other end not here - do nothing.
4652 4651 */
4653 4652 freemsg(mp);
4654 4653 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4655 4654 "tl_exdata:cots with endpoint idle"));
4656 4655 return;
4657 4656
4658 4657 case TS_DATA_XFER:
4659 4658 /* valid states */
4660 4659 if (tep->te_conp != NULL)
4661 4660 break;
4662 4661
4663 4662 if (tep->te_oconp == NULL) {
4664 4663 if (!closing) {
4665 4664 tl_merror(wq, mp, EPROTO);
4666 4665 } else {
4667 4666 freemsg(mp);
4668 4667 }
4669 4668 return;
4670 4669 }
4671 4670 /*
4672 4671		 * For a socket the T_CONN_CON is sent early, so
4673 4672		 * the peer might not yet have accepted the connection.
4674 4673		 * If we are closing, queue the packet with the T_CONN_IND.
4675 4674 * Otherwise defer processing the packet until the peer
4676 4675 * accepts the connection.
4677 4676 * Note that the queue is noenabled when we go into this
4678 4677 * state.
4679 4678 */
4680 4679 if (!closing) {
4681 4680 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4682 4681 SL_TRACE|SL_ERROR,
4683 4682 "tl_exdata: ocon"));
4684 4683 TL_PUTBQ(tep, mp);
4685 4684 return;
4686 4685 }
4687 4686 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4688 4687 "tl_exdata: closing socket ocon"));
4689 4688 prim->type = T_EXDATA_IND;
4690 4689 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4691 4690 return;
4692 4691
4693 4692 case TS_WREQ_ORDREL:
4694 4693 if (tep->te_conp == NULL) {
4695 4694 /*
4696 4695 * Other end closed - generate discon_ind
4697 4696 * with reason 0 to cause an EPIPE but no
4698 4697 * read side error on AF_UNIX sockets.
4699 4698 */
4700 4699 freemsg(mp);
4701 4700 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4702 4701 SL_TRACE|SL_ERROR,
4703 4702 "tl_exdata: WREQ_ORDREL and no peer"));
4704 4703 tl_discon_ind(tep, 0);
4705 4704 return;
4706 4705 }
4707 4706 break;
4708 4707
4709 4708 default:
4710 4709 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4711 4710 SL_TRACE|SL_ERROR,
4712 4711 "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4713 4712 tep->te_state));
4714 4713 tl_merror(wq, mp, EPROTO);
4715 4714 return;
4716 4715 }
4717 4716 /*
4718 4717 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4719 4718 * (state stays same on this event)
4720 4719 */
4721 4720
4722 4721 /*
4723 4722 * get connected endpoint
4724 4723 */
4725 4724 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4726 4725 freemsg(mp);
4727 4726 /* Peer closed */
4728 4727 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4729 4728 "tl_exdata: peer gone"));
4730 4729 return;
4731 4730 }
4732 4731
4733 4732 peer_rq = peer_tep->te_rq;
4734 4733
4735 4734 /*
4736 4735 * Put it back if flow controlled
4737 4736	 * Note: the number of messages already queued when we are closing
4738 4737	 * is bounded, so we can ignore flow control.
4739 4738 */
4740 4739 if (!canputnext(peer_rq) && !closing) {
4741 4740 TL_PUTBQ(tep, mp);
4742 4741 return;
4743 4742 }
4744 4743
4745 4744 /*
4746 4745 * validate state on peer
4747 4746 */
4748 4747 switch (peer_tep->te_state) {
4749 4748 case TS_DATA_XFER:
4750 4749 case TS_WIND_ORDREL:
4751 4750 /* valid states */
4752 4751 break;
4753 4752 default:
4754 4753 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4755 4754 "tl_exdata:rx side:invalid state"));
4756 4755 tl_merror(peer_tep->te_wq, mp, EPROTO);
4757 4756 return;
4758 4757 }
4759 4758 /*
4760 4759 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4761 4760 * (peer state stays same on this event)
4762 4761 */
4763 4762 /*
4764 4763 * reuse message block
4765 4764 */
4766 4765 prim->type = T_EXDATA_IND;
4767 4766
4768 4767 /*
4769 4768 * send data to connected peer
4770 4769 */
4771 4770 putnext(peer_rq, mp);
4772 4771 }
4773 4772
4774 4773
4775 4774
4776 4775 static void
4777 4776 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4778 4777 {
4779 4778 queue_t *wq = tep->te_wq;
4780 4779 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4781 4780 ssize_t msz = MBLKL(mp);
4782 4781 tl_endpt_t *peer_tep;
4783 4782 queue_t *peer_rq;
4784 4783 boolean_t closing = tep->te_closing;
4785 4784
4786 4785 if (msz < sizeof (struct T_ordrel_req)) {
4787 4786 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4788 4787 "tl_ordrel:invalid message"));
4789 4788 if (!closing) {
4790 4789 tl_merror(wq, mp, EPROTO);
4791 4790 } else {
4792 4791 freemsg(mp);
4793 4792 }
4794 4793 return;
4795 4794 }
4796 4795
4797 4796 /*
4798 4797 * validate state
4799 4798 */
4800 4799 switch (tep->te_state) {
4801 4800 case TS_DATA_XFER:
4802 4801 case TS_WREQ_ORDREL:
4803 4802 /* valid states */
4804 4803 if (tep->te_conp != NULL)
4805 4804 break;
4806 4805
4807 4806 if (tep->te_oconp == NULL)
4808 4807 break;
4809 4808
4810 4809 /*
4811 4810		 * For a socket the T_CONN_CON is sent early, so
4812 4811		 * the peer might not yet have accepted the connection.
4813 4812		 * If we are closing, queue the packet with the T_CONN_IND.
4814 4813 * Otherwise defer processing the packet until the peer
4815 4814 * accepts the connection.
4816 4815 * Note that the queue is noenabled when we go into this
4817 4816 * state.
4818 4817 */
4819 4818 if (!closing) {
4820 4819 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4821 4820 SL_TRACE|SL_ERROR,
4822 4821			    "tl_ordrel: ocon"));
4823 4822 TL_PUTBQ(tep, mp);
4824 4823 return;
4825 4824 }
4826 4825 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4827 4826		    "tl_ordrel: closing socket ocon"));
4828 4827 prim->type = T_ORDREL_IND;
4829 4828 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4830 4829 return;
4831 4830
4832 4831 default:
4833 4832 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4834 4833 SL_TRACE|SL_ERROR,
4835 4834 "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4836 4835 tep->te_state));
4837 4836 if (!closing) {
4838 4837 tl_merror(wq, mp, EPROTO);
4839 4838 } else {
4840 4839 freemsg(mp);
4841 4840 }
4842 4841 return;
4843 4842 }
4844 4843 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4845 4844
4846 4845 /*
4847 4846 * get connected endpoint
4848 4847 */
4849 4848 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4850 4849 /* Peer closed */
4851 4850 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4852 4851 "tl_ordrel: peer gone"));
4853 4852 freemsg(mp);
4854 4853 return;
4855 4854 }
4856 4855
4857 4856 peer_rq = peer_tep->te_rq;
4858 4857
4859 4858 /*
4860 4859 * Put it back if flow controlled except when we are closing.
4861 4860	 * Note: the number of messages already queued when we are closing
4862 4861	 * is bounded, so we can ignore flow control.
4863 4862 */
4864 4863 if (! canputnext(peer_rq) && !closing) {
4865 4864 TL_PUTBQ(tep, mp);
4866 4865 return;
4867 4866 }
4868 4867
4869 4868 /*
4870 4869 * validate state on peer
4871 4870 */
4872 4871 switch (peer_tep->te_state) {
4873 4872 case TS_DATA_XFER:
4874 4873 case TS_WIND_ORDREL:
4875 4874 /* valid states */
4876 4875 break;
4877 4876 default:
4878 4877 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4879 4878 "tl_ordrel:rx side:invalid state"));
4880 4879 tl_merror(peer_tep->te_wq, mp, EPROTO);
4881 4880 return;
4882 4881 }
4883 4882 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4884 4883
4885 4884 /*
4886 4885 * reuse message block
4887 4886 */
4888 4887 prim->type = T_ORDREL_IND;
4889 4888 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4890 4889 "tl_ordrel: send ordrel_ind"));
4891 4890
4892 4891 /*
4893 4892 * send data to connected peer
4894 4893 */
4895 4894 putnext(peer_rq, mp);
4896 4895 }
4897 4896
4898 4897
4899 4898 /*
4900 4899 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4901 4900 */
4902 4901 static void
4903 4902 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4904 4903 {
4905 4904 size_t err_sz;
4906 4905 tl_endpt_t *tep;
4907 4906 struct T_unitdata_req *udreq;
4908 4907 mblk_t *err_mp;
4909 4908 t_scalar_t alen;
4910 4909 t_scalar_t olen;
4911 4910 struct T_uderror_ind *uderr;
4912 4911 uchar_t *addr_startp;
4913 4912
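
	/*
	 * Size the T_UDERROR_IND so that the destination address, if any,
	 * lands right after the header and the options, if any, start at
	 * the next T_ALIGN()ed offset past the address; the offsets
	 * filled in below mirror this computation.
	 */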
4914 4913 err_sz = sizeof (struct T_uderror_ind);
4915 4914 tep = (tl_endpt_t *)wq->q_ptr;
4916 4915 udreq = (struct T_unitdata_req *)mp->b_rptr;
4917 4916 alen = udreq->DEST_length;
4918 4917 olen = udreq->OPT_length;
4919 4918
4920 4919 if (alen > 0)
4921 4920 err_sz = T_ALIGN(err_sz + alen);
4922 4921 if (olen > 0)
4923 4922 err_sz += olen;
4924 4923
4925 4924 err_mp = allocb(err_sz, BPRI_MED);
4926 4925 if (! err_mp) {
4927 4926 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4928 4927 "tl_uderr:allocb failure"));
4929 4928 /*
4930 4929 * Note: no rollback of state needed as it does
4931 4930 * not change in connectionless transport
4932 4931 */
4933 4932 tl_memrecover(wq, mp, err_sz);
4934 4933 return;
4935 4934 }
4936 4935
4937 4936 DB_TYPE(err_mp) = M_PROTO;
4938 4937 err_mp->b_wptr = err_mp->b_rptr + err_sz;
4939 4938 uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4940 4939 uderr->PRIM_type = T_UDERROR_IND;
4941 4940 uderr->ERROR_type = err;
4942 4941 uderr->DEST_length = alen;
4943 4942 uderr->OPT_length = olen;
4944 4943 if (alen <= 0) {
4945 4944 uderr->DEST_offset = 0;
4946 4945 } else {
4947 4946 uderr->DEST_offset =
4948 4947 (t_scalar_t)sizeof (struct T_uderror_ind);
4949 4948 addr_startp = mp->b_rptr + udreq->DEST_offset;
4950 4949 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4951 4950 (size_t)alen);
4952 4951 }
4953 4952 if (olen <= 0) {
4954 4953 uderr->OPT_offset = 0;
4955 4954 } else {
4956 4955 uderr->OPT_offset =
4957 4956 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4958 4957 uderr->DEST_length);
4959 4958 addr_startp = mp->b_rptr + udreq->OPT_offset;
4960 4959 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4961 4960 (size_t)olen);
4962 4961 }
4963 4962 freemsg(mp);
4964 4963
4965 4964 /*
4966 4965 * send indication message
4967 4966 */
4968 4967 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4969 4968
4970 4969 qreply(wq, err_mp);
4971 4970 }
4972 4971
4973 4972 static void
4974 4973 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4975 4974 {
4976 4975 queue_t *wq = tep->te_wq;
4977 4976
4978 4977 if (!tep->te_closing && (wq->q_first != NULL)) {
4979 4978 TL_PUTQ(tep, mp);
4980 4979 } else if (tep->te_rq != NULL)
4981 4980 tl_unitdata(mp, tep);
4982 4981 else
4983 4982 freemsg(mp);
4984 4983
4985 4984 tl_serializer_exit(tep);
4986 4985 tl_refrele(tep);
4987 4986 }
4988 4987
4989 4988 /*
4990 4989 * Handle T_unitdata_req.
4991 4990 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4992 4991 * If this is a socket pass through options unmodified.
4993 4992 */
4994 4993 static void
4995 4994 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4996 4995 {
4997 4996 queue_t *wq = tep->te_wq;
4998 4997 soux_addr_t ux_addr;
4999 4998 tl_addr_t destaddr;
5000 4999 uchar_t *addr_startp;
5001 5000 tl_endpt_t *peer_tep;
5002 5001 struct T_unitdata_ind *udind;
5003 5002 struct T_unitdata_req *udreq;
5004 5003 ssize_t msz, ui_sz, reuse_mb_sz;
5005 5004 t_scalar_t alen, aoff, olen, ooff;
5006 5005 t_scalar_t oldolen = 0;
5007 5006 cred_t *cr = NULL;
5008 5007 pid_t cpid;
5009 5008
5010 5009 udreq = (struct T_unitdata_req *)mp->b_rptr;
5011 5010 msz = MBLKL(mp);
5012 5011
5013 5012 /*
5014 5013 * validate the state
5015 5014 */
5016 5015 if (tep->te_state != TS_IDLE) {
5017 5016 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5018 5017 SL_TRACE|SL_ERROR,
5019 5018 	    "tl_unitdata:T_UNITDATA_REQ:out of state"));
5020 5019 tl_merror(wq, mp, EPROTO);
5021 5020 return;
5022 5021 }
5023 5022 /*
5024 5023 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5025 5024 * (state does not change on this event)
5026 5025 */
5027 5026
5028 5027 /*
5029 5028 * validate the message
5030 5029 * Note: dereference fields in struct inside message only
5031 5030 * after validating the message length.
5032 5031 */
5033 5032 if (msz < sizeof (struct T_unitdata_req)) {
5034 5033 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5035 5034 "tl_unitdata:invalid message length"));
5036 5035 tl_merror(wq, mp, EINVAL);
5037 5036 return;
5038 5037 }
5039 5038 alen = udreq->DEST_length;
5040 5039 aoff = udreq->DEST_offset;
5041 5040 oldolen = olen = udreq->OPT_length;
5042 5041 ooff = udreq->OPT_offset;
5043 5042 if (olen == 0)
5044 5043 ooff = 0;
5045 5044
5046 5045 if (IS_SOCKET(tep)) {
5047 5046 if ((alen != TL_SOUX_ADDRLEN) ||
5048 5047 (aoff < 0) ||
5049 5048 (aoff + alen > msz) ||
5050 5049 (olen < 0) || (ooff < 0) ||
5051 5050 ((olen > 0) && ((ooff + olen) > msz))) {
5052 5051 (void) (STRLOG(TL_ID, tep->te_minor,
5053 5052 1, SL_TRACE|SL_ERROR,
5054 5053 "tl_unitdata_req: invalid socket addr "
5055 5054 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5056 5055 (int)msz, alen, aoff, olen, ooff));
5057 5056 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5058 5057 return;
5059 5058 }
5060 5059 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5061 5060
5062 5061 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5063 5062 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5064 5063 (void) (STRLOG(TL_ID, tep->te_minor,
5065 5064 1, SL_TRACE|SL_ERROR,
5066 5065 			    "tl_unitdata: invalid socket magic"));
5067 5066 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5068 5067 return;
5069 5068 }
5070 5069 } else {
5071 5070 if ((alen < 0) ||
5072 5071 (aoff < 0) ||
5073 5072 ((alen > 0) && ((aoff + alen) > msz)) ||
5074 5073 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5075 5074 ((aoff + alen) < 0) ||
5076 5075 ((olen > 0) && ((ooff + olen) > msz)) ||
5077 5076 (olen < 0) ||
5078 5077 (ooff < 0) ||
5079 5078 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5080 5079 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5081 5080 SL_TRACE|SL_ERROR,
5082 5081 "tl_unitdata:invalid unit data message"));
5083 5082 tl_merror(wq, mp, EINVAL);
5084 5083 return;
5085 5084 }
5086 5085 }
5087 5086
5088 5087 	/* Zero-length address, or options on a non-socket, not supported */
5089 5088 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5090 5089 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5091 5090 "tl_unitdata:option use(unsupported) or zero len addr"));
5092 5091 tl_uderr(wq, mp, EPROTO);
5093 5092 return;
5094 5093 }
5095 5094 #ifdef DEBUG
5096 5095 /*
5097 5096 * Mild form of ASSERT()ion to detect broken TPI apps.
5098 5097 * if (! assertion)
5099 5098 * log warning;
5100 5099 */
5101 5100 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5102 5101 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5103 5102 "tl_unitdata:addr overlaps TPI message"));
5104 5103 }
5105 5104 #endif
5106 5105 /*
5107 5106 * get destination endpoint
5108 5107 */
5109 5108 destaddr.ta_alen = alen;
5110 5109 destaddr.ta_abuf = mp->b_rptr + aoff;
5111 5110 destaddr.ta_zoneid = tep->te_zoneid;
5112 5111
5113 5112 /*
5114 5113 	 * Check whether the destination is the same as the one used
5115 5114 	 * previously and whether the destination endpoint is in the right
5116 5115 	 * state. If something is wrong, find the destination again and cache it.
5117 5116 */
5118 5117 peer_tep = tep->te_lastep;
5119 5118
5120 5119 if ((peer_tep == NULL) || peer_tep->te_closing ||
5121 5120 (peer_tep->te_state != TS_IDLE) ||
5122 5121 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5123 5122 /*
5124 5123 		 * Not the same as the cached destination; need to find the
5125 5124 		 * right destination.
5126 5125 */
5127 5126 peer_tep = (IS_SOCKET(tep) ?
5128 5127 tl_sock_find_peer(tep, &ux_addr) :
5129 5128 tl_find_peer(tep, &destaddr));
5130 5129
5131 5130 if (peer_tep == NULL) {
5132 5131 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5133 5132 SL_TRACE|SL_ERROR,
5134 5133 "tl_unitdata:no one at destination address"));
5135 5134 tl_uderr(wq, mp, ECONNRESET);
5136 5135 return;
5137 5136 }
5138 5137
5139 5138 /*
5140 5139 * Cache the new peer.
5141 5140 */
5142 5141 if (tep->te_lastep != NULL)
5143 5142 tl_refrele(tep->te_lastep);
5144 5143
5145 5144 tep->te_lastep = peer_tep;
5146 5145 }
5147 5146
5148 5147 if (peer_tep->te_state != TS_IDLE) {
5149 5148 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5150 5149 "tl_unitdata:provider in invalid state"));
5151 5150 tl_uderr(wq, mp, EPROTO);
5152 5151 return;
5153 5152 }
5154 5153
5155 5154 ASSERT(peer_tep->te_rq != NULL);
5156 5155
5157 5156 /*
5158 5157 * Put it back if flow controlled except when we are closing.
5159 5158 	 * Note: the number of messages already on the queue when we are
5160 5159 	 * closing is bounded, so we can ignore flow control.
5161 5160 */
5162 5161 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5163 5162 /* record what we are flow controlled on */
5164 5163 if (tep->te_flowq != NULL) {
5165 5164 list_remove(&tep->te_flowq->te_flowlist, tep);
5166 5165 }
5167 5166 list_insert_head(&peer_tep->te_flowlist, tep);
5168 5167 tep->te_flowq = peer_tep;
5169 5168 TL_PUTBQ(tep, mp);
5170 5169 return;
5171 5170 }
5172 5171 /*
5173 5172 * prepare indication message
5174 5173 */
5175 5174
5176 5175 /*
5177 5176 * calculate length of message
5178 5177 */
5179 5178 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5180 5179 cr = msg_getcred(mp, &cpid);
5181 5180 ASSERT(cr != NULL);
5182 5181
5183 5182 if (peer_tep->te_flag & TL_SETCRED) {
5184 5183 ASSERT(olen == 0);
5185 5184 olen = (t_scalar_t)sizeof (struct opthdr) +
5186 5185 OPTLEN(sizeof (tl_credopt_t));
5187 5186 /* 1 option only */
5188 5187 } else if (peer_tep->te_flag & TL_SETUCRED) {
5189 5188 ASSERT(olen == 0);
5190 5189 olen = (t_scalar_t)sizeof (struct opthdr) +
5191 5190 OPTLEN(ucredminsize(cr));
5192 5191 /* 1 option only */
5193 5192 } else {
5194 5193 /* Possibly more than one option */
5195 5194 olen += (t_scalar_t)sizeof (struct T_opthdr) +
5196 5195 OPTLEN(ucredminsize(cr));
5197 5196 }
5198 5197 }
5199 5198
5200 5199 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5201 5200 reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5202 5201
5203 5202 /*
5204 5203 * If the unitdata_ind fits and we are not adding options
5205 5204 * reuse the udreq mblk.
5206 5205 *
5207 5206 * Otherwise, it is possible we need to append an option if one of the
5208 5207 * te_flag bits is set. This requires extra space in the data block for
5209 5208 * the additional option but the traditional technique used below to
5210 5209 * allocate a new block and copy into it will not work when there is a
5211 5210 * message block with a free pointer (since we don't know anything
5212 5211 * about the layout of the data, pointers referencing or within the
5213 5212 * data, etc.). To handle this possibility the upper layers may have
5214 5213 * preallocated some space to use for appending an option. We check the
5215 5214 	 * overall mblk size against the size we need ('reuse_mb_sz' with the
5216 5215 * original address length [alen] to ensure we won't overrun the
5217 5216 * current mblk data size) to see if there is free space and thus
5218 5217 * avoid allocating a new message block.
5219 5218 */
5220 5219 if (msz >= ui_sz && alen >= tep->te_alen &&
5221 5220 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5222 5221 /*
5223 5222 * Reuse the original mblk. Leave options in place.
5224 5223 */
5225 5224 udind = (struct T_unitdata_ind *)mp->b_rptr;
5226 5225 udind->PRIM_type = T_UNITDATA_IND;
5227 5226 udind->SRC_length = tep->te_alen;
5228 5227 addr_startp = mp->b_rptr + udind->SRC_offset;
5229 5228 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5230 5229
5231 5230 } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5232 5231 mp->b_datap->db_frtnp != NULL) {
5233 5232 /*
5234 5233 * We have a message block with a free pointer, but extra space
5235 5234 * has been pre-allocated for us in case we need to append an
5236 5235 * option. Reuse the original mblk, leaving existing options in
5237 5236 * place.
5238 5237 */
5239 5238 udind = (struct T_unitdata_ind *)mp->b_rptr;
5240 5239 udind->PRIM_type = T_UNITDATA_IND;
5241 5240 udind->SRC_length = tep->te_alen;
5242 5241 addr_startp = mp->b_rptr + udind->SRC_offset;
5243 5242 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5244 5243
5245 5244 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5246 5245 ASSERT(cr != NULL);
5247 5246 /*
5248 5247 * We're appending one new option here after the
5249 5248 * original ones.
5250 5249 */
5251 5250 tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5252 5251 cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5253 5252 }
5254 5253
5255 5254 } else if (mp->b_datap->db_frtnp != NULL) {
5256 5255 /*
5257 5256 * The next block creates a new mp and tries to copy the data
5258 5257 * block into it, but that cannot handle a message with a free
5259 5258 * pointer (for more details see the comment in kstrputmsg()
5260 5259 * where dupmsg() is called). Since we can never properly
5261 5260 * duplicate the mp while also extending the data, just error
5262 5261 * out now.
5263 5262 */
5264 5263 tl_uderr(wq, mp, EPROTO);
5265 5264 return;
5266 5265 } else {
5267 5266 /* Allocate a new T_unitdata_ind message */
5268 5267 mblk_t *ui_mp;
5269 5268
5270 5269 ui_mp = allocb(ui_sz, BPRI_MED);
5271 5270 if (! ui_mp) {
5272 5271 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5273 5272 "tl_unitdata:allocb failure:message queued"));
5274 5273 tl_memrecover(wq, mp, ui_sz);
5275 5274 return;
5276 5275 }
5277 5276
5278 5277 /*
5279 5278 * fill in T_UNITDATA_IND contents
5280 5279 */
5281 5280 DB_TYPE(ui_mp) = M_PROTO;
5282 5281 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5283 5282 udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5284 5283 udind->PRIM_type = T_UNITDATA_IND;
5285 5284 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5286 5285 udind->SRC_length = tep->te_alen;
5287 5286 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5288 5287 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5289 5288 udind->OPT_offset =
5290 5289 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5291 5290 udind->OPT_length = olen;
5292 5291 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5293 5292
5294 5293 if (oldolen != 0) {
5295 5294 bcopy((void *)((uintptr_t)udreq + ooff),
5296 5295 (void *)((uintptr_t)udind +
5297 5296 udind->OPT_offset),
5298 5297 oldolen);
5299 5298 }
5300 5299 ASSERT(cr != NULL);
5301 5300
5302 5301 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5303 5302 oldolen, cr, cpid,
5304 5303 peer_tep->te_flag, peer_tep->te_credp);
5305 5304 } else {
5306 5305 bcopy((void *)((uintptr_t)udreq + ooff),
5307 5306 (void *)((uintptr_t)udind + udind->OPT_offset),
5308 5307 olen);
5309 5308 }
5310 5309
5311 5310 /*
5312 5311 * relink data blocks from mp to ui_mp
5313 5312 */
5314 5313 ui_mp->b_cont = mp->b_cont;
5315 5314 freeb(mp);
5316 5315 mp = ui_mp;
5317 5316 }
5318 5317 /*
5319 5318 * send indication message
5320 5319 */
5321 5320 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5322 5321 putnext(peer_tep->te_rq, mp);
5323 5322 }
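Editor's note: the resulting T_UNITDATA_IND always places the source address right after the header and the options at the next t_scalar_t boundary, which is what the ui_sz/reuse_mb_sz computations above account for. A self-contained sketch of that layout, with local stand-ins for the structure and the alignment macro (assuming 4-byte t_scalar_t):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define	T_ALIGN(x)	(((x) + 3u) & ~3u)	/* assumed 4-byte alignment */

struct ud_ind {			/* stand-in for struct T_unitdata_ind */
	int32_t PRIM_type;
	int32_t SRC_length, SRC_offset;
	int32_t OPT_length, OPT_offset;
};

int
main(void)
{
	const unsigned char addr[5] = { 1, 2, 3, 4, 5 };  /* source address */
	uint32_t alen = sizeof (addr), olen = 8;  /* hypothetical option size */
	uint32_t buf[16];			  /* aligned message buffer */
	struct ud_ind *ui = (struct ud_ind *)buf;
	uint32_t total;

	ui->PRIM_type = 0;			/* stand-in primitive code */
	ui->SRC_offset = sizeof (struct ud_ind);
	ui->SRC_length = alen;
	(void) memcpy((char *)buf + ui->SRC_offset, addr, alen);
	ui->OPT_offset = T_ALIGN(ui->SRC_offset + ui->SRC_length);
	ui->OPT_length = olen;			/* option bytes would go here */

	total = T_ALIGN((uint32_t)sizeof (struct ud_ind) + alen) + olen;
	(void) printf("opts at %d, total %u\n", (int)ui->OPT_offset, total);
	return (0);
}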
5324 5323
5325 5324
5326 5325
5327 5326 /*
5328 5327 * Check if a given addr is in use.
5329 5328 * Endpoint ptr returned or NULL if not found.
5330 5329 * The name space is separate for each mode. This implies that
5331 5330 * sockets get their own name space.
5332 5331 */
5333 5332 static tl_endpt_t *
5334 5333 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5335 5334 {
5336 5335 tl_endpt_t *peer_tep = NULL;
5337 5336 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5338 5337 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5339 5338
5340 5339 ASSERT(! IS_SOCKET(tep));
5341 5340
5342 5341 ASSERT(ap != NULL && ap->ta_alen > 0);
5343 5342 ASSERT(ap->ta_zoneid == tep->te_zoneid);
5344 5343 ASSERT(ap->ta_abuf != NULL);
5345 5344 EQUIV(rc == 0, peer_tep != NULL);
5346 5345 IMPLY(rc == 0,
5347 5346 (tep->te_zoneid == peer_tep->te_zoneid) &&
5348 5347 (tep->te_transport == peer_tep->te_transport));
5349 5348
5350 5349 if ((rc == 0) && (peer_tep->te_closing)) {
5351 5350 tl_refrele(peer_tep);
5352 5351 peer_tep = NULL;
5353 5352 }
5354 5353
5355 5354 return (peer_tep);
5356 5355 }
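Editor's note: the EQUIV() and IMPLY() checks above come from the kernel's debug support; they assert a "both or neither" and an "if-then" relation respectively. Local stand-ins showing the semantics:

#include <assert.h>

#define	EQUIV(a, b)	assert(!!(a) == !!(b))	/* a and b agree */
#define	IMPLY(a, b)	assert(!(a) || (b))	/* if a then b */

int
main(void)
{
	int rc = 0, peer_found = 1;

	EQUIV(rc == 0, peer_found);	/* success iff a peer was returned */
	IMPLY(rc == 0, peer_found);	/* success implies a peer */
	return (0);
}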
5357 5356
5358 5357 /*
5359 5358 * Find peer for a socket based on unix domain address.
5360 5359  * For implicit addresses the peer can be found by minor number in the ai
5361 5360  * hash. For explicit binds we look up the vnode address in the addr hash.
5362 5361 */
5363 5362 static tl_endpt_t *
5364 5363 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5365 5364 {
5366 5365 tl_endpt_t *peer_tep = NULL;
5367 5366 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5368 5367 tep->te_aihash : tep->te_addrhash;
5369 5368 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5370 5369 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5371 5370
5372 5371 ASSERT(IS_SOCKET(tep));
5373 5372 EQUIV(rc == 0, peer_tep != NULL);
5374 5373 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5375 5374
5376 5375 if (peer_tep != NULL) {
5377 5376 /* Don't attempt to use closing peer. */
5378 5377 if (peer_tep->te_closing)
5379 5378 goto errout;
5380 5379
5381 5380 /*
5382 5381 		 * Cross-zone unix sockets are permitted, but under Trusted
5383 5382 		 * Extensions the "server" for these must be in the
5384 5383 		 * global zone.
5385 5384 */
5386 5385 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5387 5386 is_system_labeled() &&
5388 5387 (peer_tep->te_zoneid != GLOBAL_ZONEID))
5389 5388 goto errout;
5390 5389 }
5391 5390
5392 5391 return (peer_tep);
5393 5392
5394 5393 errout:
5395 5394 tl_refrele(peer_tep);
5396 5395 return (NULL);
5397 5396 }
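Editor's note: both find routines return a held reference. The hash lookup takes a hold on the endpoint, and a peer that turns out to be closing is released and reported as not found, so callers never act on a dying endpoint. A toy sketch of that pattern (all names are local stand-ins):

#include <stdio.h>

struct endpt {
	int refcnt;
	int closing;
};

static struct endpt *
hold(struct endpt *ep)
{
	ep->refcnt++;		/* the find callback takes a hold */
	return (ep);
}

static void
rele(struct endpt *ep)
{
	ep->refcnt--;
}

static struct endpt *
find_peer(struct endpt *candidate)
{
	struct endpt *peer = hold(candidate);

	if (peer->closing) {	/* don't hand back a dying peer */
		rele(peer);
		return (NULL);
	}
	return (peer);		/* caller must rele() when done */
}

int
main(void)
{
	struct endpt a = { 1, 0 }, b = { 1, 1 };

	(void) printf("a: %p\n", (void *)find_peer(&a));  /* held endpoint */
	(void) printf("b: %p\n", (void *)find_peer(&b));  /* NULL: closing */
	return (0);
}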
5398 5397
5399 5398 /*
5400 5399  * Generate a free address and return it in the struct pointed to by ap,
5401 5400  * allocating space for the address buffer.
5402 5401 * The generated address will be at least 4 bytes long and, if req->ta_alen
5403 5402 * exceeds 4 bytes, be req->ta_alen bytes long.
5404 5403 *
5405 5404  * If an address is found, it will be inserted in the hash.
5406 5405 *
5407 5406 * If req->ta_alen is larger than the default alen (4 bytes) the last
5408 5407 * alen-4 bytes will always be the same as in req.
5409 5408 *
5410 5409  * Return B_FALSE for failure.
5411 5410  * Return B_TRUE for success.
5412 5411 */
5413 5412 static boolean_t
5414 5413 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5415 5414 {
5416 5415 t_scalar_t alen;
5417 5416 uint32_t loopcnt; /* Limit loop to 2^32 */
5418 5417
5419 5418 ASSERT(tep->te_hash_hndl != NULL);
5420 5419 ASSERT(! IS_SOCKET(tep));
5421 5420
5422 5421 if (tep->te_hash_hndl == NULL)
5423 5422 return (B_FALSE);
5424 5423
5425 5424 /*
5426 5425 * check if default addr is in use
5427 5426 * if it is - bump it and try again
5428 5427 */
5429 5428 if (req == NULL) {
5430 5429 alen = sizeof (uint32_t);
5431 5430 } else {
5432 5431 alen = max(req->ta_alen, sizeof (uint32_t));
5433 5432 ASSERT(tep->te_zoneid == req->ta_zoneid);
5434 5433 }
5435 5434
5436 5435 if (tep->te_alen < alen) {
5437 5436 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5438 5437
5439 5438 /*
5440 5439 		 * Not enough space in tep->te_abuf to hold the address,
5441 5440 * allocate a bigger space.
5442 5441 */
5443 5442 if (abuf == NULL)
5444 5443 return (B_FALSE);
5445 5444
5446 5445 if (tep->te_alen > 0)
5447 5446 kmem_free(tep->te_abuf, tep->te_alen);
5448 5447
5449 5448 tep->te_alen = alen;
5450 5449 tep->te_abuf = abuf;
5451 5450 }
5452 5451
5453 5452 /* Copy in the address in req */
5454 5453 if (req != NULL) {
5455 5454 ASSERT(alen >= req->ta_alen);
5456 5455 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5457 5456 }
5458 5457
5459 5458 /*
5460 5459 	 * First try the minor number, then try default addresses.
5461 5460 */
5462 5461 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5463 5462
5464 5463 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5465 5464 if (mod_hash_insert_reserve(tep->te_addrhash,
5466 5465 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5467 5466 tep->te_hash_hndl) == 0) {
5468 5467 /*
5469 5468 * found free address
5470 5469 */
5471 5470 tep->te_flag |= TL_ADDRHASHED;
5472 5471 tep->te_hash_hndl = NULL;
5473 5472
5474 5473 return (B_TRUE); /* successful return */
5475 5474 }
5476 5475 /*
5477 5476 * Use default address.
5478 5477 */
5479 5478 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5480 5479 atomic_inc_32(&tep->te_defaddr);
5481 5480 }
5482 5481
5483 5482 /*
5484 5483 * Failed to find anything.
5485 5484 */
5486 5485 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5487 5486 "tl_get_any_addr:looped 2^32 times"));
5488 5487 return (B_FALSE);
5489 5488 }
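Editor's note: the loop above is bounded. The first probe uses the minor number, and each collision falls back to an incrementing 32-bit default address, so at most 2^32 attempts are made. A toy version with a tiny "taken" set standing in for the address hash:

#include <stdio.h>
#include <stdint.h>

#define	NTAKEN	3

static const uint32_t taken[NTAKEN] = { 7, 100, 101 };	/* already bound */

static int
addr_in_use(uint32_t a)
{
	for (int i = 0; i < NTAKEN; i++)
		if (taken[i] == a)
			return (1);
	return (0);
}

int
main(void)
{
	uint32_t minor = 7, defaddr = 100, addr = minor;

	for (uint32_t loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
		if (!addr_in_use(addr)) {
			(void) printf("bound to %u\n", addr);	/* 102 here */
			return (0);
		}
		addr = defaddr++;	/* bump the default and retry */
	}
	(void) printf("looped 2^32 times\n");
	return (1);
}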
5490 5489
5491 5490 /*
5492 5491 * reallocb + set r/w ptrs to reflect size.
5493 5492 */
5494 5493 static mblk_t *
5495 5494 tl_resizemp(mblk_t *mp, ssize_t new_size)
5496 5495 {
5497 5496 if ((mp = reallocb(mp, new_size, 0)) == NULL)
5498 5497 return (NULL);
5499 5498
5500 5499 mp->b_rptr = DB_BASE(mp);
5501 5500 mp->b_wptr = mp->b_rptr + new_size;
5502 5501 return (mp);
5503 5502 }
5504 5503
5505 5504 static void
5506 5505 tl_cl_backenable(tl_endpt_t *tep)
5507 5506 {
5508 5507 list_t *l = &tep->te_flowlist;
5509 5508 tl_endpt_t *elp;
5510 5509
5511 5510 ASSERT(IS_CLTS(tep));
5512 5511
5513 5512 for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5514 5513 ASSERT(tep->te_ser == elp->te_ser);
5515 5514 ASSERT(elp->te_flowq == tep);
5516 5515 if (! elp->te_closing)
5517 5516 TL_QENABLE(elp);
5518 5517 elp->te_flowq = NULL;
5519 5518 list_remove(l, elp);
5520 5519 }
5521 5520 }
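Editor's note: this is the receive side of the flow-control handshake set up in tl_unitdata(). A blocked sender records the peer in te_flowq and joins that peer's te_flowlist, and the backenable walk above wakes each one and clears the link. A toy version using a hand-rolled singly linked list in place of list_t and qenable():

#include <stdio.h>
#include <stddef.h>

struct endpt {
	const char *name;
	struct endpt *flowq;	/* peer we are flow controlled on */
	struct endpt *flownext;	/* next blocked sender on peer's list */
	struct endpt *flowlist;	/* head of blocked senders (as receiver) */
};

static void
flow_block(struct endpt *sender, struct endpt *peer)
{
	sender->flowq = peer;		/* remember what blocked us */
	sender->flownext = peer->flowlist;
	peer->flowlist = sender;	/* insert at head of peer's list */
}

static void
backenable(struct endpt *peer)
{
	struct endpt *ep;

	while ((ep = peer->flowlist) != NULL) {
		peer->flowlist = ep->flownext;
		ep->flowq = NULL;	/* no longer blocked */
		(void) printf("re-enabling %s\n", ep->name);
	}
}

int
main(void)
{
	struct endpt rx = { "rx", NULL, NULL, NULL };
	struct endpt a = { "a", NULL, NULL, NULL };
	struct endpt b = { "b", NULL, NULL, NULL };

	flow_block(&a, &rx);
	flow_block(&b, &rx);
	backenable(&rx);	/* prints b then a */
	return (0);
}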
5522 5521
5523 5522 /*
5524 5523 * Unconnect endpoints.
5525 5524 */
5526 5525 static void
5527 5526 tl_co_unconnect(tl_endpt_t *tep)
5528 5527 {
5529 5528 tl_endpt_t *peer_tep = tep->te_conp;
5530 5529 tl_endpt_t *srv_tep = tep->te_oconp;
5531 5530 list_t *l;
5532 5531 tl_icon_t *tip;
5533 5532 tl_endpt_t *cl_tep;
5534 5533 mblk_t *d_mp;
5535 5534
5536 5535 ASSERT(IS_COTS(tep));
5537 5536 /*
5538 5537 * If our peer is closing, don't use it.
5539 5538 */
5540 5539 if ((peer_tep != NULL) && peer_tep->te_closing) {
5541 5540 TL_UNCONNECT(tep->te_conp);
5542 5541 peer_tep = NULL;
5543 5542 }
5544 5543 if ((srv_tep != NULL) && srv_tep->te_closing) {
5545 5544 TL_UNCONNECT(tep->te_oconp);
5546 5545 srv_tep = NULL;
5547 5546 }
5548 5547
5549 5548 if (tep->te_nicon > 0) {
5550 5549 l = &tep->te_iconp;
5551 5550 /*
5552 5551 		 * If incoming connect requests are pending, change the
5553 5552 		 * state of the clients on the disconnect ind event and
5554 5553 		 * send a discon_ind pdu to the modules above them.
5555 5554 		 * For the server: all clients get a disconnect.
5556 5555 */
5557 5556
5558 5557 while (tep->te_nicon > 0) {
5559 5558 tip = list_head(l);
5560 5559 cl_tep = tip->ti_tep;
5561 5560
5562 5561 if (cl_tep == NULL) {
5563 5562 tl_freetip(tep, tip);
5564 5563 continue;
5565 5564 }
5566 5565
5567 5566 if (cl_tep->te_oconp != NULL) {
5568 5567 ASSERT(cl_tep != cl_tep->te_oconp);
5569 5568 TL_UNCONNECT(cl_tep->te_oconp);
5570 5569 }
5571 5570
5572 5571 if (cl_tep->te_closing) {
5573 5572 tl_freetip(tep, tip);
5574 5573 continue;
5575 5574 }
5576 5575
5577 5576 enableok(cl_tep->te_wq);
5578 5577 TL_QENABLE(cl_tep);
5579 5578 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5580 5579 if (d_mp != NULL) {
5581 5580 cl_tep->te_state = TS_IDLE;
5582 5581 putnext(cl_tep->te_rq, d_mp);
5583 5582 } else {
5584 5583 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5585 5584 SL_TRACE|SL_ERROR,
5586 5585 "tl_co_unconnect:icmng: "
5587 5586 "allocb failure"));
5588 5587 }
5589 5588 tl_freetip(tep, tip);
5590 5589 }
5591 5590 } else if (srv_tep != NULL) {
5592 5591 /*
5593 5592 		 * If an outgoing request is pending, change the
5594 5593 		 * state of the server on the discon ind event.
5595 5594 */
5596 5595
5597 5596 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5598 5597 IS_COTSORD(srv_tep) &&
5599 5598 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5600 5599 /*
5601 5600 * Queue ordrel_ind for server to be picked up
5602 5601 * when the connection is accepted.
5603 5602 */
5604 5603 d_mp = tl_ordrel_ind_alloc();
5605 5604 } else {
5606 5605 /*
5607 5606 * send discon_ind to server
5608 5607 */
5609 5608 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5610 5609 }
5611 5610 if (d_mp == NULL) {
5612 5611 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5613 5612 SL_TRACE|SL_ERROR,
5614 5613 "tl_co_unconnect:outgoing:allocb failure"));
5615 5614 TL_UNCONNECT(tep->te_oconp);
5616 5615 goto discon_peer;
5617 5616 }
5618 5617
5619 5618 /*
5620 5619 * If this is a socket the T_DISCON_IND is queued with
5621 5620 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5622 5621 * from the list of pending connections.
5623 5622 		 * Note that when te_oconp is set the peer must have
5624 5623 		 * a t_connind_t for the client.
5625 5624 */
5626 5625 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5627 5626 /*
5628 5627 * Queue the disconnection message.
5629 5628 */
5630 5629 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5631 5630 } else {
5632 5631 tip = tl_icon_find(srv_tep, tep->te_seqno);
5633 5632 if (tip == NULL) {
5634 5633 freemsg(d_mp);
5635 5634 } else {
5636 5635 ASSERT(tep == tip->ti_tep);
5637 5636 ASSERT(tep->te_ser == srv_tep->te_ser);
5638 5637 /*
5639 5638 * Delete tip from the server list.
5640 5639 */
5641 5640 if (srv_tep->te_nicon == 1) {
5642 5641 srv_tep->te_state =
5643 5642 NEXTSTATE(TE_DISCON_IND2,
5644 5643 srv_tep->te_state);
5645 5644 } else {
5646 5645 srv_tep->te_state =
5647 5646 NEXTSTATE(TE_DISCON_IND3,
5648 5647 srv_tep->te_state);
5649 5648 }
5650 5649 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5651 5650 T_DISCON_IND);
5652 5651 putnext(srv_tep->te_rq, d_mp);
5653 5652 tl_freetip(srv_tep, tip);
5654 5653 }
5655 5654 TL_UNCONNECT(tep->te_oconp);
5656 5655 srv_tep = NULL;
5657 5656 }
5658 5657 } else if (peer_tep != NULL) {
5659 5658 		 * unconnect the existing connection.
5660 5659 		 * If connected, change the state of the peer on the
5661 5660 		 * discon ind event and send a discon ind pdu
5662 5661 		 * to the module above it.
5663 5662 * to module above it
5664 5663 */
5665 5664
5666 5665 ASSERT(tep->te_ser == peer_tep->te_ser);
5667 5666 if (IS_COTSORD(peer_tep) &&
5668 5667 (peer_tep->te_state == TS_WIND_ORDREL ||
5669 5668 peer_tep->te_state == TS_DATA_XFER)) {
5670 5669 /*
5671 5670 * send ordrel ind
5672 5671 */
5673 5672 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5674 5673 "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5675 5674 peer_tep->te_state,
5676 5675 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5677 5676 d_mp = tl_ordrel_ind_alloc();
5678 5677 if (! d_mp) {
5679 5678 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5680 5679 SL_TRACE|SL_ERROR,
5681 5680 "tl_co_unconnect:connected:"
5682 5681 "allocb failure"));
5683 5682 /*
5684 5683 * Continue with cleaning up peer as
5685 5684 * this side may go away with the close
5686 5685 */
5687 5686 TL_QENABLE(peer_tep);
5688 5687 goto discon_peer;
5689 5688 }
5690 5689 peer_tep->te_state =
5691 5690 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5692 5691
5693 5692 putnext(peer_tep->te_rq, d_mp);
5694 5693 /*
5695 5694 * Handle flow control case. This will generate
5696 5695 * a t_discon_ind message with reason 0 if there
5697 5696 * is data queued on the write side.
5698 5697 */
5699 5698 TL_QENABLE(peer_tep);
5700 5699 } else if (IS_COTSORD(peer_tep) &&
5701 5700 peer_tep->te_state == TS_WREQ_ORDREL) {
5702 5701 /*
5703 5702 			 * We already sent an ordrel_ind. We send a discon
5704 5703 			 * with error 0 to indicate that the peer is gone.
5705 5704 */
5706 5705 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5707 5706 SL_TRACE|SL_ERROR,
5708 5707 "tl_co_unconnect: discon in state %d",
5709 5708 tep->te_state));
5710 5709 tl_discon_ind(peer_tep, 0);
5711 5710 } else {
5712 5711 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5713 5712 SL_TRACE|SL_ERROR,
5714 5713 "tl_co_unconnect: state %d", tep->te_state));
5715 5714 tl_discon_ind(peer_tep, ECONNRESET);
5716 5715 }
5717 5716
5718 5717 discon_peer:
5719 5718 /*
5720 5719 * Disconnect cross-pointers only for close
5721 5720 */
5722 5721 if (tep->te_closing) {
5723 5722 peer_tep = tep->te_conp;
5724 5723 TL_REMOVE_PEER(peer_tep->te_conp);
5725 5724 TL_REMOVE_PEER(tep->te_conp);
5726 5725 }
5727 5726 }
5728 5727 }
5729 5728
5730 5729 /*
5731 5730 * Note: The following routine does not recover from allocb()
5732 5731 * failures
5733 5732 * The reason should be from the <sys/errno.h> space.
5734 5733 */
5735 5734 static void
5736 5735 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5737 5736 {
5738 5737 mblk_t *d_mp;
5739 5738
5740 5739 if (tep->te_closing)
5741 5740 return;
5742 5741
5743 5742 /*
5744 5743 * flush the queues.
5745 5744 */
5746 5745 flushq(tep->te_rq, FLUSHDATA);
5747 5746 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5748 5747
5749 5748 /*
5750 5749 * send discon ind
5751 5750 */
5752 5751 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5753 5752 if (! d_mp) {
5754 5753 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5755 5754 "tl_discon_ind:allocb failure"));
5756 5755 return;
5757 5756 }
5758 5757 tep->te_state = TS_IDLE;
5759 5758 putnext(tep->te_rq, d_mp);
5760 5759 }
5761 5760
5762 5761 /*
5763 5762 * Note: The following routine does not recover from allocb()
5764 5763 * failures
5765 5764 * The reason should be from the <sys/errno.h> space.
5766 5765 */
5767 5766 static mblk_t *
5768 5767 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5769 5768 {
5770 5769 mblk_t *mp;
5771 5770 struct T_discon_ind *tdi;
5772 5771
5773 5772 	if ((mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) != NULL) {
5774 5773 DB_TYPE(mp) = M_PROTO;
5775 5774 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5776 5775 tdi = (struct T_discon_ind *)mp->b_rptr;
5777 5776 tdi->PRIM_type = T_DISCON_IND;
5778 5777 tdi->DISCON_reason = reason;
5779 5778 tdi->SEQ_number = seqnum;
5780 5779 }
5781 5780 return (mp);
5782 5781 }
5783 5782
5784 5783
5785 5784 /*
5786 5785 * Note: The following routine does not recover from allocb()
5787 5786 * failures
5788 5787 */
5789 5788 static mblk_t *
5790 5789 tl_ordrel_ind_alloc(void)
5791 5790 {
5792 5791 mblk_t *mp;
5793 5792 struct T_ordrel_ind *toi;
5794 5793
5795 5794 	if ((mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) != NULL) {
5796 5795 DB_TYPE(mp) = M_PROTO;
5797 5796 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5798 5797 toi = (struct T_ordrel_ind *)mp->b_rptr;
5799 5798 toi->PRIM_type = T_ORDREL_IND;
5800 5799 }
5801 5800 return (mp);
5802 5801 }
5803 5802
5804 5803
5805 5804 /*
5806 5805 * Lookup the seqno in the list of queued connections.
5807 5806 */
5808 5807 static tl_icon_t *
5809 5808 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5810 5809 {
5811 5810 list_t *l = &tep->te_iconp;
5812 5811 tl_icon_t *tip = list_head(l);
5813 5812
5814 5813 ASSERT(seqno != 0);
5815 5814
5816 5815 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5817 5816 ;
5818 5817
5819 5818 return (tip);
5820 5819 }
5821 5820
5822 5821 /*
5823 5822 * Queue data for a given T_CONN_IND while verifying that redundant
5824 5823 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5825 5824 * Used when the originator of the connection closes.
5826 5825 */
5827 5826 static void
5828 5827 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5829 5828 {
5830 5829 tl_icon_t *tip;
5831 5830 mblk_t **mpp, *mp;
5832 5831 int prim, nprim;
5833 5832
5834 5833 if (nmp->b_datap->db_type == M_PROTO)
5835 5834 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5836 5835 else
5837 5836 nprim = -1; /* M_DATA */
5838 5837
5839 5838 tip = tl_icon_find(tep, seqno);
5840 5839 if (tip == NULL) {
5841 5840 freemsg(nmp);
5842 5841 return;
5843 5842 }
5844 5843
5845 5844 ASSERT(tip->ti_seqno != 0);
5846 5845 mpp = &tip->ti_mp;
5847 5846 while (*mpp != NULL) {
5848 5847 mp = *mpp;
5849 5848
5850 5849 if (mp->b_datap->db_type == M_PROTO)
5851 5850 prim = ((union T_primitives *)mp->b_rptr)->type;
5852 5851 else
5853 5852 prim = -1; /* M_DATA */
5854 5853
5855 5854 /*
5856 5855 * Allow nothing after a T_DISCON_IND
5857 5856 */
5858 5857 if (prim == T_DISCON_IND) {
5859 5858 freemsg(nmp);
5860 5859 return;
5861 5860 }
5862 5861 /*
5863 5862 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5864 5863 */
5865 5864 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5866 5865 freemsg(nmp);
5867 5866 return;
5868 5867 }
5869 5868 mpp = &(mp->b_next);
5870 5869 }
5871 5870 *mpp = nmp;
5872 5871 }
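Editor's note: the two rules above keep the per-connection chain sensible: nothing may follow a T_DISCON_IND, and only a T_DISCON_IND may follow a T_ORDREL_IND. A toy version of the append filter, with hypothetical primitive codes and no allocation-failure handling:

#include <stdio.h>
#include <stdlib.h>

enum { P_DATA = -1, P_ORDREL = 1, P_DISCON = 2 };	/* stand-in codes */

struct msg {
	int prim;
	struct msg *next;
};

static void
queuemsg(struct msg **mpp, struct msg *nmp)
{
	for (; *mpp != NULL; mpp = &(*mpp)->next) {
		if ((*mpp)->prim == P_DISCON ||	     /* nothing after DISCON */
		    ((*mpp)->prim == P_ORDREL && nmp->prim != P_DISCON)) {
			free(nmp);		     /* redundant: drop it */
			return;
		}
	}
	*mpp = nmp;	/* reached the tail: append */
}

static struct msg *
mkmsg(int prim)
{
	struct msg *mp = calloc(1, sizeof (*mp));	/* sketch: no checks */

	mp->prim = prim;
	return (mp);
}

int
main(void)
{
	struct msg *q = NULL;

	queuemsg(&q, mkmsg(P_DATA));
	queuemsg(&q, mkmsg(P_ORDREL));
	queuemsg(&q, mkmsg(P_DATA));	/* dropped: would follow an ORDREL */
	queuemsg(&q, mkmsg(P_DISCON));	/* allowed after an ORDREL */

	for (struct msg *mp = q; mp != NULL; mp = mp->next)
		(void) printf("%d\n", mp->prim);	/* -1 1 2 */
	return (0);
}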
5873 5872
5874 5873 /*
5875 5874 * Verify if a certain TPI primitive exists on the connind queue.
5876 5875 * Use prim -1 for M_DATA.
5877 5876  * Return B_TRUE if found.
5878 5877 */
5879 5878 static boolean_t
5880 5879 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5881 5880 {
5882 5881 tl_icon_t *tip = tl_icon_find(tep, seqno);
5883 5882 boolean_t found = B_FALSE;
5884 5883
5885 5884 if (tip != NULL) {
5886 5885 mblk_t *mp;
5887 5886 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5888 5887 found = (DB_TYPE(mp) == M_PROTO &&
5889 5888 ((union T_primitives *)mp->b_rptr)->type == prim);
5890 5889 }
5891 5890 }
5892 5891 return (found);
5893 5892 }
5894 5893
5895 5894 /*
5896 5895 * Send the b_next mblk chain that has accumulated before the connection
5897 5896 * was accepted. Perform the necessary state transitions.
5898 5897 */
5899 5898 static void
5900 5899 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5901 5900 {
5902 5901 mblk_t *mp;
5903 5902 union T_primitives *primp;
5904 5903
5905 5904 if (tep->te_closing) {
5906 5905 tl_icon_freemsgs(mpp);
5907 5906 return;
5908 5907 }
5909 5908
5910 5909 ASSERT(tep->te_state == TS_DATA_XFER);
5911 5910 ASSERT(tep->te_rq->q_first == NULL);
5912 5911
5913 5912 while ((mp = *mpp) != NULL) {
5914 5913 *mpp = mp->b_next;
5915 5914 mp->b_next = NULL;
5916 5915
5917 5916 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5918 5917 switch (DB_TYPE(mp)) {
5919 5918 default:
5920 5919 freemsg(mp);
5921 5920 break;
5922 5921 case M_DATA:
5923 5922 putnext(tep->te_rq, mp);
5924 5923 break;
5925 5924 case M_PROTO:
5926 5925 primp = (union T_primitives *)mp->b_rptr;
5927 5926 switch (primp->type) {
5928 5927 case T_UNITDATA_IND:
5929 5928 case T_DATA_IND:
5930 5929 case T_OPTDATA_IND:
5931 5930 case T_EXDATA_IND:
5932 5931 putnext(tep->te_rq, mp);
5933 5932 break;
5934 5933 case T_ORDREL_IND:
5935 5934 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5936 5935 tep->te_state);
5937 5936 putnext(tep->te_rq, mp);
5938 5937 break;
5939 5938 case T_DISCON_IND:
5940 5939 tep->te_state = TS_IDLE;
5941 5940 putnext(tep->te_rq, mp);
5942 5941 break;
5943 5942 default:
5944 5943 #ifdef DEBUG
5945 5944 cmn_err(CE_PANIC,
5946 5945 "tl_icon_sendmsgs: unknown primitive");
5947 5946 #endif /* DEBUG */
5948 5947 freemsg(mp);
5949 5948 break;
5950 5949 }
5951 5950 break;
5952 5951 }
5953 5952 }
5954 5953 }
5955 5954
5956 5955 /*
5957 5956 * Free the b_next mblk chain that has accumulated before the connection
5958 5957 * was accepted.
5959 5958 */
5960 5959 static void
5961 5960 tl_icon_freemsgs(mblk_t **mpp)
5962 5961 {
5963 5962 mblk_t *mp;
5964 5963
5965 5964 while ((mp = *mpp) != NULL) {
5966 5965 *mpp = mp->b_next;
5967 5966 mp->b_next = NULL;
5968 5967 freemsg(mp);
5969 5968 }
5970 5969 }
5971 5970
5972 5971 /*
5973 5972 * Send M_ERROR
5974 5973 * Note: assumes caller ensured enough space in mp or enough
5975 5974 * memory available. Does not attempt recovery from allocb()
5976 5975 * failures
5977 5976 */
5978 5977
5979 5978 static void
5980 5979 tl_merror(queue_t *wq, mblk_t *mp, int error)
5981 5980 {
5982 5981 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5983 5982
5984 5983 if (tep->te_closing) {
5985 5984 freemsg(mp);
5986 5985 return;
5987 5986 }
5988 5987
5989 5988 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5990 5989 SL_TRACE|SL_ERROR,
5991 5990 "tl_merror: tep=%p, err=%d", (void *)tep, error));
5992 5991
5993 5992 /*
5994 5993 * flush all messages on queue. we are shutting
5995 5994 * the stream down on fatal error
5996 5995 */
5997 5996 flushq(wq, FLUSHALL);
5998 5997 if (IS_COTS(tep)) {
5999 5998 /* connection oriented - unconnect endpoints */
6000 5999 tl_co_unconnect(tep);
6001 6000 }
6002 6001 if (mp->b_cont) {
6003 6002 freemsg(mp->b_cont);
6004 6003 mp->b_cont = NULL;
6005 6004 }
6006 6005
6007 6006 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
6008 6007 freemsg(mp);
6009 6008 mp = allocb(1, BPRI_HI);
6010 6009 if (!mp) {
6011 6010 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6012 6011 SL_TRACE|SL_ERROR,
6013 6012 "tl_merror:M_PROTO: out of memory"));
6014 6013 return;
6015 6014 }
6016 6015 }
6017 6016 if (mp) {
6018 6017 DB_TYPE(mp) = M_ERROR;
6019 6018 mp->b_rptr = DB_BASE(mp);
6020 6019 *mp->b_rptr = (char)error;
6021 6020 mp->b_wptr = mp->b_rptr + sizeof (char);
6022 6021 qreply(wq, mp);
6023 6022 } else {
6024 6023 (void) putnextctl1(tep->te_rq, M_ERROR, error);
6025 6024 }
6026 6025 }
6027 6026
6028 6027 static void
6029 6028 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
6030 6029 {
6031 6030 ASSERT(cr != NULL);
6032 6031
6033 6032 if (flag & TL_SETCRED) {
6034 6033 struct opthdr *opt = (struct opthdr *)buf;
6035 6034 tl_credopt_t *tlcred;
6036 6035
6037 6036 opt->level = TL_PROT_LEVEL;
6038 6037 opt->name = TL_OPT_PEER_CRED;
6039 6038 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
6040 6039
6041 6040 tlcred = (tl_credopt_t *)(opt + 1);
6042 6041 tlcred->tc_uid = crgetuid(cr);
6043 6042 tlcred->tc_gid = crgetgid(cr);
6044 6043 tlcred->tc_ruid = crgetruid(cr);
6045 6044 tlcred->tc_rgid = crgetrgid(cr);
6046 6045 tlcred->tc_suid = crgetsuid(cr);
6047 6046 tlcred->tc_sgid = crgetsgid(cr);
6048 6047 tlcred->tc_ngroups = crgetngroups(cr);
6049 6048 } else if (flag & TL_SETUCRED) {
6050 6049 struct opthdr *opt = (struct opthdr *)buf;
6051 6050
6052 6051 opt->level = TL_PROT_LEVEL;
6053 6052 opt->name = TL_OPT_PEER_UCRED;
6054 6053 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6055 6054
6056 6055 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6057 6056 } else {
6058 6057 struct T_opthdr *topt = (struct T_opthdr *)buf;
6059 6058 ASSERT(flag & TL_SOCKUCRED);
6060 6059
6061 6060 topt->level = SOL_SOCKET;
6062 6061 topt->name = SCM_UCRED;
6063 6062 topt->len = ucredminsize(cr) + sizeof (*topt);
6064 6063 topt->status = 0;
6065 6064 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6066 6065 }
6067 6066 }
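Editor's note: each branch above writes an option header immediately followed by its payload, with the payload length rounded up to t_scalar_t granularity by OPTLEN(). A sketch of the TL_SETCRED-style layout, with local stand-ins for the structures and the rounding macro (assumed 4-byte rounding):

#include <stdio.h>
#include <stdint.h>

#define	OPTLEN(x)	(((x) + 3u) & ~3u)	/* assumed rounding */

struct opt_hdr {		/* stand-in for struct opthdr */
	uint32_t level, name, len;
};

struct cred_opt {		/* stand-in for tl_credopt_t */
	uint32_t tc_uid, tc_gid;
};

int
main(void)
{
	uint32_t buf[8];
	struct opt_hdr *opt = (struct opt_hdr *)buf;
	struct cred_opt *tc = (struct cred_opt *)(opt + 1);

	opt->level = 1;				/* e.g. TL_PROT_LEVEL */
	opt->name = 2;				/* e.g. TL_OPT_PEER_CRED */
	opt->len = OPTLEN(sizeof (*tc));	/* payload length, rounded */
	tc->tc_uid = 100;			/* filled from the cred */
	tc->tc_gid = 10;

	(void) printf("option: hdr %zu + payload %u bytes\n",
	    sizeof (*opt), opt->len);
	return (0);
}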
6068 6067
6069 6068 /* ARGSUSED */
6070 6069 static int
6071 6070 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6072 6071 {
6073 6072 /* no default value processed in protocol specific code currently */
6074 6073 return (-1);
6075 6074 }
6076 6075
6077 6076 /* ARGSUSED */
6078 6077 static int
6079 6078 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6080 6079 {
6081 6080 int len;
6082 6081 tl_endpt_t *tep;
6083 6082 int *valp;
6084 6083
6085 6084 tep = (tl_endpt_t *)wq->q_ptr;
6086 6085
6087 6086 len = 0;
6088 6087
6089 6088 /*
6090 6089 * Assumes: option level and name sanity check done elsewhere
6091 6090 */
6092 6091
6093 6092 switch (level) {
6094 6093 case SOL_SOCKET:
6095 6094 if (! IS_SOCKET(tep))
6096 6095 break;
6097 6096 switch (name) {
6098 6097 case SO_RECVUCRED:
6099 6098 len = sizeof (int);
6100 6099 valp = (int *)ptr;
6101 6100 *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6102 6101 break;
6103 6102 default:
6104 6103 break;
6105 6104 }
6106 6105 break;
6107 6106 case TL_PROT_LEVEL:
6108 6107 switch (name) {
6109 6108 case TL_OPT_PEER_CRED:
6110 6109 case TL_OPT_PEER_UCRED:
6111 6110 /*
6112 6111 			 * This option is not supposed to be retrieved directly.
6113 6112 			 * It is only sent in T_CONN_{IND,CON} and T_UNITDATA_IND
6114 6113 			 * when certain internal flags are set by other options.
6115 6114 			 * Direct retrieval is always designed to fail (be
6116 6115 			 * ignored) for this option.
6117 6116 */
6118 6117 break;
6119 6118 }
6120 6119 }
6121 6120 return (len);
6122 6121 }
6123 6122
6124 6123 /* ARGSUSED */
6125 6124 static int
6126 6125 tl_set_opt(
6127 6126 queue_t *wq,
6128 6127 uint_t mgmt_flags,
6129 6128 int level,
6130 6129 int name,
6131 6130 uint_t inlen,
6132 6131 uchar_t *invalp,
6133 6132 uint_t *outlenp,
6134 6133 uchar_t *outvalp,
6135 6134 void *thisdg_attrs,
6136 6135 cred_t *cr)
6137 6136 {
6138 6137 int error;
6139 6138 tl_endpt_t *tep;
6140 6139
6141 6140 tep = (tl_endpt_t *)wq->q_ptr;
6142 6141
6143 6142 error = 0; /* NOERROR */
6144 6143
6145 6144 /*
6146 6145 * Assumes: option level and name sanity checks done elsewhere
6147 6146 */
6148 6147
6149 6148 switch (level) {
6150 6149 case SOL_SOCKET:
6151 6150 if (! IS_SOCKET(tep)) {
6152 6151 error = EINVAL;
6153 6152 break;
6154 6153 }
6155 6154 /*
6156 6155 * TBD: fill in other AF_UNIX socket options and then stop
6157 6156 * returning error.
6158 6157 */
6159 6158 switch (name) {
6160 6159 case SO_RECVUCRED:
6161 6160 /*
6162 6161 * We only support this for datagram sockets;
6163 6162 * getpeerucred handles the connection oriented
6164 6163 * transports.
6165 6164 */
6166 6165 if (! IS_CLTS(tep)) {
6167 6166 error = EINVAL;
6168 6167 break;
6169 6168 }
6170 6169 if (*(int *)invalp == 0)
6171 6170 tep->te_flag &= ~TL_SOCKUCRED;
6172 6171 else
6173 6172 tep->te_flag |= TL_SOCKUCRED;
6174 6173 break;
6175 6174 default:
6176 6175 error = EINVAL;
6177 6176 break;
6178 6177 }
6179 6178 break;
6180 6179 case TL_PROT_LEVEL:
6181 6180 switch (name) {
6182 6181 case TL_OPT_PEER_CRED:
6183 6182 case TL_OPT_PEER_UCRED:
6184 6183 /*
6185 6184 			 * This option is not supposed to be set directly.
6186 6185 			 * Its value is initialized for each endpoint at
6187 6186 			 * driver open time.
6188 6187 			 * Direct setting is always designed to fail for this
6189 6188 			 * option.
6190 6189 */
6191 6190 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6192 6191 SL_TRACE|SL_ERROR,
6193 6192 "tl_set_opt: option is not supported"));
6194 6193 error = EPROTO;
6195 6194 break;
6196 6195 }
6197 6196 }
6198 6197 return (error);
6199 6198 }
6200 6199
6201 6200
6202 6201 static void
6203 6202 tl_timer(void *arg)
6204 6203 {
6205 6204 queue_t *wq = arg;
6206 6205 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6207 6206
6208 6207 ASSERT(tep);
6209 6208
6210 6209 tep->te_timoutid = 0;
6211 6210
6212 6211 enableok(wq);
6213 6212 /*
6214 6213 * Note: can call wsrv directly here and save context switch
6215 6214 * Consider change when qtimeout (not timeout) is active
6216 6215 */
6217 6216 qenable(wq);
6218 6217 }
6219 6218
6220 6219 static void
6221 6220 tl_buffer(void *arg)
6222 6221 {
6223 6222 queue_t *wq = arg;
6224 6223 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6225 6224
6226 6225 ASSERT(tep);
6227 6226
6228 6227 tep->te_bufcid = 0;
6229 6228 tep->te_nowsrv = B_FALSE;
6230 6229
6231 6230 enableok(wq);
6232 6231 /*
6233 6232 * Note: can call wsrv directly here and save context switch
6234 6233 * Consider change when qbufcall (not bufcall) is active
6235 6234 */
6236 6235 qenable(wq);
6237 6236 }
6238 6237
6239 6238 static void
6240 6239 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6241 6240 {
6242 6241 tl_endpt_t *tep;
6243 6242
6244 6243 tep = (tl_endpt_t *)wq->q_ptr;
6245 6244
6246 6245 if (tep->te_closing) {
6247 6246 freemsg(mp);
6248 6247 return;
6249 6248 }
6250 6249 noenable(wq);
6251 6250
6252 6251 (void) insq(wq, wq->q_first, mp);
6253 6252
6254 6253 if (tep->te_bufcid || tep->te_timoutid) {
6255 6254 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6256 6255 "tl_memrecover:recover %p pending", (void *)wq));
6257 6256 return;
6258 6257 }
6259 6258
6260 6259 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6261 6260 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6262 6261 drv_usectohz(TL_BUFWAIT));
6263 6262 }
6264 6263 }
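Editor's note: the recovery strategy above is two-staged: requeue the failed message, prefer a buffer-available callback, and only if even that registration fails fall back to a fixed-delay timer, so the queue is always retried eventually. A toy sketch of the pattern with plain function pointers standing in for qbufcall()/qtimeout():

#include <stdio.h>

typedef void (*cb_t)(void);

static void
retry(void)
{
	(void) printf("retrying queued message\n");
}

/*
 * Stand-in for qbufcall(): registration itself can fail under memory
 * pressure, which is exactly the case the fallback timer covers.
 */
static int
register_bufcall(cb_t cb, int have_memory)
{
	(void) cb;		/* would fire when a buffer is freed */
	return (have_memory);
}

/* Stand-in for qtimeout(): arming a timer cannot fail. */
static void
register_timeout(cb_t cb)
{
	cb();			/* fires after a delay in real life */
}

int
main(void)
{
	if (!register_bufcall(retry, 0))	/* first choice failed... */
		register_timeout(retry);	/* ...so poll on a timer */
	return (0);
}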
6265 6264
6266 6265 static void
6267 6266 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6268 6267 {
6269 6268 ASSERT(tip->ti_seqno != 0);
6270 6269
6271 6270 if (tip->ti_mp != NULL) {
6272 6271 tl_icon_freemsgs(&tip->ti_mp);
6273 6272 tip->ti_mp = NULL;
6274 6273 }
6275 6274 if (tip->ti_tep != NULL) {
6276 6275 tl_refrele(tip->ti_tep);
6277 6276 tip->ti_tep = NULL;
6278 6277 }
6279 6278 list_remove(&tep->te_iconp, tip);
6280 6279 kmem_free(tip, sizeof (tl_icon_t));
6281 6280 tep->te_nicon--;
6282 6281 }
6283 6282
6284 6283 /*
6285 6284 * Remove address from address hash.
6286 6285 */
6287 6286 static void
6288 6287 tl_addr_unbind(tl_endpt_t *tep)
6289 6288 {
6290 6289 tl_endpt_t *elp;
6291 6290
6292 6291 if (tep->te_flag & TL_ADDRHASHED) {
6293 6292 if (IS_SOCKET(tep)) {
6294 6293 (void) mod_hash_remove(tep->te_addrhash,
6295 6294 (mod_hash_key_t)tep->te_vp,
6296 6295 (mod_hash_val_t *)&elp);
6297 6296 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6298 6297 tep->te_magic = SOU_MAGIC_IMPLICIT;
6299 6298 } else {
6300 6299 (void) mod_hash_remove(tep->te_addrhash,
6301 6300 (mod_hash_key_t)&tep->te_ap,
6302 6301 (mod_hash_val_t *)&elp);
6303 6302 (void) kmem_free(tep->te_abuf, tep->te_alen);
6304 6303 tep->te_alen = -1;
6305 6304 tep->te_abuf = NULL;
6306 6305 }
6307 6306 tep->te_flag &= ~TL_ADDRHASHED;
6308 6307 }
6309 6308 }
5547 lines elided