Print this page
195 Need replacement for nfs/lockd+klm
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Reviewed by: Jeremy Jones <jeremy@delphix.com>
Reviewed by: Jeff Biseda <jbiseda@delphix.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/tl.c
+++ new/usr/src/uts/common/io/tl.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
27 + * Copyright (c) 2012 by Delphix. All rights reserved.
27 28 */
28 29
29 30 /*
30 31 * Multithreaded STREAMS Local Transport Provider.
31 32 *
32 33 * OVERVIEW
33 34 * ========
34 35 *
35 36 * This driver provides TLI as well as socket semantics. It provides
36 37 * connectionless, connection oriented, and connection oriented with orderly
37 38 * release transports for TLI and sockets. Each transport type has separate name
38 39 * spaces (i.e. it is not possible to connect from a socket to a TLI endpoint) -
39 40 * this removes any name space conflicts when binding to socket style transport
40 41 * addresses.
41 42 *
42 43 * NOTE: There is one exception: Socket ticots and ticotsord transports share
43 44 * the same namespace. In fact, sockets always use ticotsord type transport.
44 45 *
45 46 * The driver mode is specified during open() by the minor number used for
46 47 * open.
47 48 *
48 49 * The sockets in addition have the following semantic differences:
49 50 * No support for passing up credentials (TL_SET[U]CRED).
50 51 *
51 52 * Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
52 53 * from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
53 54 * T_OPTDATA_IND.
54 55 *
55 56 * The T_CONN_CON is generated when processing the T_CONN_REQ i.e. before
56 57 * a T_CONN_RES is received from the acceptor. This means that a socket
57 58 * connect will complete before the peer has called accept.
58 59 *
59 60 *
60 61 * MULTITHREADING
61 62 * ==============
62 63 *
63 64 * The driver does not use STREAMS protection mechanisms. Instead it uses a
64 65 * generic "serializer" abstraction. Most of the operations are executed behind
65 66 * the serializer and are, essentially single-threaded. All functions executed
66 67 * behind the same serializer are strictly serialized. So if one thread calls
67 68 * serializer_enter(serializer, foo, mp1, arg1); and another thread calls
68 69 * serializer_enter(serializer, bar, mp2, arg1); then (depending on which one
69 70 * was called first) the actual sequence will be foo(mp1, arg1); bar(mp2, arg1) or
70 71 * bar(mp2, arg1); foo(mp1, arg1). But foo() and bar() will never run at the
71 72 * same time.
72 73 *
73 74 * Connectionless transports use a single serializer per transport type (one for
74 75 * TLI and one for sockets). Connection-oriented transports use finer-grained
75 76 * serializers.
76 77 *
77 78 * All COTS-type endpoints start their life with private serializers. During
78 79 * connection request processing the endpoint serializer is switched to the
79 80 * listener's serializer and the rest of T_CONN_REQ processing is done on the
80 81 * listener serializer. During T_CONN_RES processing the eager serializer is
81 82 * switched from listener to acceptor serializer and after that point all
82 83 * processing for eager and acceptor happens on this serializer. To avoid races
83 84 * with endpoint closes while its serializer may be changing, closes are blocked
84 85 * while serializers are manipulated.
85 86 *
86 87 * References accounting
87 88 * ---------------------
88 89 *
89 90 * Endpoints are reference counted and freed when the last reference is
90 91 * dropped. Functions within the serializer may access an endpoint state even
91 92 * after an endpoint closed. The te_closing being set on the endpoint indicates
92 93 * that the endpoint entered its close routine.
93 94 *
94 95 * One reference is held for each opened endpoint instance. The reference
95 96 * counter is incremented when the endpoint is linked to another endpoint and
96 97 * decremented when the link disappears. It is also incremented when the
97 98 * endpoint is found by the hash table lookup. This increment is atomic with the
98 99 * lookup itself and happens while the hash table read lock is held.
99 100 *
100 101 * Close synchronization
101 102 * ---------------------
102 103 *
103 104 * During close the endpoint is marked as closing using the te_closing flag. It is
104 105 * usually enough to check for te_closing flag since all other state changes
105 106 * happen after this flag is set and the close entered serializer. Immediately
106 107 * after setting te_closing flag tl_close() enters serializer and waits until
107 108 * the callback finishes. This allows all functions called within serializer to
108 109 * simply check te_closing without any locks.
109 110 *
110 111 * Serializer management.
111 112 * ---------------------
112 113 *
113 114 * For COTS transports serializers are created when the endpoint is constructed
114 115 * and destroyed when the endpoint is destructed. CLTS transports use global
115 116 * serializers - one for sockets and one for TLI.
116 117 *
117 118 * COTS serializers have separate reference counts to deal with several
118 119 * endpoints sharing the same serializer. There is a subtle problem related to
119 120 * the serializer destruction. The serializer should never be destroyed by any
120 121 * function executed inside serializer. This means that close has to wait till
121 122 * all serializer activity for this endpoint is finished before it can drop the
122 123 * last reference on the endpoint (which may as well free the serializer). This
123 124 * is only relevant for COTS transports which manage serializers
124 125 * dynamically. For CLTS transports close may complete without waiting for all
125 126 * serializer activity to finish since serializer is only destroyed at driver
126 127 * detach time.
127 128 *
128 129 * COTS endpoints keep track of the number of outstanding requests on the
129 130 * serializer for the endpoint. The code handling accept() avoids changing
130 131 * client serializer if it has any pending messages on the serializer and
131 132 * instead moves acceptor to listener's serializer.
132 133 *
133 134 *
134 135 * Use of hash tables
135 136 * ------------------
136 137 *
137 138 * The driver uses modhash hash table implementation. Each transport uses two
138 139 * hash tables - one for finding endpoints by acceptor ID and another one for
139 140 * finding endpoints by address. For sockets TICOTS and TICOTSORD share the same
140 141 * pair of hash tables since sockets only use TICOTSORD.
141 142 *
142 143 * All hash tables lookups increment a reference count for returned endpoints,
143 144 * so we may safely check the endpoint state even when the endpoint is removed
144 145 * from the hash by another thread immediately after it is found.
145 146 *
146 147 *
147 148 * CLOSE processing
148 149 * ================
149 150 *
150 151 * The driver enters serializer twice on close(). The close sequence is the
151 152 * following:
152 153 *
153 154 * 1) Wait until closing is safe (te_closewait becomes zero)
154 155 * This step is needed to prevent close during serializer switches. In most
155 156 * cases (close happening after connection establishment) te_closewait is
156 157 * zero.
157 158 * 2) Set te_closing.
158 159 * 3) Call tl_close_ser() within serializer and wait for it to complete.
159 160 *
160 161 * tl_close_ser() simply marks the endpoint and wakes up the waiting tl_close().
161 162 * It also needs to clear write-side q_next pointers - this should be done
162 163 * before qprocsoff().
163 164 *
164 165 * This synchronous serializer entry during close is needed to ensure that
165 166 * the queue is valid everywhere inside the serializer.
166 167 *
167 168 * Note that in many cases close will execute tl_close_ser() synchronously,
168 169 * so it will not wait at all.
169 170 *
170 171 * 4) Call qprocsoff().
171 172 * 5) Call tl_close_finish_ser() within the serializer and wait for it to
172 173 * complete (for COTS transports). For CLTS transport there is no wait.
173 174 *
174 175 * tl_close_finish_ser() finishes the close process and wakes up the waiting
175 176 * close if there is any.
176 177 *
177 178 * Note that in most cases close will enter tl_close_finish_ser()
178 179 * synchronously and will not wait at all.
179 180 *
180 181 *
181 182 * Flow Control
182 183 * ============
183 184 *
184 185 * The driver implements both read and write side service routines. No one calls
185 186 * putq() on the read queue. The read side service routine tl_rsrv() is called
186 187 * when the read side stream is back-enabled. It enters serializer synchronously
187 188 * (waits till serializer processing is complete). Within serializer it
188 189 * back-enables all endpoints blocked by the queue for connection-less
189 190 * transports and enables write side service processing for the peer for
190 191 * connection-oriented transports.
191 192 *
192 193 * Read and write side service routines use special mblk_sized space in the
193 194 * endpoint structure to enter perimeter.
194 195 *
195 196 * Write-side flow control
196 197 * -----------------------
197 198 *
198 199 * Write side flow control is a bit tricky. The driver needs to deal with two
199 200 * message queues - the explicit STREAMS message queue maintained by
200 201 * putq()/getq()/putbq() and the implicit queue within the serializer. These two
201 202 * queues should be synchronized to preserve message ordering and should
202 203 * maintain a single order determined by the order in which messages enter
203 204 * tl_wput(). In order to maintain the ordering between these two queues the
204 205 * STREAMS queue is only manipulated within the serializer, so the ordering is
205 206 * provided by the serializer.
206 207 *
207 208 * Functions called from the tl_wsrv() sometimes may call putbq(). To
208 209 * immediately stop any further processing of the STREAMS message queues the
209 210 * code calling putbq() also sets the te_nowsrv flag in the endpoint. The write
210 211 * side service processing stops when the flag is set.
211 212 *
212 213 * The tl_wsrv() function enters serializer synchronously and waits for it to
213 214 * complete. The serializer call-back tl_wsrv_ser() either drains all messages
214 215 * on the STREAMS queue or terminates when it notices the te_nowsrv flag
215 216 * set. Note that the maximum amount of messages processed by tl_wput_ser() is
216 217 * always bounded by the amount of messages on the STREAMS queue at the time
217 218 * tl_wsrv_ser() is entered. Any new messages may only appear on the STREAMS
218 219 * queue from another serialized entry which can't happen in parallel. This
219 220 * guarantees that tl_wput_ser() is complete in bounded time (there is no risk
220 221 * of it draining forever while writer places new messages on the STREAMS
221 222 * queue).
222 223 *
223 224 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
224 225 *
225 226 *
226 227 * Unix Domain Sockets
227 228 * ===================
228 229 *
229 230 * The driver knows the structure of Unix Domain sockets addresses and treats
230 231 * them differently from generic TLI addresses. For sockets implicit binds are
231 232 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the address
232 233 * instead of using address length of zero. Explicit binds specify
233 234 * SOU_MAGIC_EXPLICIT as magic.
234 235 *
235 236 * For implicit binds we always use minor number as soua_vp part of the address
236 237 * and avoid any hash table lookups. This saves two hash tables lookups per
237 238 * anonymous bind.
238 239 *
239 240 * For explicit address we hash the vnode pointer instead of hashing the
240 241 * full-scale address+zone+length. Hashing by pointer is more efficient than
241 242 * hashing by the full address.
242 243 *
243 244 * For unix domain sockets the te_ap is always pointing to te_uxaddr part of the
244 245 * tep structure, so it should be never freed.
245 246 *
246 247 * Also for sockets the driver always uses minor number as acceptor id.
247 248 *
248 249 * TPI VIOLATIONS
249 250 * --------------
250 251 *
251 252 * This driver violates TPI in several respects for Unix Domain Sockets:
252 253 *
253 254 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit bind
254 255 * is requested and the endpoint is already in use. There is no point in
255 256 * generating an unused address since this address will be rejected by
256 257 * sockfs anyway. For implicit binds it always generates a new address
257 258 * (sets soua_vp to its minor number).
258 259 *
259 260 * 2) It always uses minor number as acceptor ID and never uses queue
260 261 * pointer. It is ok since sockets get acceptor ID from T_CAPABILITY_REQ
261 262 * message and they do not use the queue pointer.
262 263 *
263 264 * 3) For Listener sockets the usual sequence is to issue bind() with zero backlog
264 265 * followed by listen(). The listen() should be issued with non-zero
265 266 * backlog, so sotpi_listen() issues unbind request followed by bind
266 267 * request to the same address but with a non-zero qlen value. Both
267 268 * tl_bind() and tl_unbind() require write lock on the hash table to
268 269 * insert/remove the address. The driver does not remove the address from
269 270 * the hash for endpoints that are bound to the explicit address and have
270 271 * backlog of zero. During T_BIND_REQ processing if the address requested
271 272 * is equal to the address the endpoint already has it updates the backlog
272 273 * without reinserting the address in the hash table. This optimization
273 274 * avoids two hash table updates for each listener created. It also
274 275 * avoids the problem of a "stolen" address when another listener may use
275 276 * the same address between the unbind and bind and suddenly listen() fails
276 277 * because address is in use even though the bind() succeeded.
277 278 *
278 279 *
279 280 * CONNECTIONLESS TRANSPORTS
280 281 * =========================
281 282 *
282 283 * Connectionless transports all share the same serializer (one for TLI and one
283 284 * for Sockets). Functions executing behind serializer can check or modify state
284 285 * of any endpoint.
285 286 *
286 287 * When endpoint X talks to another endpoint Y it caches the pointer to Y in the
287 288 * te_lastep field. The next time X talks to some address A it checks whether A
288 289 * is the same as Y's address and if it is there is no need to lookup Y. If the
289 290 * address is different or the state of Y is not appropriate (e.g. closed or not
290 291 * idle) X does a lookup using tl_find_peer() and caches the new address.
291 292 * NOTE: tl_find_peer() never returns closing endpoint and it places a refhold
292 293 * on the endpoint found.
293 294 *
294 295 * During close of endpoint Y it doesn't try to remove itself from other
295 296 * endpoints' caches. They will detect that Y is gone and will search the peer
296 297 * endpoint again.
297 298 *
298 299 * Flow Control Handling.
299 300 * ----------------------
300 301 *
301 302 * Each connectionless endpoint keeps a list of endpoints which are
302 303 * flow-controlled by its queue. It also keeps a pointer to the queue which
303 304 * flow-controls itself. Whenever flow control releases for endpoint X it
304 305 * enables all queues from the list. During close it also back-enables everyone
305 306 * in the list. If X is flow-controlled when it is closing it removes itself from
306 307 * the peer's list.
307 308 *
308 309 * DATA STRUCTURES
309 310 * ===============
310 311 *
311 312 * Each endpoint is represented by the tl_endpt_t structure which keeps all the
312 313 * endpoint state. For connection-oriented transports it keeps a list
313 314 * of pending connections (tl_icon_t). For connectionless transports it keeps a
314 315 * list of endpoints flow controlled by this one.
315 316 *
316 317 * Each transport type is represented by a per-transport data structure
317 318 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
318 319 * endpoint address hash tables for each transport. It also contains pointer to
319 320 * transport serializer for connectionless transports.
320 321 *
321 322 * Each endpoint keeps a link to its transport structure, so the code can find
322 323 * all per-transport information quickly.
323 324 */
324 325
325 326 #include <sys/types.h>
326 327 #include <sys/inttypes.h>
327 328 #include <sys/stream.h>
328 329 #include <sys/stropts.h>
329 330 #define _SUN_TPI_VERSION 2
330 331 #include <sys/tihdr.h>
331 332 #include <sys/strlog.h>
332 333 #include <sys/debug.h>
333 334 #include <sys/cred.h>
334 335 #include <sys/errno.h>
335 336 #include <sys/kmem.h>
336 337 #include <sys/id_space.h>
337 338 #include <sys/modhash.h>
338 339 #include <sys/mkdev.h>
339 340 #include <sys/tl.h>
340 341 #include <sys/stat.h>
341 342 #include <sys/conf.h>
342 343 #include <sys/modctl.h>
343 344 #include <sys/strsun.h>
344 345 #include <sys/socket.h>
345 346 #include <sys/socketvar.h>
346 347 #include <sys/sysmacros.h>
347 348 #include <sys/xti_xtiopt.h>
348 349 #include <sys/ddi.h>
349 350 #include <sys/sunddi.h>
350 351 #include <sys/zone.h>
351 352 #include <inet/common.h> /* typedef int (*pfi_t)() for inet/optcom.h */
352 353 #include <inet/optcom.h>
353 354 #include <sys/strsubr.h>
354 355 #include <sys/ucred.h>
355 356 #include <sys/suntpi.h>
356 357 #include <sys/list.h>
357 358 #include <sys/serializer.h>
358 359
359 360 /*
360 361 * TBD List
361 362 * 14 Eliminate state changes through table
362 363 * 16. AF_UNIX socket options
363 364 * 17. connect() for ticlts
364 365 * 18. support for "netstat" to show AF_UNIX plus TLI local
365 366 * transport connections
366 367 * 21. sanity check to flushing on sending M_ERROR
367 368 */
368 369
369 370 /*
370 371 * CONSTANT DECLARATIONS
371 372 * --------------------
372 373 */
373 374
374 375 /*
375 376 * Local declarations
376 377 */
377 378 #define NEXTSTATE(EV, ST) ti_statetbl[EV][ST]
378 379
379 380 #define BADSEQNUM (-1) /* initial seq number used by T_DISCON_IND */
380 381 #define TL_BUFWAIT (10000) /* usecs to wait for allocb buffer timeout */
381 382 #define TL_TIDUSZ (64*1024) /* tidu size when "strmsgz" is unlimited (0) */
382 383 /*
383 384 * Hash tables size.
384 385 */
385 386 #define TL_HASH_SIZE 311
386 387
387 388 /*
388 389 * Definitions for module_info
389 390 */
390 391 #define TL_ID (104) /* module ID number */
391 392 #define TL_NAME "tl" /* module name */
392 393 #define TL_MINPSZ (0) /* min packet size */
393 394 #define TL_MAXPSZ INFPSZ /* max packet size ZZZ */
394 395 #define TL_HIWAT (16*1024) /* hi water mark */
395 396 #define TL_LOWAT (256) /* lo water mark */
396 397 /*
397 398 * Definition of minor numbers/modes for new transport provider modes.
398 399 * We view the socket use as a separate mode to get a separate name space.
399 400 */
400 401 #define TL_TICOTS 0 /* connection oriented transport */
401 402 #define TL_TICOTSORD 1 /* COTS w/ orderly release */
402 403 #define TL_TICLTS 2 /* connectionless transport */
403 404 #define TL_UNUSED 3 /* value 3 names no transport; tl_transports[] lists it as "undefined" */
404 405 #define TL_SOCKET 4 /* Socket: flag bit OR-ed onto one of the three modes above */
405 406 #define TL_SOCK_COTS (TL_SOCKET|TL_TICOTS) /* 4: socket flavour of TL_TICOTS */
406 407 #define TL_SOCK_COTSORD (TL_SOCKET|TL_TICOTSORD) /* 5: socket flavour of TL_TICOTSORD */
407 408 #define TL_SOCK_CLTS (TL_SOCKET|TL_TICLTS) /* 6: socket flavour of TL_TICLTS */
408 409
409 410 #define TL_MINOR_MASK 0x7 /* three bits: wide enough for every mode value above (0-6) */
410 411 #define TL_MINOR_START (TL_TICLTS + 1) /* == 3; NOTE(review): presumably the first minor available to opened endpoints - confirm in tl_open() */
411 412
412 413 /*
413 414 * LOCAL MACROS
414 415 */
415 416 #define T_ALIGN(p) P2ROUNDUP((p), sizeof (t_scalar_t))
416 417
417 418 /*
418 419 * EXTERNAL VARIABLE DECLARATIONS
419 420 * -----------------------------
420 421 */
421 422 /*
422 423 * state table defined in the OS space.c
423 424 */
424 425 extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES];
425 426
426 427 /*
427 428 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
428 429 */
429 430 static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
430 431 static int tl_close(queue_t *, int, cred_t *);
431 432 static void tl_wput(queue_t *, mblk_t *);
432 433 static void tl_wsrv(queue_t *);
433 434 static void tl_rsrv(queue_t *);
434 435
435 436 static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
436 437 static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
437 438 static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
438 439
439 440
440 441 /*
441 442 * GLOBAL DATA STRUCTURES AND VARIABLES
442 443 * -----------------------------------
443 444 */
444 445
445 446 /*
446 447 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ
447 448 * For now, we only manage the SO_RECVUCRED option but we also have
448 449 * harmless dummy options to make things work with some common code we access.
449 450 */
450 451 opdes_t tl_opt_arr[] = { /* entry count is taken with A_CNT() by TL_OPT_ARR_CNT and TL_MAX_OPT_BUF_LEN */
451 452 /* The SO_TYPE is needed for the hack below */
452 453 {
453 454 SO_TYPE, /* option name */
454 455 SOL_SOCKET, /* option level */
455 456 OA_R, /* NOTE(review): presumably read-only access; opdes_t field order is not visible here - confirm */
456 457 OA_R,
457 458 OP_NP,
458 459 0,
459 460 sizeof (t_scalar_t), /* presumably the option data size - confirm against opdes_t */
460 461 0
461 462 },
462 463 {
463 464 SO_RECVUCRED, /* option name; the only option actually managed (see comment above) */
464 465 SOL_SOCKET, /* option level */
465 466 OA_RW, /* NOTE(review): presumably read-write access - confirm */
466 467 OA_RW,
467 468 OP_NP,
468 469 0,
469 470 sizeof (int), /* presumably the option data size - confirm against opdes_t */
470 471 0
471 472 }
472 473 };
473 474
474 475 /*
475 476 * Table of all supported levels
476 477 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
477 478 * any supported options so we need this info separately.
478 479 *
479 480 * This is needed only for topmost tpi providers.
480 481 */
481 482 optlevel_t tl_valid_levels_arr[] = { /* entry count is exposed as TL_VALID_LEVELS_CNT */
482 483 XTI_GENERIC,
483 484 SOL_SOCKET, /* level used by both entries of tl_opt_arr */
484 485 TL_PROT_LEVEL
485 486 };
486 487
487 488 #define TL_VALID_LEVELS_CNT A_CNT(tl_valid_levels_arr) /* A_CNT() applied to the table above */
488 489 /*
489 490 * Current upper bound on the amount of space needed to return all options.
490 491 * Each tl_opt_arr entry is budgeted 4 data bytes (<< 2) plus a struct opthdr, on top of 64 spare bytes and the T_optmgmt_ack header, so additional options of that data size are handled automatically.
491 492 * Others must be accounted for by hand.
492 493 */
493 494 #define TL_MAX_OPT_BUF_LEN \
494 495 ((A_CNT(tl_opt_arr) << 2) + \
495 496 (A_CNT(tl_opt_arr) * sizeof (struct opthdr)) + \
496 497 64 + sizeof (struct T_optmgmt_ack))
497 498
498 499 #define TL_OPT_ARR_CNT A_CNT(tl_opt_arr) /* A_CNT() applied to tl_opt_arr */
499 500
500 501 /*
501 502 * transport addr structure; embedded in struct tl_endpt as te_ap and reached there through the te_zoneid/te_alen/te_abuf aliases
502 503 */
503 504 typedef struct tl_addr {
504 505 zoneid_t ta_zoneid; /* Zone scope of address */
505 506 t_scalar_t ta_alen; /* length of ta_abuf */
506 507 void *ta_abuf; /* the addr itself */
507 508 } tl_addr_t;
508 509
509 510 /*
510 511 * Refcounted version of serializer.
511 512 */
512 513 typedef struct tl_serializer {
513 514 uint_t ts_refcnt; /* reference count; lets several endpoints share one serializer */
514 515 serializer_t *ts_serializer; /* the wrapped serializer; endpoints reach it via the te_serializer alias */
515 516 } tl_serializer_t;
516 517
517 518 /*
518 519 * Each transport type has a separate state.
519 520 * Per-transport state.
520 521 */
521 522 typedef struct tl_transport_state {
522 523 char *tr_name; /* transport name, e.g. "ticots" */
523 524 minor_t tr_minor; /* mode value (TL_TICOTS ... TL_SOCK_CLTS) */
524 525 uint32_t tr_defaddr; /* initialised to TL_DFADDR; NOTE(review): presumably the next default address to hand out - confirm */
525 526 mod_hash_t *tr_ai_hash; /* acceptor ID hash; aliased as te_aihash */
526 527 mod_hash_t *tr_addr_hash; /* endpoint address hash; aliased as te_addrhash */
527 528 tl_serializer_t *tr_serializer; /* transport-wide serializer (connectionless transports) */
528 529 } tl_transport_state_t;
529 530
530 531 #define TL_DFADDR 0x1000
531 532
532 533 static tl_transport_state_t tl_transports[] = { /* one row per mode value; array index equals tr_minor (0..6); both hashes and the serializer start out NULL */
533 534 { "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
534 535 { "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
535 536 { "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
536 537 { "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL }, /* placeholder keeping index == mode */
537 538 { "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
538 539 { "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
539 540 { "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
540 541 };
541 542
542 543 #define TL_MAXTRANSPORT A_CNT(tl_transports)
543 544
544 545 struct tl_endpt;
545 546 typedef struct tl_endpt tl_endpt_t;
546 547
547 548 typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);
548 549
549 550 /*
550 551 * Data structure used to represent pending connects.
551 552 * Records enough information so that the connecting peer can close
552 553 * before the connection gets accepted.
553 554 */
554 555 typedef struct tl_icon {
555 556 list_node_t ti_node; /* list linkage; NOTE(review): presumably for the listener's te_iconp list - confirm */
556 557 struct tl_endpt *ti_tep; /* NULL if peer has already closed */
557 558 mblk_t *ti_mp; /* b_next list of data + ordrel_ind */
558 559 t_scalar_t ti_seqno; /* Sequence number */
559 560 } tl_icon_t;
560 561
561 562 typedef struct so_ux_addr soux_addr_t;
562 563 #define TL_SOUX_ADDRLEN sizeof (soux_addr_t)
563 564
564 565 /*
565 566 * Maximum number of unaccepted connection indications allowed per listener.
566 567 */
567 568 #define TL_MAXQLEN 4096
568 569 int tl_maxqlen = TL_MAXQLEN;
569 570
570 571 /*
571 572 * transport endpoint structure
572 573 */
573 574 struct tl_endpt {
574 575 queue_t *te_rq; /* stream read queue */
575 576 queue_t *te_wq; /* stream write queue */
576 577 uint32_t te_refcnt; /* reference count (see "References accounting" in the file header) */
577 578 int32_t te_state; /* TPI state of endpoint */
578 579 minor_t te_minor; /* minor number */
579 580 #define te_seqno te_minor /* the minor number doubles as the sequence number */
580 581 uint_t te_flag; /* flag field: transport mode bits plus the TL_* flag values; tested by IS_CLTS() etc. */
581 582 boolean_t te_nowsrv; /* set by TL_PUTBQ, cleared by TL_QENABLE/TL_PUTQ; stops write-side service processing while set */
582 583 tl_serializer_t *te_ser; /* Serializer to use */
583 584 #define te_serializer te_ser->ts_serializer
584 585
585 586 soux_addr_t te_uxaddr; /* Socket address */
586 587 #define te_magic te_uxaddr.soua_magic
587 588 #define te_vp te_uxaddr.soua_vp
588 589 tl_addr_t te_ap; /* addr bound to this endpt */
589 590 #define te_zoneid te_ap.ta_zoneid
590 591 #define te_alen te_ap.ta_alen
591 592 #define te_abuf te_ap.ta_abuf
592 593
593 594 tl_transport_state_t *te_transport; /* per-transport state this endpoint belongs to */
594 595 #define te_addrhash te_transport->tr_addr_hash
595 596 #define te_aihash te_transport->tr_ai_hash
596 597 #define te_defaddr te_transport->tr_defaddr
597 598 cred_t *te_credp; /* endpoint user credentials */
598 599 mod_hash_hndl_t te_hash_hndl; /* Handle for address hash */
599 600
600 601 /*
601 602 * State specific for connection-oriented and connectionless transports.
602 603 */
603 604 union { /* the COTS and CLTS variants share storage; use only the one matching the endpoint type */
604 605 /* Connection-oriented state. */
605 606 struct {
606 607 t_uscalar_t _te_nicon; /* count of conn requests */
607 608 t_uscalar_t _te_qlen; /* max conn requests */
608 609 tl_endpt_t *_te_oconp; /* conn request pending */
609 610 tl_endpt_t *_te_conp; /* connected endpt */
610 611 #ifndef _ILP32
611 612 void *_te_pad; /* pointer-sized filler, absent on ILP32; NOTE(review): purpose not stated here - presumably layout alignment with the CLTS variant */
612 613 #endif
613 614 list_t _te_iconp; /* list of conn ind. pending */
614 615 } _te_cots_state;
615 616 /* Connection-less state. */
616 617 struct {
617 618 tl_endpt_t *_te_lastep; /* last dest. endpoint */
618 619 tl_endpt_t *_te_flowq; /* flow controlled on whom */
619 620 list_node_t _te_flows; /* lists of connections */
620 621 list_t _te_flowlist; /* Who flowcontrols on me */
621 622 } _te_clts_state;
622 623 } _te_transport_state;
623 624 #define te_nicon _te_transport_state._te_cots_state._te_nicon
624 625 #define te_qlen _te_transport_state._te_cots_state._te_qlen
625 626 #define te_oconp _te_transport_state._te_cots_state._te_oconp
626 627 #define te_conp _te_transport_state._te_cots_state._te_conp
627 628 #define te_iconp _te_transport_state._te_cots_state._te_iconp
628 629 #define te_lastep _te_transport_state._te_clts_state._te_lastep
629 630 #define te_flowq _te_transport_state._te_clts_state._te_flowq
630 631 #define te_flowlist _te_transport_state._te_clts_state._te_flowlist
631 632 #define te_flows _te_transport_state._te_clts_state._te_flows
632 633
633 634 bufcall_id_t te_bufcid; /* outstanding bufcall id */
634 635 timeout_id_t te_timoutid; /* outstanding timeout id */
635 636 pid_t te_cpid; /* cached pid of endpoint */
636 637 t_uscalar_t te_acceptor_id; /* acceptor id for T_CONN_RES */
637 638 /*
638 639 * Pieces of the endpoint state needed for closing.
639 640 */
640 641 kmutex_t te_closelock; /* NOTE(review): presumably guards the close state below - confirm */
641 642 kcondvar_t te_closecv; /* NOTE(review): presumably what tl_close() waits on - confirm */
642 643 uint8_t te_closing; /* The endpoint started closing */
643 644 uint8_t te_closewait; /* Wait in close until zero */
644 645 mblk_t te_closemp; /* for entering serializer on close */
645 646 mblk_t te_rsrvmp; /* for entering serializer on rsrv */
646 647 mblk_t te_wsrvmp; /* for entering serializer on wsrv */
647 648 kmutex_t te_srv_lock; /* NOTE(review): presumably guards the two *_active flags below - confirm */
648 649 kcondvar_t te_srv_cv; /* NOTE(review): presumably signalled when a service routine finishes - confirm */
649 650 uint8_t te_rsrv_active; /* Running in tl_rsrv() */
650 651 uint8_t te_wsrv_active; /* Running in tl_wsrv() */
651 652 /*
652 653 * Pieces of the endpoint state needed for serializer transitions.
653 654 */
654 655 kmutex_t te_ser_lock; /* Protects the count below */
655 656 uint_t te_ser_count; /* Number of messages on serializer */
656 657 };
657 658
658 659 /*
659 660 * Flag values. Lower 4 bits specify the transport used.
660 661 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only,
661 662 * they allow to identify the endpoint more easily.
662 663 */
663 664 #define TL_LISTENER 0x00010 /* the listener endpoint */
664 665 #define TL_ACCEPTOR 0x00020 /* the accepting endpoint */
665 666 #define TL_EAGER 0x00040 /* connecting endpoint */
666 667 #define TL_ACCEPTED 0x00080 /* accepted connection */
667 668 #define TL_SETCRED 0x00100 /* flag to indicate sending of credentials */
668 669 #define TL_SETUCRED 0x00200 /* flag to indicate sending of ucred */
669 670 #define TL_SOCKUCRED 0x00400 /* flag to indicate sending of SCM_UCRED */
670 671 #define TL_ADDRHASHED 0x01000 /* Endpoint address is stored in te_addrhash */
671 672 #define TL_CLOSE_SER 0x10000 /* Endpoint close has entered the serializer */
672 673 /*
673 674 * Boolean checks for the endpoint type.
674 675 */
675 676 #define IS_CLTS(x) (((x)->te_flag & TL_TICLTS) != 0) /* TL_TICLTS bit (2) set in te_flag */
676 677 #define IS_COTS(x) (((x)->te_flag & TL_TICLTS) == 0) /* TL_TICLTS bit clear: true for both ticots and ticotsord */
677 678 #define IS_COTSORD(x) (((x)->te_flag & TL_TICOTSORD) != 0) /* TL_TICOTSORD bit (1) set */
678 679 #define IS_SOCKET(x) (((x)->te_flag & TL_SOCKET) != 0) /* TL_SOCKET bit (4) set */
679 680
680 681 /*
681 682 * Certain operations are always used together. These macros reduce the chance
682 683 * of missing a part of a combination.
683 684 */
684 685 #define TL_UNCONNECT(x) { tl_refrele(x); x = NULL; } /* tl_refrele() the endpoint, then NULL the caller's pointer; x must be an lvalue and is evaluated twice */
685 686 #define TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) } /* TL_UNCONNECT that tolerates a NULL pointer */
686 687 /* TL_PUTBQ: asserts TL_CLOSE_SER is not set, sets te_nowsrv, then putbq()s mp on the write queue. */
687 688 #define TL_PUTBQ(x, mp) { \
688 689 ASSERT(!((x)->te_flag & TL_CLOSE_SER)); \
689 690 (x)->te_nowsrv = B_TRUE; \
690 691 (void) putbq((x)->te_wq, mp); \
691 692 }
692 693 /* Both macros below clear te_nowsrv before touching the write queue. */
693 694 #define TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); } /* then qenable() the write queue */
694 695 #define TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); } /* then putq() mp on the write queue */
695 696
696 697 /*
697 698 * STREAMS driver glue data structures.
698 699 */
699 700 static struct module_info tl_minfo = { /* referenced by both tl_rinit and tl_winit */
700 701 TL_ID, /* mi_idnum */
701 702 TL_NAME, /* mi_idname */
702 703 TL_MINPSZ, /* mi_minpsz */
703 704 TL_MAXPSZ, /* mi_maxpsz */
704 705 TL_HIWAT, /* mi_hiwat */
705 706 TL_LOWAT /* mi_lowat */
706 707 };
707 708
708 709 static struct qinit tl_rinit = { /* read side: no put procedure, service routine plus open/close */
709 710 NULL, /* qi_putp */
710 711 (int (*)())tl_rsrv, /* qi_srvp - tl_rsrv() is declared void, hence the cast */
711 712 tl_open, /* qi_qopen */
712 713 tl_close, /* qi_qclose */
713 714 NULL, /* qi_qadmin */
714 715 &tl_minfo, /* qi_minfo */
715 716 NULL /* qi_mstat */
716 717 };
717 718
718 719 static struct qinit tl_winit = { /* write side: put and service routines; open/close live in tl_rinit */
719 720 (int (*)())tl_wput, /* qi_putp - tl_wput() is declared void, hence the cast */
720 721 (int (*)())tl_wsrv, /* qi_srvp - tl_wsrv() is declared void, hence the cast */
721 722 NULL, /* qi_qopen */
722 723 NULL, /* qi_qclose */
723 724 NULL, /* qi_qadmin */
724 725 &tl_minfo, /* qi_minfo */
725 726 NULL /* qi_mstat */
726 727 };
727 728
728 729 static struct streamtab tlinfo = {
729 730 &tl_rinit, /* st_rdinit */
730 731 &tl_winit, /* st_wrinit */
731 732 NULL, /* st_muxrinit */
732 733 NULL /* st_muxwrinit */
733 734 };
734 735
735 736 DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
736 737 nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);
737 738
738 739 static struct modldrv modldrv = {
739 740 &mod_driverops, /* Type of module -- pseudo driver here */
740 741 "TPI Local Transport (tl)",
741 742 &tl_devops, /* driver ops */
742 743 };
743 744
744 745 /*
745 746 * Module linkage information for the kernel.
746 747 */
747 748 static struct modlinkage modlinkage = {
748 749 MODREV_1,
749 750 &modldrv,
750 751 NULL
751 752 };
752 753
753 754 /*
754 755 * Templates for response to info request
755 756 * Check sanity of unlimited connect data etc.
 *
 * One template exists for connection-oriented endpoints and one for
 * connectionless endpoints. Fields marked "fill at run time" are zero here.
 * NOTE(review): CURRENT_state is -1 in both templates; presumably it is
 * also overwritten with the endpoint state before the ack is sent -- confirm
 * against the code that copies these templates.
756 757 */
757 758
758 759 #define TL_CLTS_PROVIDER_FLAG (XPG4_1|SENDZERO)
759 760 #define TL_COTS_PROVIDER_FLAG (XPG4_1|SENDZERO)
760 761
/* Connection-oriented template: all size limits are T_INFINITE. */
761 762 static struct T_info_ack tl_cots_info_ack =
762 763 {
763 764 T_INFO_ACK, /* PRIM_type -always T_INFO_ACK */
764 765 T_INFINITE, /* TSDU size */
765 766 T_INFINITE, /* ETSDU size */
766 767 T_INFINITE, /* CDATA_size */
767 768 T_INFINITE, /* DDATA_size */
768 769 T_INFINITE, /* ADDR_size */
769 770 T_INFINITE, /* OPT_size */
770 771 0, /* TIDU_size - fill at run time */
771 772 T_COTS, /* SERV_type */
772 773 -1, /* CURRENT_state */
773 774 TL_COTS_PROVIDER_FLAG /* PROVIDER_flag */
774 775 };
775 776
/*
 * Connectionless template: expedited, connect and disconnect data are
 * marked "not supported" (-2); address and option sizes use -1.
 */
776 777 static struct T_info_ack tl_clts_info_ack =
777 778 {
778 779 T_INFO_ACK, /* PRIM_type - always T_INFO_ACK */
779 780 0, /* TSDU_size - fill at run time */
780 781 -2, /* ETSDU_size -2 => not supported */
781 782 -2, /* CDATA_size -2 => not supported */
782 783 -2, /* DDATA_size -2 => not supported */
783 784 -1, /* ADDR_size -1 => infinite */
784 785 -1, /* OPT_size */
785 786 0, /* TIDU_size - fill at run time */
786 787 T_CLTS, /* SERV_type */
787 788 -1, /* CURRENT_state */
788 789 TL_CLTS_PROVIDER_FLAG /* PROVIDER_flag */
789 790 };
790 791
791 792 /*
792 793 * private copy of devinfo pointer used in tl_info
 * Set at the end of a successful tl_attach().
793 794 */
794 795 static dev_info_t *tl_dip;
795 796
796 797 /*
797 798 * Endpoints cache.
 * Created in tl_attach() with tl_constructor()/tl_destructor() and
 * destroyed in tl_detach().
798 799 */
799 800 static kmem_cache_t *tl_cache;
800 801 /*
801 802 * Minor number space.
 * Each open endpoint takes one ID from this space as its minor number and
 * returns it in tl_free().
802 803 */
803 804 static id_space_t *tl_minors;
804 805
805 806 /*
806 807 * Default Data Unit size.
 * Computed in tl_attach() from strmsgsz, falling back to TL_TIDUSZ when
 * strmsgsz is 0.
807 808 */
808 809 static t_scalar_t tl_tidusz;
809 810
810 811 /*
811 812 * Size of hash tables.
812 813 */
813 814 static size_t tl_hash_size = TL_HASH_SIZE;
814 815
815 816 /*
816 817 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
817 818 * for sockets.
818 819 */
819 820 static int tl_disable_early_connect = 0;
/*
 * NOTE(review): the two variables below are not referenced in this part of
 * the file; from their names they look like a debug counter and a
 * serializer tunable -- confirm against their users.
 */
820 821 static int tl_client_closing_when_accepting;
821 822
822 823 static int tl_serializer_noswitch;
823 824
824 825 /*
825 826 * LOCAL FUNCTION PROTOTYPES
826 827 * -------------------------
827 828 */
828 829 static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
829 830 static void tl_do_proto(mblk_t *, tl_endpt_t *);
830 831 static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
831 832 static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
832 833 static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
833 834 t_scalar_t);
834 835 static void tl_bind(mblk_t *, tl_endpt_t *);
835 836 static void tl_bind_ser(mblk_t *, tl_endpt_t *);
836 837 static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
837 838 static void tl_unbind(mblk_t *, tl_endpt_t *);
838 839 static void tl_optmgmt(queue_t *, mblk_t *);
839 840 static void tl_conn_req(queue_t *, mblk_t *);
840 841 static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
841 842 static void tl_conn_res(mblk_t *, tl_endpt_t *);
842 843 static void tl_discon_req(mblk_t *, tl_endpt_t *);
843 844 static void tl_capability_req(mblk_t *, tl_endpt_t *);
844 845 static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
845 846 static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
846 847 static void tl_info_req(mblk_t *, tl_endpt_t *);
847 848 static void tl_addr_req(mblk_t *, tl_endpt_t *);
848 849 static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
849 850 static void tl_data(mblk_t *, tl_endpt_t *);
850 851 static void tl_exdata(mblk_t *, tl_endpt_t *);
851 852 static void tl_ordrel(mblk_t *, tl_endpt_t *);
852 853 static void tl_unitdata(mblk_t *, tl_endpt_t *);
853 854 static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
854 855 static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
855 856 static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
856 857 static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
857 858 static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
858 859 static void tl_cl_backenable(tl_endpt_t *);
859 860 static void tl_co_unconnect(tl_endpt_t *);
860 861 static mblk_t *tl_resizemp(mblk_t *, ssize_t);
861 862 static void tl_discon_ind(tl_endpt_t *, uint32_t);
862 863 static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
863 864 static mblk_t *tl_ordrel_ind_alloc(void);
864 865 static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
865 866 static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
866 867 static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
867 868 static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
868 869 static void tl_icon_freemsgs(mblk_t **);
869 870 static void tl_merror(queue_t *, mblk_t *, int);
870 871 static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
871 872 static int tl_default_opt(queue_t *, int, int, uchar_t *);
872 873 static int tl_get_opt(queue_t *, int, int, uchar_t *);
873 874 static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
874 875 uchar_t *, void *, cred_t *);
875 876 static void tl_memrecover(queue_t *, mblk_t *, size_t);
876 877 static void tl_freetip(tl_endpt_t *, tl_icon_t *);
877 878 static void tl_free(tl_endpt_t *);
878 879 static int tl_constructor(void *, void *, int);
879 880 static void tl_destructor(void *, void *);
880 881 static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
881 882 static tl_serializer_t *tl_serializer_alloc(int);
882 883 static void tl_serializer_refhold(tl_serializer_t *);
883 884 static void tl_serializer_refrele(tl_serializer_t *);
884 885 static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
885 886 static void tl_serializer_exit(tl_endpt_t *);
886 887 static boolean_t tl_noclose(tl_endpt_t *);
887 888 static void tl_closeok(tl_endpt_t *);
888 889 static void tl_refhold(tl_endpt_t *);
889 890 static void tl_refrele(tl_endpt_t *);
890 891 static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
891 892 static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
892 893 static void tl_close_ser(mblk_t *, tl_endpt_t *);
893 894 static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
894 895 static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
895 896 static void tl_proto_ser(mblk_t *, tl_endpt_t *);
896 897 static void tl_putq_ser(mblk_t *, tl_endpt_t *);
897 898 static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
898 899 static void tl_wput_ser(mblk_t *, tl_endpt_t *);
899 900 static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
900 901 static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
901 902 static void tl_addr_unbind(tl_endpt_t *);
902 903
903 904 /*
904 905 * Initialize option database object for TL
905 906 */
906 907
907 908 optdb_obj_t tl_opt_obj = {
908 909 tl_default_opt, /* TL default value function pointer */
909 910 tl_get_opt, /* TL get function pointer */
910 911 tl_set_opt, /* TL set function pointer */
911 912 TL_OPT_ARR_CNT, /* TL option database count of entries */
912 913 tl_opt_arr, /* TL option database */
913 914 TL_VALID_LEVELS_CNT, /* TL valid level count of entries */
914 915 tl_valid_levels_arr /* TL valid level array */
915 916 };
916 917
917 918 /*
918 919 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
919 920 * ---------------------------------------
920 921 */
921 922
922 923 /*
923 924 * Loadable module routines
924 925 */
925 926 int
926 927 _init(void)
927 928 {
928 929 return (mod_install(&modlinkage));
929 930 }
930 931
931 932 int
932 933 _fini(void)
933 934 {
934 935 return (mod_remove(&modlinkage));
935 936 }
936 937
937 938 int
938 939 _info(struct modinfo *modinfop)
939 940 {
940 941 return (mod_info(&modlinkage, modinfop));
941 942 }
942 943
943 944 /*
944 945 * Driver Entry Points and Other routines
945 946 */
/*
 * Attach entry point.
 *
 * DDI_RESUME succeeds without doing anything; any command other than
 * DDI_ATTACH fails. For DDI_ATTACH the routine:
 *  - picks the TIDU size,
 *  - creates one minor node per transport below TL_UNUSED,
 *  - creates the endpoint cache and the minor-number ID space,
 *  - creates the acceptor-ID and address hash tables for each transport
 *    (TL_SOCK_COTSORD reuses the tables of TL_SOCK_COTS),
 *  - allocates a shared serializer for each connectionless transport,
 *  - records devi in tl_dip.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
946 947 static int
947 948 tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
948 949 {
949 950 int i;
950 951 char name[32];
951 952
952 953 /*
953 954 * Resume from a checkpoint state.
954 955 */
955 956 if (cmd == DDI_RESUME)
956 957 return (DDI_SUCCESS);
957 958
958 959 if (cmd != DDI_ATTACH)
959 960 return (DDI_FAILURE);
960 961
961 962 /*
962 963 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics that
963 964 * streams message sizes can be unlimited. We use a defined constant
964 965 * instead.
965 966 */
966 967 tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;
967 968
968 969 /*
969 970 * Create subdevices for each transport.
 * On failure remove every minor node created so far.
970 971 */
971 972 for (i = 0; i < TL_UNUSED; i++) {
972 973 if (ddi_create_minor_node(devi,
973 974 tl_transports[i].tr_name,
974 975 S_IFCHR, tl_transports[i].tr_minor,
975 976 DDI_PSEUDO, NULL) == DDI_FAILURE) {
976 977 ddi_remove_minor_node(devi, NULL);
977 978 return (DDI_FAILURE);
978 979 }
979 980 }
980 981
981 982 tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
982 983 0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);
983 984
984 985 if (tl_cache == NULL) {
985 986 ddi_remove_minor_node(devi, NULL);
986 987 return (DDI_FAILURE);
987 988 }
988 989
/* Create ID space for minor numbers. */
989 990 tl_minors = id_space_create("tl_minor_space",
990 991 TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);
991 992
992 993 /*
993 994 * Set up the per-transport hash tables and serializers.
994 995 */
995 996 for (i = 0; i < TL_MAXTRANSPORT; i++) {
996 997 tl_transport_state_t *t = &tl_transports[i];
997 998
998 999 if (i == TL_UNUSED)
999 1000 continue;
1000 1001
1001 1002 /* Socket COTSORD shares namespace with COTS */
1002 1003 if (i == TL_SOCK_COTSORD) {
1003 1004 t->tr_ai_hash =
1004 1005 tl_transports[TL_SOCK_COTS].tr_ai_hash;
1005 1006 ASSERT(t->tr_ai_hash != NULL);
1006 1007 t->tr_addr_hash =
1007 1008 tl_transports[TL_SOCK_COTS].tr_addr_hash;
1008 1009 ASSERT(t->tr_addr_hash != NULL);
1009 1010 continue;
1010 1011 }
1011 1012
1012 1013 /*
1013 1014 * Create hash tables.
 * The acceptor-ID hash is a pointer hash only for non-socket
 * transports on ILP32; otherwise it is an ID hash. This must
 * match the destroy calls in tl_detach().
1014 1015 */
1015 1016 (void) snprintf(name, sizeof (name), "%s_ai_hash",
1016 1017 t->tr_name);
1017 1018 #ifdef _ILP32
1018 1019 if (i & TL_SOCKET)
1019 1020 t->tr_ai_hash =
1020 1021 mod_hash_create_idhash(name, tl_hash_size - 1,
1021 1022 mod_hash_null_valdtor);
1022 1023 else
1023 1024 t->tr_ai_hash =
1024 1025 mod_hash_create_ptrhash(name, tl_hash_size,
1025 1026 mod_hash_null_valdtor, sizeof (queue_t));
1026 1027 #else
1027 1028 t->tr_ai_hash =
1028 1029 mod_hash_create_idhash(name, tl_hash_size - 1,
1029 1030 mod_hash_null_valdtor);
1030 1031 #endif /* _ILP32 */
1031 1032
/* Socket transports hash addresses by pointer; others by address bytes. */
1032 1033 if (i & TL_SOCKET) {
1033 1034 (void) snprintf(name, sizeof (name), "%s_sockaddr_hash",
1034 1035 t->tr_name);
1035 1036 t->tr_addr_hash = mod_hash_create_ptrhash(name,
1036 1037 tl_hash_size, mod_hash_null_valdtor,
1037 1038 sizeof (uintptr_t));
1038 1039 } else {
1039 1040 (void) snprintf(name, sizeof (name), "%s_addr_hash",
1040 1041 t->tr_name);
1041 1042 t->tr_addr_hash = mod_hash_create_extended(name,
1042 1043 tl_hash_size, mod_hash_null_keydtor,
1043 1044 mod_hash_null_valdtor,
1044 1045 tl_hash_by_addr, NULL, tl_hash_cmp_addr, KM_SLEEP);
1045 1046 }
1046 1047
1047 1048 /* Create serializer for connectionless transports. */
1048 1049 if (i & TL_TICLTS)
1049 1050 t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
1050 1051 }
1051 1052
1052 1053 tl_dip = devi;
1053 1054
1054 1055 return (DDI_SUCCESS);
1055 1056 }
1056 1057
1057 1058 static int
1058 1059 tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
1059 1060 {
1060 1061 int i;
1061 1062
1062 1063 if (cmd == DDI_SUSPEND)
1063 1064 return (DDI_SUCCESS);
1064 1065
1065 1066 if (cmd != DDI_DETACH)
1066 1067 return (DDI_FAILURE);
1067 1068
1068 1069 /*
1069 1070 * Destroy arenas and hash tables.
1070 1071 */
1071 1072 for (i = 0; i < TL_MAXTRANSPORT; i++) {
1072 1073 tl_transport_state_t *t = &tl_transports[i];
1073 1074
1074 1075 if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
1075 1076 continue;
1076 1077
1077 1078 EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
1078 1079 if (t->tr_serializer != NULL) {
1079 1080 tl_serializer_refrele(t->tr_serializer);
1080 1081 t->tr_serializer = NULL;
1081 1082 }
1082 1083
1083 1084 #ifdef _ILP32
1084 1085 if (i & TL_SOCKET)
1085 1086 mod_hash_destroy_idhash(t->tr_ai_hash);
1086 1087 else
1087 1088 mod_hash_destroy_ptrhash(t->tr_ai_hash);
1088 1089 #else
1089 1090 mod_hash_destroy_idhash(t->tr_ai_hash);
1090 1091 #endif /* _ILP32 */
1091 1092 t->tr_ai_hash = NULL;
1092 1093 if (i & TL_SOCKET)
1093 1094 mod_hash_destroy_ptrhash(t->tr_addr_hash);
1094 1095 else
1095 1096 mod_hash_destroy_hash(t->tr_addr_hash);
1096 1097 t->tr_addr_hash = NULL;
1097 1098 }
1098 1099
1099 1100 kmem_cache_destroy(tl_cache);
1100 1101 tl_cache = NULL;
1101 1102 id_space_destroy(tl_minors);
1102 1103 tl_minors = NULL;
1103 1104 ddi_remove_minor_node(devi, NULL);
1104 1105 return (DDI_SUCCESS);
1105 1106 }
1106 1107
1107 1108 /* ARGSUSED */
1108 1109 static int
1109 1110 tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1110 1111 {
1111 1112
1112 1113 int retcode = DDI_FAILURE;
1113 1114
1114 1115 switch (infocmd) {
1115 1116
1116 1117 case DDI_INFO_DEVT2DEVINFO:
1117 1118 if (tl_dip != NULL) {
1118 1119 *result = (void *)tl_dip;
1119 1120 retcode = DDI_SUCCESS;
1120 1121 }
1121 1122 break;
1122 1123
1123 1124 case DDI_INFO_DEVT2INSTANCE:
1124 1125 *result = (void *)0;
1125 1126 retcode = DDI_SUCCESS;
1126 1127 break;
1127 1128
1128 1129 default:
1129 1130 break;
1130 1131 }
1131 1132 return (retcode);
1132 1133 }
1133 1134
1134 1135 /*
1135 1136 * Endpoint reference management.
1136 1137 */
1137 1138 static void
1138 1139 tl_refhold(tl_endpt_t *tep)
1139 1140 {
1140 1141 atomic_add_32(&tep->te_refcnt, 1);
1141 1142 }
1142 1143
1143 1144 static void
1144 1145 tl_refrele(tl_endpt_t *tep)
1145 1146 {
1146 1147 ASSERT(tep->te_refcnt != 0);
1147 1148
1148 1149 if (atomic_add_32_nv(&tep->te_refcnt, -1) == 0)
1149 1150 tl_free(tep);
1150 1151 }
1151 1152
1152 1153 /*ARGSUSED*/
1153 1154 static int
1154 1155 tl_constructor(void *buf, void *cdrarg, int kmflags)
1155 1156 {
1156 1157 tl_endpt_t *tep = buf;
1157 1158
1158 1159 bzero(tep, sizeof (tl_endpt_t));
1159 1160 mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
1160 1161 cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
1161 1162 mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
1162 1163 cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
1163 1164 mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);
1164 1165
1165 1166 return (0);
1166 1167 }
1167 1168
1168 1169 /*ARGSUSED*/
1169 1170 static void
1170 1171 tl_destructor(void *buf, void *cdrarg)
1171 1172 {
1172 1173 tl_endpt_t *tep = buf;
1173 1174
1174 1175 mutex_destroy(&tep->te_closelock);
1175 1176 cv_destroy(&tep->te_closecv);
1176 1177 mutex_destroy(&tep->te_srv_lock);
1177 1178 cv_destroy(&tep->te_srv_cv);
1178 1179 mutex_destroy(&tep->te_ser_lock);
1179 1180 }
1180 1181
/*
 * Release an endpoint whose reference count has dropped to zero.
 *
 * Called from tl_refrele(). By this point both queue pointers must already
 * be cleared and no serializer requests may be pending. The routine frees
 * the address buffer of a non-socket endpoint, returns the minor number to
 * tl_minors, cancels an unused hash reservation, drops the peer references
 * and the private serializer of a connection-oriented endpoint, resets the
 * remaining state and hands the endpoint back to tl_cache.
 */
1181 1182 static void
1182 1183 tl_free(tl_endpt_t *tep)
1183 1184 {
1184 1185 ASSERT(tep->te_refcnt == 0);
1185 1186 ASSERT(tep->te_transport != NULL);
1186 1187 ASSERT(tep->te_rq == NULL);
1187 1188 ASSERT(tep->te_wq == NULL);
1188 1189 ASSERT(tep->te_ser != NULL);
1189 1190 ASSERT(tep->te_ser_count == 0);
1190 1191 ASSERT(! (tep->te_flag & TL_ADDRHASHED));
1191 1192
/* Socket endpoints keep their address inline; others own a heap buffer. */
1192 1193 if (IS_SOCKET(tep)) {
1193 1194 ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
1194 1195 ASSERT(tep->te_abuf == &tep->te_uxaddr);
1195 1196 ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
1196 1197 ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
1197 1198 } else if (tep->te_abuf != NULL) {
1198 1199 kmem_free(tep->te_abuf, tep->te_alen);
1199 1200 tep->te_alen = -1; /* uninitialized */
1200 1201 tep->te_abuf = NULL;
1201 1202 } else {
1202 1203 ASSERT(tep->te_alen == -1);
1203 1204 }
1204 1205
1205 1206 id_free(tl_minors, tep->te_minor);
1206 1207 ASSERT(tep->te_credp == NULL);
1207 1208
/* Give back the hash handle reserved in tl_open() if it is still held. */
1208 1209 if (tep->te_hash_hndl != NULL)
1209 1210 mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);
1210 1211
/*
 * Connection-oriented endpoints own a private serializer and may still
 * reference peers; connectionless endpoints use the transport's shared
 * serializer, which is not released here.
 */
1211 1212 if (IS_COTS(tep)) {
1212 1213 TL_REMOVE_PEER(tep->te_conp);
1213 1214 TL_REMOVE_PEER(tep->te_oconp);
1214 1215 tl_serializer_refrele(tep->te_ser);
1215 1216 tep->te_ser = NULL;
1216 1217 ASSERT(tep->te_nicon == 0);
1217 1218 ASSERT(list_head(&tep->te_iconp) == NULL);
1218 1219 } else {
1219 1220 ASSERT(tep->te_lastep == NULL);
1220 1221 ASSERT(list_head(&tep->te_flowlist) == NULL);
1221 1222 ASSERT(tep->te_flowq == NULL);
1222 1223 }
1223 1224
1224 1225 ASSERT(tep->te_bufcid == 0);
1225 1226 ASSERT(tep->te_timoutid == 0);
1226 1227 bzero(&tep->te_ap, sizeof (tep->te_ap));
1227 1228 tep->te_acceptor_id = 0;
1228 1229
1229 1230 ASSERT(tep->te_closewait == 0);
1230 1231 ASSERT(!tep->te_rsrv_active);
1231 1232 ASSERT(!tep->te_wsrv_active);
1232 1233 tep->te_closing = 0;
1233 1234 tep->te_nowsrv = B_FALSE;
1234 1235 tep->te_flag = 0;
1235 1236
1236 1237 kmem_cache_free(tl_cache, tep);
1237 1238 }
1238 1239
1239 1240 /*
1240 1241 * Allocate/free reference-counted wrappers for serializers.
1241 1242 */
1242 1243 static tl_serializer_t *
1243 1244 tl_serializer_alloc(int flags)
1244 1245 {
1245 1246 tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
1246 1247 serializer_t *ser;
1247 1248
1248 1249 if (s == NULL)
1249 1250 return (NULL);
1250 1251
1251 1252 ser = serializer_create(flags);
1252 1253
1253 1254 if (ser == NULL) {
1254 1255 kmem_free(s, sizeof (tl_serializer_t));
1255 1256 return (NULL);
1256 1257 }
1257 1258
1258 1259 s->ts_refcnt = 1;
1259 1260 s->ts_serializer = ser;
1260 1261 return (s);
1261 1262 }
1262 1263
1263 1264 static void
1264 1265 tl_serializer_refhold(tl_serializer_t *s)
1265 1266 {
1266 1267 atomic_add_32(&s->ts_refcnt, 1);
1267 1268 }
1268 1269
1269 1270 static void
1270 1271 tl_serializer_refrele(tl_serializer_t *s)
1271 1272 {
1272 1273 if (atomic_add_32_nv(&s->ts_refcnt, -1) == 0) {
1273 1274 serializer_destroy(s->ts_serializer);
1274 1275 kmem_free(s, sizeof (tl_serializer_t));
1275 1276 }
1276 1277 }
1277 1278
1278 1279 /*
1279 1280 * Post a request on the endpoint serializer. For COTS transports keep track of
1280 1281 * the number of pending requests.
1281 1282 */
1282 1283 static void
1283 1284 tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
1284 1285 {
1285 1286 if (IS_COTS(tep)) {
1286 1287 mutex_enter(&tep->te_ser_lock);
1287 1288 tep->te_ser_count++;
1288 1289 mutex_exit(&tep->te_ser_lock);
1289 1290 }
1290 1291 serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
1291 1292 }
1292 1293
1293 1294 /*
1294 1295 * Complete processing the request on the serializer. Decrement the counter for
1295 1296 * pending requests for COTS transports.
1296 1297 */
1297 1298 static void
1298 1299 tl_serializer_exit(tl_endpt_t *tep)
1299 1300 {
1300 1301 if (IS_COTS(tep)) {
1301 1302 mutex_enter(&tep->te_ser_lock);
1302 1303 ASSERT(tep->te_ser_count != 0);
1303 1304 tep->te_ser_count--;
1304 1305 mutex_exit(&tep->te_ser_lock);
1305 1306 }
1306 1307 }
1307 1308
1308 1309 /*
1309 1310 * Hash management functions.
1310 1311 */
1311 1312
1312 1313 /*
1313 1314 * Return TRUE if two addresses are equal, false otherwise.
1314 1315 */
1315 1316 static boolean_t
1316 1317 tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
1317 1318 {
1318 1319 return ((ap1->ta_alen > 0) &&
1319 1320 (ap1->ta_alen == ap2->ta_alen) &&
1320 1321 (ap1->ta_zoneid == ap2->ta_zoneid) &&
1321 1322 (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
1322 1323 }
1323 1324
1324 1325 /*
1325 1326 * This function is called whenever an endpoint is found in the hash table.
1326 1327 */
1327 1328 /* ARGSUSED0 */
1328 1329 static void
1329 1330 tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
1330 1331 {
1331 1332 tl_refhold((tl_endpt_t *)val);
1332 1333 }
1333 1334
1334 1335 /*
1335 1336 * Address hash function.
1336 1337 */
1337 1338 /* ARGSUSED */
1338 1339 static uint_t
1339 1340 tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
1340 1341 {
1341 1342 tl_addr_t *ap = (tl_addr_t *)key;
1342 1343 size_t len = ap->ta_alen;
1343 1344 uchar_t *p = ap->ta_abuf;
1344 1345 uint_t i, g;
1345 1346
1346 1347 ASSERT((len > 0) && (p != NULL));
1347 1348
1348 1349 for (i = ap->ta_zoneid; len -- != 0; p++) {
1349 1350 i = (i << 4) + (*p);
1350 1351 if ((g = (i & 0xf0000000U)) != 0) {
1351 1352 i ^= (g >> 24);
1352 1353 i ^= g;
1353 1354 }
1354 1355 }
1355 1356 return (i);
1356 1357 }
1357 1358
1358 1359 /*
1359 1360 * This function is used by hash lookups. It compares two generic addresses.
1360 1361 */
1361 1362 static int
1362 1363 tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
1363 1364 {
1364 1365 #ifdef DEBUG
1365 1366 tl_addr_t *ap1 = (tl_addr_t *)key1;
1366 1367 tl_addr_t *ap2 = (tl_addr_t *)key2;
1367 1368
1368 1369 ASSERT(key1 != NULL);
1369 1370 ASSERT(key2 != NULL);
1370 1371
1371 1372 ASSERT(ap1->ta_abuf != NULL);
1372 1373 ASSERT(ap2->ta_abuf != NULL);
1373 1374 ASSERT(ap1->ta_alen > 0);
1374 1375 ASSERT(ap2->ta_alen > 0);
1375 1376 #endif
1376 1377
1377 1378 return (! tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
1378 1379 }
1379 1380
1380 1381 /*
1381 1382 * Prevent endpoint from closing if possible.
1382 1383 * Return B_TRUE on success, B_FALSE on failure.
1383 1384 */
1384 1385 static boolean_t
1385 1386 tl_noclose(tl_endpt_t *tep)
1386 1387 {
1387 1388 boolean_t rc = B_FALSE;
1388 1389
1389 1390 mutex_enter(&tep->te_closelock);
1390 1391 if (! tep->te_closing) {
1391 1392 ASSERT(tep->te_closewait == 0);
1392 1393 tep->te_closewait++;
1393 1394 rc = B_TRUE;
1394 1395 }
1395 1396 mutex_exit(&tep->te_closelock);
1396 1397 return (rc);
1397 1398 }
1398 1399
1399 1400 /*
1400 1401 * Allow endpoint to close if needed.
1401 1402 */
1402 1403 static void
1403 1404 tl_closeok(tl_endpt_t *tep)
1404 1405 {
1405 1406 ASSERT(tep->te_closewait > 0);
1406 1407 mutex_enter(&tep->te_closelock);
1407 1408 ASSERT(tep->te_closewait == 1);
1408 1409 tep->te_closewait--;
1409 1410 cv_signal(&tep->te_closecv);
1410 1411 mutex_exit(&tep->te_closelock);
1411 1412 }
1412 1413
1413 1414 /*
1414 1415 * STREAMS open entry point.
 *
 * Rejects CLONEOPEN and MODOPEN as well as minors at or above TL_UNUSED
 * with ENXIO, and returns 0 immediately when the queue already has an
 * endpoint attached. Otherwise it allocates an endpoint from tl_cache,
 * records the opener's credentials, pid and zone, picks the transport from
 * the minor number (marked as a socket when SO_SOCKSTR is set in oflag),
 * allocates a unique minor number and rewrites *devp with it, sets up the
 * serializer and address state, enables the queues and finally publishes
 * the endpoint in the transport's acceptor-ID hash.
 *
 * Returns 0 on success or ENXIO.
1415 1416 */
1416 1417 /* ARGSUSED */
1417 1418 static int
1418 1419 tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
1419 1420 {
1420 1421 tl_endpt_t *tep;
1421 1422 minor_t minor = getminor(*devp);
1422 1423
1423 1424 /*
1424 1425 * Driver is called directly. Both CLONEOPEN and MODOPEN
1425 1426 * are illegal
1426 1427 */
1427 1428 if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
1428 1429 return (ENXIO);
1429 1430
/* The queue already has an endpoint: nothing more to do. */
1430 1431 if (rq->q_ptr != NULL)
1431 1432 return (0);
1432 1433
1433 1434 /* Minor number should specify the mode used for the driver. */
1434 1435 if ((minor >= TL_UNUSED))
1435 1436 return (ENXIO);
1436 1437
/* From here on "minor" is an index into tl_transports[], not a device minor. */
1437 1438 if (oflag & SO_SOCKSTR) {
1438 1439 minor |= TL_SOCKET;
1439 1440 }
1440 1441
1441 1442 tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
1442 1443 tep->te_refcnt = 1;
1443 1444 tep->te_cpid = curproc->p_pid;
1444 1445 rq->q_ptr = WR(rq)->q_ptr = tep;
1445 1446 tep->te_state = TS_UNBND;
1446 1447 tep->te_credp = credp;
1447 1448 crhold(credp);
1448 1449 tep->te_zoneid = getzoneid();
1449 1450
1450 1451 tep->te_flag = minor & TL_MINOR_MASK;
1451 1452 tep->te_transport = &tl_transports[minor];
1452 1453
1453 1454 /* Allocate a unique minor number for this instance. */
1454 1455 tep->te_minor = (minor_t)id_alloc(tl_minors);
1455 1456
1456 1457 /* Reserve hash handle for bind(). */
1457 1458 (void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);
1458 1459
1459 1460 /* Transport-specific initialization */
1460 1461 if (IS_COTS(tep)) {
1461 1462 /* Use private serializer */
1462 1463 tep->te_ser = tl_serializer_alloc(KM_SLEEP);
1463 1464
1464 1465 /* Create list for pending connections */
1465 1466 list_create(&tep->te_iconp, sizeof (tl_icon_t),
1466 1467 offsetof(tl_icon_t, ti_node));
1467 1468 tep->te_qlen = 0;
1468 1469 tep->te_nicon = 0;
1469 1470 tep->te_oconp = NULL;
1470 1471 tep->te_conp = NULL;
1471 1472 } else {
1472 1473 /* Use shared serializer */
1473 1474 tep->te_ser = tep->te_transport->tr_serializer;
1474 1475 bzero(&tep->te_flows, sizeof (list_node_t));
1475 1476 /* Create list for flow control */
1476 1477 list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
1477 1478 offsetof(tl_endpt_t, te_flows));
1478 1479 tep->te_flowq = NULL;
1479 1480 tep->te_lastep = NULL;
1480 1481
1481 1482 }
1482 1483
1483 1484 /* Initialize endpoint address */
1484 1485 if (IS_SOCKET(tep)) {
1485 1486 /* Socket-specific address handling. */
1486 1487 tep->te_alen = TL_SOUX_ADDRLEN;
1487 1488 tep->te_abuf = &tep->te_uxaddr;
1488 1489 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
1489 1490 tep->te_magic = SOU_MAGIC_IMPLICIT;
1490 1491 } else {
/* No address yet; -1 marks the length as uninitialized. */
1491 1492 tep->te_alen = -1;
1492 1493 tep->te_abuf = NULL;
1493 1494 }
1494 1495
1495 1496 /* clone the driver */
1496 1497 *devp = makedevice(getmajor(*devp), tep->te_minor);
1497 1498
1498 1499 tep->te_rq = rq;
1499 1500 tep->te_wq = WR(rq);
1500 1501
/*
 * Choose the acceptor ID: the minor number, except for non-socket
 * endpoints on ILP32 where the read queue pointer is used. This matches
 * the kind of acceptor-ID hash created in tl_attach().
 */
1501 1502 #ifdef _ILP32
1502 1503 if (IS_SOCKET(tep))
1503 1504 tep->te_acceptor_id = tep->te_minor;
1504 1505 else
1505 1506 tep->te_acceptor_id = (t_uscalar_t)rq;
1506 1507 #else
1507 1508 tep->te_acceptor_id = tep->te_minor;
1508 1509 #endif /* _ILP32 */
1509 1510
1510 1511
1511 1512 qprocson(rq);
1512 1513
1513 1514 /*
1514 1515 * Insert acceptor ID in the hash. The AI hash always sleeps on
1515 1516 * insertion so insertion can't fail.
1516 1517 */
1517 1518 (void) mod_hash_insert(tep->te_transport->tr_ai_hash,
1518 1519 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1519 1520 (mod_hash_val_t)tep);
1520 1521
1521 1522 return (0);
1522 1523 }
1523 1524
/*
 * STREAMS close entry point.
 *
 * Close runs in two serialized phases:
 *  1. tl_close_ser() is posted on the serializer and tl_close() waits for
 *     it on te_closecv before calling qprocsoff().
 *  2. tl_close_finish_ser() is posted afterwards. For connection-oriented
 *     endpoints tl_close() waits for it as well; for connectionless
 *     endpoints an extra reference is taken and the second phase may
 *     complete after tl_close() has returned.
 *
 * Before the first phase the endpoint is removed from the acceptor-ID hash
 * and marked as closing once no one holds a close hold (see tl_noclose()).
 * At the end the credentials are released and the open reference dropped.
 *
 * Always returns 0.
 */
1524 1525 /* ARGSUSED1 */
1525 1526 static int
1526 1527 tl_close(queue_t *rq, int flag, cred_t *credp)
1527 1528 {
1528 1529 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
1529 1530 tl_endpt_t *elp = NULL;
1530 1531 queue_t *wq = tep->te_wq;
1531 1532 int rc;
1532 1533
1533 1534 ASSERT(wq == WR(rq));
1534 1535
1535 1536 /*
1536 1537 * Remove the endpoint from acceptor hash.
1537 1538 */
1538 1539 rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
1539 1540 (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
1540 1541 (mod_hash_val_t *)&elp);
1541 1542 ASSERT(rc == 0 && tep == elp);
/* On non-DEBUG kernels an inconsistency is only logged. */
1542 1543 if ((rc != 0) || (tep != elp)) {
1543 1544 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1544 1545 SL_TRACE|SL_ERROR,
1545 1546 "tl_close:inconsistency in AI hash"));
1546 1547 }
1547 1548
1548 1549 /*
1549 1550 * Wait till close is safe, then mark endpoint as closing.
1550 1551 */
1551 1552 mutex_enter(&tep->te_closelock);
1552 1553 while (tep->te_closewait)
1553 1554 cv_wait(&tep->te_closecv, &tep->te_closelock);
1554 1555 tep->te_closing = B_TRUE;
1555 1556 /*
1556 1557 * Will wait for the serializer part of the close to finish, so set
1557 1558 * te_closewait now.
1558 1559 */
1559 1560 tep->te_closewait = 1;
1560 1561 tep->te_nowsrv = B_FALSE;
1561 1562 mutex_exit(&tep->te_closelock);
1562 1563
1563 1564 /*
1564 1565 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
1565 1566 * It is safe because close will wait for tl_close_ser to finish.
1566 1567 */
1567 1568 tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
1568 1569
1569 1570 /*
1570 1571 * Wait for the first phase of close to complete before qprocsoff().
1571 1572 */
1572 1573 mutex_enter(&tep->te_closelock);
1573 1574 while (tep->te_closewait)
1574 1575 cv_wait(&tep->te_closecv, &tep->te_closelock);
1575 1576 mutex_exit(&tep->te_closelock);
1576 1577
1577 1578 qprocsoff(rq);
1578 1579
/* Cancel any outstanding bufcall and timeout for this queue. */
1579 1580 if (tep->te_bufcid) {
1580 1581 qunbufcall(rq, tep->te_bufcid);
1581 1582 tep->te_bufcid = 0;
1582 1583 }
1583 1584 if (tep->te_timoutid) {
1584 1585 (void) quntimeout(rq, tep->te_timoutid);
1585 1586 tep->te_timoutid = 0;
1586 1587 }
1587 1588
1588 1589 /*
1589 1590 * Finish close behind serializer.
1590 1591 *
1591 1592 * For a CLTS endpoint increase a refcount and continue close processing
1592 1593 * with serializer protection. This processing may happen asynchronously
1593 1594 * with the completion of tl_close().
1594 1595 *
1595 1596 * For a COTS endpoint wait before destroying tep since the serializer
1596 1597 * may go away together with tep and we need to destroy serializer
1597 1598 * outside of serializer context.
1598 1599 */
1599 1600 ASSERT(tep->te_closewait == 0);
1600 1601 if (IS_COTS(tep))
1601 1602 tep->te_closewait = 1;
1602 1603 else
1603 1604 tl_refhold(tep);
1604 1605
1605 1606 tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
1606 1607
1607 1608 /*
1608 1609 * For connection-oriented transports wait for all serializer activity
1609 1610 * to settle down.
1610 1611 */
1611 1612 if (IS_COTS(tep)) {
1612 1613 mutex_enter(&tep->te_closelock);
1613 1614 while (tep->te_closewait)
1614 1615 cv_wait(&tep->te_closecv, &tep->te_closelock);
1615 1616 mutex_exit(&tep->te_closelock);
1616 1617 }
1617 1618
1618 1619 crfree(tep->te_credp);
1619 1620 tep->te_credp = NULL;
1620 1621 tep->te_wq = NULL;
1621 1622 tl_refrele(tep);
1622 1623 /*
1623 1624 * tep is likely to be destroyed now, so can't reference it any more.
1624 1625 */
1625 1626
1626 1627 rq->q_ptr = wq->q_ptr = NULL;
1627 1628 return (0);
1628 1629 }
1629 1630
1630 1631 /*
1631 1632 * First phase of close processing done behind the serializer.
1632 1633 *
 * Marks the endpoint with TL_CLOSE_SER, drains the write queue for
 * connectionless and orderly-release endpoints, removes the address from
 * the hash, detaches the write queues of a connected non-socket COTS pair,
 * clears te_rq and wakes up tl_close(). The mp argument is unused.
 *
1633 1634 * Do not drop the reference in the end - tl_close() wants this reference to
1634 1635 * stay.
1635 1636 */
1636 1637 /* ARGSUSED0 */
1637 1638 static void
1638 1639 tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
1639 1640 {
1640 1641 ASSERT(tep->te_closing);
1641 1642 ASSERT(tep->te_closewait == 1);
1642 1643 ASSERT(!(tep->te_flag & TL_CLOSE_SER));
1643 1644
1644 1645 tep->te_flag |= TL_CLOSE_SER;
1645 1646
1646 1647 /*
1647 1648 * Drain out all messages on queue except for TL_TICOTS where the
1648 1649 * abortive release semantics permit discarding of data on close
1649 1650 */
1650 1651 if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
1651 1652 tl_wsrv_ser(NULL, tep);
1652 1653 }
1653 1654
1654 1655 /* Remove address from hash table. */
1655 1656 tl_addr_unbind(tep);
1656 1657 /*
1657 1658 * qprocsoff() gets confused when q->q_next is not NULL on the write
1658 1659 * queue of the driver, so clear these before qprocsoff() is called.
1659 1660 * Also clear q_next for the peer since this queue is going away.
1660 1661 */
1661 1662 if (IS_COTS(tep) && !IS_SOCKET(tep)) {
1662 1663 tl_endpt_t *peer_tep = tep->te_conp;
1663 1664
1664 1665 tep->te_wq->q_next = NULL;
1665 1666 if ((peer_tep != NULL) && !peer_tep->te_closing)
1666 1667 peer_tep->te_wq->q_next = NULL;
1667 1668 }
1668 1669
1669 1670 tep->te_rq = NULL;
1670 1671
1671 1672 /* wake up tl_close() */
1672 1673 tl_closeok(tep);
1673 1674 tl_serializer_exit(tep);
1674 1675 }
1675 1676
1676 1677 /*
1677 1678 * Second phase of tl_close(). Should wakeup tl_close() for COTS mode and drop
1678 1679 * the reference for CLTS.
1679 1680 *
1680 1681 * Called from serializer. Should drop reference count for CLTS only.
 *
 * On entry te_closewait is 1 for COTS (tl_close() is waiting) and 0 for
 * CLTS (tl_close() took an extra reference instead). The mp argument is
 * unused.
1681 1682 */
1682 1683 /* ARGSUSED0 */
1683 1684 static void
1684 1685 tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
1685 1686 {
1686 1687 ASSERT(tep->te_closing);
1687 1688 IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
1688 1689 IMPLY(IS_COTS(tep), tep->te_closewait == 1);
1689 1690
1690 1691 tep->te_state = -1; /* Uninitialized */
1691 1692 if (IS_COTS(tep)) {
1692 1693 tl_co_unconnect(tep);
1693 1694 } else {
1694 1695 /* Connectionless specific cleanup */
1695 1696 TL_REMOVE_PEER(tep->te_lastep);
1696 1697 /*
1697 1698 * Backenable anybody that is flow controlled waiting for
1698 1699 * this endpoint.
1699 1700 */
1700 1701 tl_cl_backenable(tep);
/* Leave the flow-control list of the endpoint we were waiting on. */
1701 1702 if (tep->te_flowq != NULL) {
1702 1703 list_remove(&(tep->te_flowq->te_flowlist), tep);
1703 1704 tep->te_flowq = NULL;
1704 1705 }
1705 1706 }
1706 1707
/*
 * Leave the serializer before waking tl_close() or dropping the
 * reference; after either of those tep may be freed.
 */
1707 1708 tl_serializer_exit(tep);
1708 1709 if (IS_COTS(tep))
1709 1710 tl_closeok(tep);
1710 1711 else
1711 1712 tl_refrele(tep);
1712 1713 }
1713 1714
1714 1715 /*
1715 1716 * STREAMS write-side put procedure.
1716 1717 * Enter serializer for most of the processing.
1717 1718 *
1718 1719 * The T_CONN_REQ is processed outside of serializer.
1719 1720 */
1720 1721 static void
1721 1722 tl_wput(queue_t *wq, mblk_t *mp)
1722 1723 {
1723 1724 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1724 1725 ssize_t msz = MBLKL(mp);
1725 1726 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
1726 1727 tlproc_t *tl_proc = NULL;
1727 1728
1728 1729 switch (DB_TYPE(mp)) {
1729 1730 case M_DATA:
1730 1731 /* Only valid for connection-oriented transports */
1731 1732 if (IS_CLTS(tep)) {
1732 1733 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1733 1734 SL_TRACE|SL_ERROR,
1734 1735 "tl_wput:M_DATA invalid for ticlts driver"));
1735 1736 tl_merror(wq, mp, EPROTO);
1736 1737 return;
1737 1738 }
1738 1739 tl_proc = tl_wput_data_ser;
1739 1740 break;
1740 1741
1741 1742 case M_IOCTL:
1742 1743 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1743 1744 case TL_IOC_CREDOPT:
1744 1745 /* FALLTHROUGH */
1745 1746 case TL_IOC_UCREDOPT:
1746 1747 /*
1747 1748 * Serialize endpoint state change.
1748 1749 */
1749 1750 tl_proc = tl_do_ioctl_ser;
1750 1751 break;
1751 1752
1752 1753 default:
1753 1754 miocnak(wq, mp, 0, EINVAL);
1754 1755 return;
1755 1756 }
1756 1757 break;
1757 1758
1758 1759 case M_FLUSH:
1759 1760 /*
1760 1761 * do canonical M_FLUSH processing
1761 1762 */
1762 1763 if (*mp->b_rptr & FLUSHW) {
1763 1764 flushq(wq, FLUSHALL);
1764 1765 *mp->b_rptr &= ~FLUSHW;
1765 1766 }
1766 1767 if (*mp->b_rptr & FLUSHR) {
1767 1768 flushq(RD(wq), FLUSHALL);
1768 1769 qreply(wq, mp);
1769 1770 } else {
1770 1771 freemsg(mp);
1771 1772 }
1772 1773 return;
1773 1774
1774 1775 case M_PROTO:
1775 1776 if (msz < sizeof (prim->type)) {
1776 1777 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1777 1778 SL_TRACE|SL_ERROR,
1778 1779 "tl_wput:M_PROTO data too short"));
1779 1780 tl_merror(wq, mp, EPROTO);
1780 1781 return;
1781 1782 }
1782 1783 switch (prim->type) {
1783 1784 case T_OPTMGMT_REQ:
1784 1785 case T_SVR4_OPTMGMT_REQ:
1785 1786 /*
1786 1787 * Process TPI option management requests immediately
1787 1788 * in put procedure regardless of in-order processing
1788 1789 * of already queued messages.
1789 1790 * (Note: This driver supports AF_UNIX socket
1790 1791 * implementation. Unless we implement this processing,
1791 1792 * setsockopt() on socket endpoint will block on flow
1792 1793 * controlled endpoints which it should not. That is
1793 1794 * required for successful execution of VSU socket tests
1794 1795 * and is consistent with BSD socket behavior).
1795 1796 */
1796 1797 tl_optmgmt(wq, mp);
1797 1798 return;
1798 1799 case O_T_BIND_REQ:
1799 1800 case T_BIND_REQ:
1800 1801 tl_proc = tl_bind_ser;
1801 1802 break;
1802 1803 case T_CONN_REQ:
1803 1804 if (IS_CLTS(tep)) {
1804 1805 tl_merror(wq, mp, EPROTO);
1805 1806 return;
1806 1807 }
1807 1808 tl_conn_req(wq, mp);
1808 1809 return;
1809 1810 case T_DATA_REQ:
1810 1811 case T_OPTDATA_REQ:
1811 1812 case T_EXDATA_REQ:
1812 1813 case T_ORDREL_REQ:
1813 1814 tl_proc = tl_putq_ser;
1814 1815 break;
1815 1816 case T_UNITDATA_REQ:
1816 1817 if (IS_COTS(tep) ||
1817 1818 (msz < sizeof (struct T_unitdata_req))) {
1818 1819 tl_merror(wq, mp, EPROTO);
1819 1820 return;
1820 1821 }
1821 1822 if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1822 1823 tl_proc = tl_unitdata_ser;
1823 1824 } else {
1824 1825 tl_proc = tl_putq_ser;
1825 1826 }
1826 1827 break;
1827 1828 default:
1828 1829 /*
1829 1830 * process in service procedure if message already
1830 1831 * queued (maintain in-order processing)
1831 1832 */
1832 1833 if (wq->q_first != NULL) {
1833 1834 tl_proc = tl_putq_ser;
1834 1835 } else {
1835 1836 tl_proc = tl_wput_ser;
1836 1837 }
1837 1838 break;
1838 1839 }
1839 1840 break;
1840 1841
1841 1842 case M_PCPROTO:
1842 1843 /*
1843 1844 * Check that the message has enough data to figure out TPI
1844 1845 * primitive.
1845 1846 */
1846 1847 if (msz < sizeof (prim->type)) {
1847 1848 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1848 1849 SL_TRACE|SL_ERROR,
1849 1850 "tl_wput:M_PCROTO data too short"));
1850 1851 tl_merror(wq, mp, EPROTO);
1851 1852 return;
1852 1853 }
1853 1854 switch (prim->type) {
1854 1855 case T_CAPABILITY_REQ:
1855 1856 tl_capability_req(mp, tep);
1856 1857 return;
1857 1858 case T_INFO_REQ:
1858 1859 tl_proc = tl_info_req_ser;
1859 1860 break;
1860 1861 case T_ADDR_REQ:
1861 1862 tl_proc = tl_addr_req_ser;
1862 1863 break;
1863 1864
1864 1865 default:
1865 1866 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1866 1867 SL_TRACE|SL_ERROR,
1867 1868 "tl_wput:unknown TPI msg primitive"));
1868 1869 tl_merror(wq, mp, EPROTO);
1869 1870 return;
1870 1871 }
1871 1872 break;
1872 1873 default:
1873 1874 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1874 1875 "tl_wput:default:unexpected Streams message"));
1875 1876 freemsg(mp);
1876 1877 return;
1877 1878 }
1878 1879
1879 1880 /*
1880 1881 * Continue processing via serializer.
1881 1882 */
1882 1883 ASSERT(tl_proc != NULL);
1883 1884 tl_refhold(tep);
1884 1885 tl_serializer_enter(tep, tl_proc, mp);
1885 1886 }
1886 1887
1887 1888 /*
1888 1889 * Place message on the queue while preserving order.
1889 1890 */
1890 1891 static void
1891 1892 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1892 1893 {
1893 1894 if (tep->te_closing) {
1894 1895 tl_wput_ser(mp, tep);
1895 1896 } else {
1896 1897 TL_PUTQ(tep, mp);
1897 1898 tl_serializer_exit(tep);
1898 1899 tl_refrele(tep);
1899 1900 }
1900 1901
1901 1902 }
1902 1903
1903 1904 static void
1904 1905 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1905 1906 {
1906 1907 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1907 1908
1908 1909 switch (DB_TYPE(mp)) {
1909 1910 case M_DATA:
1910 1911 tl_data(mp, tep);
1911 1912 break;
1912 1913 case M_PROTO:
1913 1914 tl_do_proto(mp, tep);
1914 1915 break;
1915 1916 default:
1916 1917 freemsg(mp);
1917 1918 break;
1918 1919 }
1919 1920 }
1920 1921
1921 1922 /*
1922 1923 * Write side put procedure called from serializer.
1923 1924 */
1924 1925 static void
1925 1926 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1926 1927 {
1927 1928 tl_wput_common_ser(mp, tep);
1928 1929 tl_serializer_exit(tep);
1929 1930 tl_refrele(tep);
1930 1931 }
1931 1932
1932 1933 /*
1933 1934 * M_DATA processing. Called from serializer.
1934 1935 */
1935 1936 static void
1936 1937 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1937 1938 {
1938 1939 tl_endpt_t *peer_tep = tep->te_conp;
1939 1940 queue_t *peer_rq;
1940 1941
1941 1942 ASSERT(DB_TYPE(mp) == M_DATA);
1942 1943 ASSERT(IS_COTS(tep));
1943 1944
1944 1945 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1945 1946
1946 1947 /*
1947 1948 * fastpath for data. Ignore flow control if tep is closing.
1948 1949 */
1949 1950 if ((peer_tep != NULL) &&
1950 1951 !peer_tep->te_closing &&
1951 1952 ((tep->te_state == TS_DATA_XFER) ||
1952 1953 (tep->te_state == TS_WREQ_ORDREL)) &&
1953 1954 (tep->te_wq != NULL) &&
1954 1955 (tep->te_wq->q_first == NULL) &&
1955 1956 ((peer_tep->te_state == TS_DATA_XFER) ||
1956 1957 (peer_tep->te_state == TS_WREQ_ORDREL)) &&
1957 1958 ((peer_rq = peer_tep->te_rq) != NULL) &&
1958 1959 (canputnext(peer_rq) || tep->te_closing)) {
1959 1960 putnext(peer_rq, mp);
1960 1961 } else if (tep->te_closing) {
1961 1962 /*
1962 1963 * It is possible that by the time we got here tep started to
1963 1964 * close. If the write queue is not empty, and the state is
1964 1965 * TS_DATA_XFER the data should be delivered in order, so we
1965 1966 * call putq() instead of freeing the data.
1966 1967 */
1967 1968 if ((tep->te_wq != NULL) &&
1968 1969 ((tep->te_state == TS_DATA_XFER) ||
1969 1970 (tep->te_state == TS_WREQ_ORDREL))) {
1970 1971 TL_PUTQ(tep, mp);
1971 1972 } else {
1972 1973 freemsg(mp);
1973 1974 }
1974 1975 } else {
1975 1976 TL_PUTQ(tep, mp);
1976 1977 }
1977 1978
1978 1979 tl_serializer_exit(tep);
1979 1980 tl_refrele(tep);
1980 1981 }
1981 1982
1982 1983 /*
1983 1984 * Write side service routine.
1984 1985 *
1985 1986 * All actual processing happens within serializer which is entered
1986 1987 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1987 1988 * messages that need processing may have arrived, so tl_wsrv repeats until
1988 1989 * queue is empty or te_nowsrv is set.
1989 1990 */
1990 1991 static void
1991 1992 tl_wsrv(queue_t *wq)
1992 1993 {
1993 1994 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1994 1995
1995 1996 while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1996 1997 mutex_enter(&tep->te_srv_lock);
1997 1998 ASSERT(tep->te_wsrv_active == B_FALSE);
1998 1999 tep->te_wsrv_active = B_TRUE;
1999 2000 mutex_exit(&tep->te_srv_lock);
2000 2001
2001 2002 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2002 2003
2003 2004 /*
2004 2005 * Wait for serializer job to complete.
2005 2006 */
2006 2007 mutex_enter(&tep->te_srv_lock);
2007 2008 while (tep->te_wsrv_active) {
2008 2009 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2009 2010 }
2010 2011 cv_signal(&tep->te_srv_cv);
2011 2012 mutex_exit(&tep->te_srv_lock);
2012 2013 }
2013 2014 }
2014 2015
2015 2016 /*
2016 2017 * Serialized write side processing of the STREAMS queue.
2017 2018 * May be called either from tl_wsrv() or from tl_close() in which case ser_mp
2018 2019 * is NULL.
2019 2020 */
2020 2021 static void
2021 2022 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2022 2023 {
2023 2024 mblk_t *mp;
2024 2025 queue_t *wq = tep->te_wq;
2025 2026
2026 2027 ASSERT(wq != NULL);
2027 2028 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2028 2029 tl_wput_common_ser(mp, tep);
2029 2030 }
2030 2031
2031 2032 /*
2032 2033 * Wakeup service routine unless called from close.
2033 2034 * If ser_mp is specified, the caller is tl_wsrv().
2034 2035 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2035 2036 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2036 2037 * be no matching tl_serializer_exit() in this case.
2037 2038 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2038 2039 * waiting on te_srv_cv.
2039 2040 */
2040 2041 if (ser_mp != NULL) {
2041 2042 /*
2042 2043 * We are called from tl_wsrv.
2043 2044 */
2044 2045 mutex_enter(&tep->te_srv_lock);
2045 2046 ASSERT(tep->te_wsrv_active);
2046 2047 tep->te_wsrv_active = B_FALSE;
2047 2048 cv_signal(&tep->te_srv_cv);
2048 2049 mutex_exit(&tep->te_srv_lock);
2049 2050 tl_serializer_exit(tep);
2050 2051 }
2051 2052 }
2052 2053
2053 2054 /*
2054 2055 * Called when the stream is backenabled. Enter serializer and qenable everyone
2055 2056 * flow controlled by tep.
2056 2057 *
2057 2058 * NOTE: The service routine should enter serializer synchronously. Otherwise it
2058 2059 * is possible that two instances of tl_rsrv will be running reusing the same
2059 2060 * rsrv mblk.
2060 2061 */
2061 2062 static void
2062 2063 tl_rsrv(queue_t *rq)
2063 2064 {
2064 2065 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2065 2066
2066 2067 ASSERT(rq->q_first == NULL);
2067 2068 ASSERT(tep->te_rsrv_active == 0);
2068 2069
2069 2070 tep->te_rsrv_active = B_TRUE;
2070 2071 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2071 2072 /*
2072 2073 * Wait for serializer job to complete.
2073 2074 */
2074 2075 mutex_enter(&tep->te_srv_lock);
2075 2076 while (tep->te_rsrv_active) {
2076 2077 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2077 2078 }
2078 2079 cv_signal(&tep->te_srv_cv);
2079 2080 mutex_exit(&tep->te_srv_lock);
2080 2081 }
2081 2082
2082 2083 /* ARGSUSED */
2083 2084 static void
2084 2085 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2085 2086 {
2086 2087 tl_endpt_t *peer_tep;
2087 2088
2088 2089 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2089 2090 tl_cl_backenable(tep);
2090 2091 } else if (
2091 2092 IS_COTS(tep) &&
2092 2093 ((peer_tep = tep->te_conp) != NULL) &&
2093 2094 !peer_tep->te_closing &&
2094 2095 ((tep->te_state == TS_DATA_XFER) ||
2095 2096 (tep->te_state == TS_WIND_ORDREL)||
2096 2097 (tep->te_state == TS_WREQ_ORDREL))) {
2097 2098 TL_QENABLE(peer_tep);
2098 2099 }
2099 2100
2100 2101 /*
2101 2102 * Wakeup read side service routine.
2102 2103 */
2103 2104 mutex_enter(&tep->te_srv_lock);
2104 2105 ASSERT(tep->te_rsrv_active);
2105 2106 tep->te_rsrv_active = B_FALSE;
2106 2107 cv_signal(&tep->te_srv_cv);
2107 2108 mutex_exit(&tep->te_srv_lock);
2108 2109 tl_serializer_exit(tep);
2109 2110 }
2110 2111
2111 2112 /*
2112 2113 * process M_PROTO messages. Always called from serializer.
2113 2114 */
2114 2115 static void
2115 2116 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2116 2117 {
2117 2118 ssize_t msz = MBLKL(mp);
2118 2119 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
2119 2120
2120 2121 /* Message size was validated by tl_wput(). */
2121 2122 ASSERT(msz >= sizeof (prim->type));
2122 2123
2123 2124 switch (prim->type) {
2124 2125 case T_UNBIND_REQ:
2125 2126 tl_unbind(mp, tep);
2126 2127 break;
2127 2128
2128 2129 case T_ADDR_REQ:
2129 2130 tl_addr_req(mp, tep);
2130 2131 break;
2131 2132
2132 2133 case O_T_CONN_RES:
2133 2134 case T_CONN_RES:
2134 2135 if (IS_CLTS(tep)) {
2135 2136 tl_merror(tep->te_wq, mp, EPROTO);
2136 2137 break;
2137 2138 }
2138 2139 tl_conn_res(mp, tep);
2139 2140 break;
2140 2141
2141 2142 case T_DISCON_REQ:
2142 2143 if (IS_CLTS(tep)) {
2143 2144 tl_merror(tep->te_wq, mp, EPROTO);
2144 2145 break;
2145 2146 }
2146 2147 tl_discon_req(mp, tep);
2147 2148 break;
2148 2149
2149 2150 case T_DATA_REQ:
2150 2151 if (IS_CLTS(tep)) {
2151 2152 tl_merror(tep->te_wq, mp, EPROTO);
2152 2153 break;
2153 2154 }
2154 2155 tl_data(mp, tep);
2155 2156 break;
2156 2157
2157 2158 case T_OPTDATA_REQ:
2158 2159 if (IS_CLTS(tep)) {
2159 2160 tl_merror(tep->te_wq, mp, EPROTO);
2160 2161 break;
2161 2162 }
2162 2163 tl_data(mp, tep);
2163 2164 break;
2164 2165
2165 2166 case T_EXDATA_REQ:
2166 2167 if (IS_CLTS(tep)) {
2167 2168 tl_merror(tep->te_wq, mp, EPROTO);
2168 2169 break;
2169 2170 }
2170 2171 tl_exdata(mp, tep);
2171 2172 break;
2172 2173
2173 2174 case T_ORDREL_REQ:
2174 2175 if (! IS_COTSORD(tep)) {
2175 2176 tl_merror(tep->te_wq, mp, EPROTO);
2176 2177 break;
2177 2178 }
2178 2179 tl_ordrel(mp, tep);
2179 2180 break;
2180 2181
2181 2182 case T_UNITDATA_REQ:
2182 2183 if (IS_COTS(tep)) {
2183 2184 tl_merror(tep->te_wq, mp, EPROTO);
2184 2185 break;
2185 2186 }
2186 2187 tl_unitdata(mp, tep);
2187 2188 break;
2188 2189
2189 2190 default:
2190 2191 tl_merror(tep->te_wq, mp, EPROTO);
2191 2192 break;
2192 2193 }
2193 2194 }
2194 2195
2195 2196 /*
2196 2197 * Process ioctl from serializer.
2197 2198 * This is a wrapper around tl_do_ioctl().
2198 2199 */
2199 2200 static void
2200 2201 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2201 2202 {
2202 2203 if (! tep->te_closing)
2203 2204 tl_do_ioctl(mp, tep);
2204 2205 else
2205 2206 freemsg(mp);
2206 2207
2207 2208 tl_serializer_exit(tep);
2208 2209 tl_refrele(tep);
2209 2210 }
2210 2211
2211 2212 static void
2212 2213 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2213 2214 {
2214 2215 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2215 2216 int cmd = iocbp->ioc_cmd;
2216 2217 queue_t *wq = tep->te_wq;
2217 2218 int error;
2218 2219 int thisopt, otheropt;
2219 2220
2220 2221 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2221 2222
2222 2223 switch (cmd) {
2223 2224 case TL_IOC_CREDOPT:
2224 2225 if (cmd == TL_IOC_CREDOPT) {
2225 2226 thisopt = TL_SETCRED;
2226 2227 otheropt = TL_SETUCRED;
2227 2228 } else {
2228 2229 /* FALLTHROUGH */
2229 2230 case TL_IOC_UCREDOPT:
2230 2231 thisopt = TL_SETUCRED;
2231 2232 otheropt = TL_SETCRED;
2232 2233 }
2233 2234 /*
2234 2235 * The credentials passing does not apply to sockets.
2235 2236 * Only one of the cred options can be set at a given time.
2236 2237 */
2237 2238 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2238 2239 miocnak(wq, mp, 0, EINVAL);
2239 2240 return;
2240 2241 }
2241 2242
2242 2243 /*
2243 2244 * Turn on generation of credential options for
2244 2245 * T_conn_req, T_conn_con, T_unidata_ind.
2245 2246 */
2246 2247 error = miocpullup(mp, sizeof (uint32_t));
2247 2248 if (error != 0) {
2248 2249 miocnak(wq, mp, 0, error);
2249 2250 return;
2250 2251 }
2251 2252 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2252 2253 miocnak(wq, mp, 0, EINVAL);
2253 2254 return;
2254 2255 }
2255 2256
2256 2257 if (*(uint32_t *)mp->b_cont->b_rptr)
2257 2258 tep->te_flag |= thisopt;
2258 2259 else
2259 2260 tep->te_flag &= ~thisopt;
2260 2261
2261 2262 miocack(wq, mp, 0, 0);
2262 2263 break;
2263 2264
2264 2265 default:
2265 2266 /* Should not be here */
2266 2267 miocnak(wq, mp, 0, EINVAL);
2267 2268 break;
2268 2269 }
2269 2270 }
2270 2271
2271 2272
2272 2273 /*
2273 2274 * send T_ERROR_ACK
2274 2275 * Note: assumes enough memory or caller passed big enough mp
2275 2276 * - no recovery from allocb failures
2276 2277 */
2277 2278
2278 2279 static void
2279 2280 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2280 2281 t_scalar_t unix_err, t_scalar_t type)
2281 2282 {
2282 2283 struct T_error_ack *err_ack;
2283 2284 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2284 2285 M_PCPROTO, T_ERROR_ACK);
2285 2286
2286 2287 if (ackmp == NULL) {
2287 2288 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2288 2289 "tl_error_ack:out of mblk memory"));
2289 2290 tl_merror(wq, NULL, ENOSR);
2290 2291 return;
2291 2292 }
2292 2293 err_ack = (struct T_error_ack *)ackmp->b_rptr;
2293 2294 err_ack->ERROR_prim = type;
2294 2295 err_ack->TLI_error = tli_err;
2295 2296 err_ack->UNIX_error = unix_err;
2296 2297
2297 2298 /*
2298 2299 * send error ack message
2299 2300 */
2300 2301 qreply(wq, ackmp);
2301 2302 }
2302 2303
2303 2304
2304 2305
2305 2306 /*
2306 2307 * send T_OK_ACK
2307 2308 * Note: assumes enough memory or caller passed big enough mp
2308 2309 * - no recovery from allocb failures
2309 2310 */
2310 2311 static void
2311 2312 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2312 2313 {
2313 2314 struct T_ok_ack *ok_ack;
2314 2315 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2315 2316 M_PCPROTO, T_OK_ACK);
2316 2317
2317 2318 if (ackmp == NULL) {
2318 2319 tl_merror(wq, NULL, ENOMEM);
2319 2320 return;
2320 2321 }
2321 2322
2322 2323 ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2323 2324 ok_ack->CORRECT_prim = type;
2324 2325
2325 2326 (void) qreply(wq, ackmp);
2326 2327 }
2327 2328
2328 2329 /*
2329 2330 * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2330 2331 * This is a wrapper around tl_bind().
2331 2332 */
2332 2333 static void
2333 2334 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2334 2335 {
2335 2336 if (! tep->te_closing)
2336 2337 tl_bind(mp, tep);
2337 2338 else
2338 2339 freemsg(mp);
2339 2340
2340 2341 tl_serializer_exit(tep);
2341 2342 tl_refrele(tep);
2342 2343 }
2343 2344
2344 2345 /*
2345 2346 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
 * The endpoint is expected to be unbound (TS_UNBND); a request arriving in
 * any other state is answered with a TOUTSTATE error ack.
 *
 * Outline:
 *	1. validate the endpoint state and the message size;
 *	2. negotiate the connection indication queue length (COTS only),
 *	   clamped to tl_maxqlen;
 *	3. reserve a hash handle unless one is held or the address is hashed;
 *	4. validate the address carried in the message (socket or TLI form);
 *	5. insert the address into the address hash, or pick any free address;
 *	6. reply with T_BIND_ACK, or with T_ERROR_ACK through the error label.
 *
 * When the reply message cannot be allocated the state is rolled back and
 * tl_memrecover() is called with the original message.
2347 2348 */
2348 2349 static void
2349 2350 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2350 2351 {
2351 2352 queue_t *wq = tep->te_wq;
2352 2353 struct T_bind_ack *b_ack;
2353 2354 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2354 2355 mblk_t *ackmp, *bamp;
2355 2356 soux_addr_t ux_addr;
2356 2357 t_uscalar_t qlen = 0;
2357 2358 t_scalar_t alen, aoff;
2358 2359 tl_addr_t addr_req;
2359 2360 void *addr_startp;
2360 2361 ssize_t msz = MBLKL(mp), basize;
2361 2362 t_scalar_t tli_err = 0, unix_err = 0;
/* Saved up front: mp is reused for the reply, and the state may be rolled back. */
2362 2363 t_scalar_t save_prim_type = bind->PRIM_type;
2363 2364 t_scalar_t save_state = tep->te_state;
2364 2365
2365 2366 if (tep->te_state != TS_UNBND) {
2366 2367 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2367 2368 SL_TRACE|SL_ERROR,
2368 2369 "tl_wput:bind_request:out of state, state=%d",
2369 2370 tep->te_state));
2370 2371 tli_err = TOUTSTATE;
2371 2372 goto error;
2372 2373 }
2373 2374
2374 2375 if (msz < sizeof (struct T_bind_req)) {
2375 2376 tli_err = TSYSERR; unix_err = EINVAL;
2376 2377 goto error;
2377 2378 }
2378 2379
/* The request is well formed enough: record the bind request in the state. */
2379 2380 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2380 2381
2381 2382 ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2382 2383 (bind->PRIM_type == T_BIND_REQ));
2383 2384
2384 2385 alen = bind->ADDR_length;
2385 2386 aoff = bind->ADDR_offset;
2386 2387
2387 2388 /* negotiate max conn req pending */
2388 2389 if (IS_COTS(tep)) {
2389 2390 qlen = bind->CONIND_number;
2390 2391 if (qlen > tl_maxqlen)
2391 2392 qlen = tl_maxqlen;
2392 2393 }
2393 2394
2394 2395 /*
2395 2396 * Reserve hash handle. It can only be NULL if the endpoint is unbound
2396 2397 * and bound again.
2397 2398 */
2398 2399 if ((tep->te_hash_hndl == NULL) &&
2399 2400 ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2400 2401 mod_hash_reserve_nosleep(tep->te_addrhash,
2401 2402 &tep->te_hash_hndl) != 0) {
2402 2403 tli_err = TSYSERR; unix_err = ENOSR;
2403 2404 goto error;
2404 2405 }
2405 2406
2406 2407 /*
2407 2408 * Verify address correctness.
 *
 * NOTE(review): every failure in this section applies
 * NEXTSTATE(TE_ERROR_ACK, ...) before jumping to the error label, and the
 * error label applies it again once the ack is allocated. The earlier
 * failures (state, size, hash handle) apply it only once. Confirm against
 * the state table whether the double transition is intended.
2408 2409 */
2409 2410 if (IS_SOCKET(tep)) {
2410 2411 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2411 2412
/* A socket address must be exactly TL_SOUX_ADDRLEN bytes and lie within the message. */
2412 2413 if ((alen != TL_SOUX_ADDRLEN) ||
2413 2414 (aoff < 0) ||
2414 2415 (aoff + alen > msz)) {
2415 2416 (void) (STRLOG(TL_ID, tep->te_minor,
2416 2417 1, SL_TRACE|SL_ERROR,
2417 2418 "tl_bind: invalid socket addr"));
2418 2419 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2419 2420 tli_err = TSYSERR; unix_err = EINVAL;
2420 2421 goto error;
2421 2422 }
2422 2423 /* Copy address from message to local buffer. */
2423 2424 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2424 2425 /*
2425 2426 * Check that we got correct address from sockets
2426 2427 */
2427 2428 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2428 2429 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2429 2430 (void) (STRLOG(TL_ID, tep->te_minor,
2430 2431 1, SL_TRACE|SL_ERROR,
2431 2432 "tl_bind: invalid socket magic"));
2432 2433 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2433 2434 tli_err = TSYSERR; unix_err = EINVAL;
2434 2435 goto error;
2435 2436 }
/* An implicit address must not carry a vnode pointer ... */
2436 2437 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2437 2438 (ux_addr.soua_vp != NULL)) {
2438 2439 (void) (STRLOG(TL_ID, tep->te_minor,
2439 2440 1, SL_TRACE|SL_ERROR,
2440 2441 "tl_bind: implicit addr non-empty"));
2441 2442 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2442 2443 tli_err = TSYSERR; unix_err = EINVAL;
2443 2444 goto error;
2444 2445 }
/* ... and an explicit address must carry one. */
2445 2446 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2446 2447 (ux_addr.soua_vp == NULL)) {
2447 2448 (void) (STRLOG(TL_ID, tep->te_minor,
2448 2449 1, SL_TRACE|SL_ERROR,
2449 2450 "tl_bind: explicit addr empty"));
2450 2451 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2451 2452 tli_err = TSYSERR; unix_err = EINVAL;
2452 2453 goto error;
2453 2454 }
2454 2455 } else {
/*
 * NOTE(review): aoff + alen is computed in t_scalar_t before the cast to
 * ssize_t, and the "< 0" test looks like it is meant to catch wraparound of
 * that sum. Confirm the build guarantees wrapping signed arithmetic.
 */
2455 2456 if ((alen > 0) && ((aoff < 0) ||
2456 2457 ((ssize_t)(aoff + alen) > msz) ||
2457 2458 ((aoff + alen) < 0))) {
2458 2459 (void) (STRLOG(TL_ID, tep->te_minor,
2459 2460 1, SL_TRACE|SL_ERROR,
2460 2461 "tl_bind: invalid message"));
2461 2462 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2462 2463 tli_err = TSYSERR; unix_err = EINVAL;
2463 2464 goto error;
2464 2465 }
2465 2466 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2466 2467 (void) (STRLOG(TL_ID, tep->te_minor,
2467 2468 1, SL_TRACE|SL_ERROR,
2468 2469 "tl_bind: bad addr in message"));
2469 2470 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2470 2471 tli_err = TBADADDR;
2471 2472 goto error;
2472 2473 }
2473 2474 #ifdef DEBUG
2474 2475 /*
2475 2476 * Mild form of ASSERT()ion to detect broken TPI apps.
2476 2477 * if (! assertion)
2477 2478 * log warning;
2478 2479 */
2479 2480 if (! ((alen == 0 && aoff == 0) ||
2480 2481 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2481 2482 (void) (STRLOG(TL_ID, tep->te_minor,
2482 2483 3, SL_TRACE|SL_ERROR,
2483 2484 "tl_bind: addr overlaps TPI message"));
2484 2485 }
2485 2486 #endif
2486 2487 }
2487 2488
2488 2489 /*
2489 2490 * Bind the address provided or allocate one if requested.
2490 2491 * Allow rebinds with a new qlen value.
2491 2492 */
2492 2493 if (IS_SOCKET(tep)) {
2493 2494 /*
2494 2495 * For anonymous requests the te_ap is already set up properly
2495 2496 * so use minor number as an address.
2496 2497 * For explicit requests need to check whether the address is
2497 2498 * already in use.
2498 2499 */
2499 2500 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2500 2501 int rc;
2501 2502
2502 2503 if (tep->te_flag & TL_ADDRHASHED) {
2503 2504 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
/* Same vnode as already hashed: nothing to insert, go build the ack. */
2504 2505 if (tep->te_vp == ux_addr.soua_vp)
2505 2506 goto skip_addr_bind;
2506 2507 else /* Rebind to a new address. */
2507 2508 tl_addr_unbind(tep);
2508 2509 }
2509 2510 /*
2510 2511 * Insert address in the hash if it is not already
2511 2512 * there. Since we use preallocated handle, the insert
2512 2513 * can fail only if the key is already present.
2513 2514 */
2514 2515 rc = mod_hash_insert_reserve(tep->te_addrhash,
2515 2516 (mod_hash_key_t)ux_addr.soua_vp,
2516 2517 (mod_hash_val_t)tep, tep->te_hash_hndl);
2517 2518
2518 2519 if (rc != 0) {
2519 2520 ASSERT(rc == MH_ERR_DUPLICATE);
2520 2521 /*
2521 2522 * Violate O_T_BIND_REQ semantics and fail with
2522 2523 * TADDRBUSY - sockets will not use any address
2523 2524 * other than supplied one for explicit binds.
2524 2525 */
2525 2526 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2526 2527 SL_TRACE|SL_ERROR,
2527 2528 "tl_bind:requested addr %p is busy",
2528 2529 ux_addr.soua_vp));
2529 2530 tli_err = TADDRBUSY; unix_err = 0;
2530 2531 goto error;
2531 2532 }
/* Inserted: remember the address and give up the consumed hash handle. */
2532 2533 tep->te_uxaddr = ux_addr;
2533 2534 tep->te_flag |= TL_ADDRHASHED;
2534 2535 tep->te_hash_hndl = NULL;
2535 2536 }
2536 2537 } else if (alen == 0) {
2537 2538 /*
2538 2539 * assign any free address
2539 2540 */
2540 2541 if (! tl_get_any_addr(tep, NULL)) {
2541 2542 (void) (STRLOG(TL_ID, tep->te_minor,
2542 2543 1, SL_TRACE|SL_ERROR,
2543 2544 "tl_bind:failed to get buffer for any "
2544 2545 "address"));
2545 2546 tli_err = TSYSERR; unix_err = ENOSR;
2546 2547 goto error;
2547 2548 }
2548 2549 } else {
/* TLI bind to a caller supplied address: describe the request ... */
2549 2550 addr_req.ta_alen = alen;
2550 2551 addr_req.ta_abuf = (mp->b_rptr + aoff);
2551 2552 addr_req.ta_zoneid = tep->te_zoneid;
2552 2553
/* ... and keep a private copy of the address in the endpoint. */
2553 2554 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2554 2555 if (tep->te_abuf == NULL) {
2555 2556 tli_err = TSYSERR; unix_err = ENOSR;
2556 2557 goto error;
2557 2558 }
2558 2559 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2559 2560 tep->te_alen = alen;
2560 2561
2561 2562 if (mod_hash_insert_reserve(tep->te_addrhash,
2562 2563 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2563 2564 tep->te_hash_hndl) != 0) {
2564 2565 if (save_prim_type == T_BIND_REQ) {
2565 2566 /*
2566 2567 * The bind semantics for this primitive
2567 2568 * require a failure if the exact address
2568 2569 * requested is busy
 *
 * NOTE(review): te_abuf and te_alen stay set on this path; this function
 * does not release them before the error ack. Presumably they are released
 * elsewhere - verify.
2569 2570 */
2570 2571 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2571 2572 SL_TRACE|SL_ERROR,
2572 2573 "tl_bind:requested addr is busy"));
2573 2574 tli_err = TADDRBUSY; unix_err = 0;
2574 2575 goto error;
2575 2576 }
2576 2577
2577 2578 /*
2578 2579 * O_T_BIND_REQ semantics say if address if requested
2579 2580 * address is busy, bind to any available free address
2580 2581 */
2581 2582 if (! tl_get_any_addr(tep, &addr_req)) {
2582 2583 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2583 2584 SL_TRACE|SL_ERROR,
2584 2585 "tl_bind:unable to get any addr buf"));
2585 2586 tli_err = TSYSERR; unix_err = ENOMEM;
2586 2587 goto error;
2587 2588 }
2588 2589 } else {
2589 2590 tep->te_flag |= TL_ADDRHASHED;
2590 2591 tep->te_hash_hndl = NULL;
2591 2592 }
2592 2593 }
2593 2594
2594 2595 ASSERT(tep->te_alen >= 0);
2595 2596
2596 2597 skip_addr_bind:
2597 2598 /*
2598 2599 * prepare T_BIND_ACK TPI message
 * (the request mblk is reallocated to hold the ack header plus the bound
 * address)
2599 2600 */
2600 2601 basize = sizeof (struct T_bind_ack) + tep->te_alen;
2601 2602 bamp = reallocb(mp, basize, 0);
2602 2603 if (bamp == NULL) {
2603 2604 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2604 2605 "tl_wput:tl_bind: allocb failed"));
2605 2606 /*
2606 2607 * roll back state changes
2607 2608 */
2608 2609 tl_addr_unbind(tep);
2609 2610 tep->te_state = TS_UNBND;
2610 2611 tl_memrecover(wq, mp, basize);
2611 2612 return;
2612 2613 }
2613 2614
2614 2615 DB_TYPE(bamp) = M_PCPROTO;
2615 2616 bamp->b_wptr = bamp->b_rptr + basize;
2616 2617 b_ack = (struct T_bind_ack *)bamp->b_rptr;
2617 2618 b_ack->PRIM_type = T_BIND_ACK;
2618 2619 b_ack->CONIND_number = qlen;
2619 2620 b_ack->ADDR_length = tep->te_alen;
2620 2621 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
/* The bound address follows the fixed part of the ack. */
2621 2622 addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2622 2623 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2623 2624
/* A COTS endpoint with a non-zero qlen becomes a listener. */
2624 2625 if (IS_COTS(tep)) {
2625 2626 tep->te_qlen = qlen;
2626 2627 if (qlen > 0)
2627 2628 tep->te_flag |= TL_LISTENER;
2628 2629 }
2629 2630
2630 2631 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2631 2632 /*
2632 2633 * send T_BIND_ACK message
2633 2634 */
2634 2635 (void) qreply(wq, bamp);
2635 2636 return;
2636 2637
2637 2638 error:
/* Common failure exit: turn the request mblk into a T_ERROR_ACK. */
2638 2639 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2639 2640 if (ackmp == NULL) {
2640 2641 /*
2641 2642 * roll back state changes
2642 2643 */
2643 2644 tep->te_state = save_state;
2644 2645 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2645 2646 return;
2646 2647 }
2647 2648 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2648 2649 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2649 2650 }
2650 2651
2651 2652 /*
2652 2653 * Process T_UNBIND_REQ.
2653 2654 * Called from serializer.
          *
          * mp is the T_UNBIND_REQ message and is consumed on every path: it is
          * freed if the endpoint is closing, passed to tl_memrecover() if it
          * cannot be resized, and otherwise reused (via reallocb()) as the
          * T_OK_ACK or T_ERROR_ACK sent through tep's write queue.
          * A request arriving in any state other than TS_IDLE is rejected with
          * TOUTSTATE and leaves te_state untouched.
2654 2655 */
2655 2656 static void
2656 2657 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2657 2658 {
2658 2659 queue_t *wq;
2659 2660 mblk_t *ackmp;
2660 2661
          /* A closing endpoint gets no reply; just drop the request. */
2661 2662 if (tep->te_closing) {
2662 2663 freemsg(mp);
2663 2664 return;
2664 2665 }
2665 2666
2666 2667 wq = tep->te_wq;
2667 2668
2668 2669 /*
2669 2670 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2670 2671 * ==> allocate for T_ERROR_ACK (known max)
2671 2672 */
2672 2673 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2673 2674 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2674 2675 return;
2675 2676 }
2676 2677 /*
2677 2678 * memory resources committed
2678 2679 * Note: no message validation. T_UNBIND_REQ message is
2679 2680 * same size as PRIM_type field so already verified earlier.
2680 2681 */
2681 2682
2682 2683 /*
2683 2684 * validate state
2684 2685 */
2685 2686 if (tep->te_state != TS_IDLE) {
2686 2687 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2687 2688 SL_TRACE|SL_ERROR,
2688 2689 "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2689 2690 tep->te_state));
2690 2691 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2691 2692 return;
2692 2693 }
2693 2694 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2694 2695
2695 2696 /*
2696 2697 * TPI says on T_UNBIND_REQ:
2697 2698 * send up a M_FLUSH to flush both
2698 2699 * read and write queues
2699 2700 */
2700 2701 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2701 2702
          /*
           * The bound address is kept only when all four tests below fail,
           * i.e. for a CLTS socket endpoint with qlen == 0 whose te_magic is
           * SOU_MAGIC_EXPLICIT; every other endpoint is unbound here.
           */
2702 2703 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2703 2704 tep->te_magic != SOU_MAGIC_EXPLICIT) {
2704 2705
2705 2706 /*
2706 2707 * Sockets use bind with qlen==0 followed by bind() to
2707 2708 * the same address with qlen > 0 for listeners.
2708 2709 * We allow rebind with a new qlen value.
2709 2710 */
2710 2711 tl_addr_unbind(tep);
2711 2712 }
2712 2713
2713 2714 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2714 2715 /*
2715 2716 * send T_OK_ACK
2716 2717 */
2717 2718 tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2718 2719 }
2719 2720
2720 2721
2721 2722 /*
2722 2723 * Option management code from drv/ip is used here
2723 2724 * Note: TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of tl_opt_arr
2724 2725 * database of options. So optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2725 2726 * However, that is what we want as that option is 'unorthodox'
2726 2727 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND
2727 2728 * and not in T_SVR4_OPTMGMT_REQ/ACK
2728 2729 * Note2: use of optcom_req means this routine is an exception to
2729 2730 * recovery from allocb() failures.
          *
          * wq is the queue whose q_ptr is the endpoint; mp is the
          * T_SVR4_OPTMGMT_REQ or T_OPTMGMT_REQ message. On the error paths
          * below mp is reused as the T_ERROR_ACK, freed, or passed to
          * tl_memrecover(); otherwise it is handed, together with the
          * credential taken from the message, to the common option code.
2730 2731 */
2731 2732
2732 2733 static void
2733 2734 tl_optmgmt(queue_t *wq, mblk_t *mp)
2734 2735 {
2735 2736 tl_endpt_t *tep;
2736 2737 mblk_t *ackmp;
2737 2738 union T_primitives *prim;
2738 2739 cred_t *cr;
2739 2740
2740 2741 tep = (tl_endpt_t *)wq->q_ptr;
2741 2742 prim = (union T_primitives *)mp->b_rptr;
2742 2743
2743 2744 /*
2744 2745 * All Solaris components should pass a db_credp
2745 2746 * for this TPI message, hence we ASSERT.
2746 2747 * But in case there is some other M_PROTO that looks
2747 2748 * like a TPI message sent by some other kernel
2748 2749 * component, we check and return an error.
2749 2750 */
2750 2751 cr = msg_getcred(mp, NULL);
2751 2752 ASSERT(cr != NULL);
2752 2753 if (cr == NULL) {
          /* the request mblk itself is turned into the error ack */
2753 2754 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2754 2755 return;
2755 2756 }
2756 2757
2757 2758 /* all states OK for AF_UNIX options ? */
2758 2759 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2759 2760 prim->type == T_SVR4_OPTMGMT_REQ) {
2760 2761 /*
2761 2762 * Broken TLI semantics that options can only be managed
2762 2763 * in TS_IDLE state. Needed for Sparc ABI test suite that
2763 2764 * tests this TLI (mis)feature using this device driver.
2764 2765 */
2765 2766 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2766 2767 SL_TRACE|SL_ERROR,
2767 2768 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2768 2769 tep->te_state));
2769 2770 /*
2770 2771 * preallocate memory for T_ERROR_ACK
2771 2772 */
2772 2773 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2773 2774 if (! ackmp) {
2774 2775 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2775 2776 return;
2776 2777 }
2777 2778
2778 2779 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
          /* the ack used its own mblk, so the request is released here */
2779 2780 freemsg(mp);
2780 2781 return;
2781 2782 }
2782 2783
2783 2784 /*
2784 2785 * call common option management routine from drv/ip
          * Anything that is not T_SVR4_OPTMGMT_REQ is expected to be
          * T_OPTMGMT_REQ; that expectation is only checked by the ASSERT.
2785 2786 */
2786 2787 if (prim->type == T_SVR4_OPTMGMT_REQ) {
2787 2788 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2788 2789 } else {
2789 2790 ASSERT(prim->type == T_OPTMGMT_REQ);
2790 2791 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2791 2792 }
2792 2793 }
2793 2794
2794 2795 /*
2795 2796 * Handle T_conn_req - the driver part of connect().
2796 2797 * If TL_SET[U]CRED generate the credentials options.
2797 2798 * If this is a socket pass through options unmodified.
2798 2799 * For sockets generate the T_CONN_CON here instead of
2799 2800 * waiting for the T_CONN_RES.
          *
          * This routine validates the endpoint state and the T_CONN_REQ in mp,
          * looks up the destination endpoint and then either
          *  - acks the request and sends T_DISCON_IND (ECONNREFUSED) up the
          *    requester's read queue when nobody is bound to the address or
          *    the peer's te_nicon has reached its te_qlen, or
          *  - moves tep onto the peer's serializer and continues in
          *    tl_conn_req_ser(), which is where the credential options and
          *    the early T_CONN_CON mentioned above are actually produced.
2800 2801 */
2801 2802 static void
2802 2803 tl_conn_req(queue_t *wq, mblk_t *mp)
2803 2804 {
2804 2805 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2805 2806 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
2806 2807 ssize_t msz = MBLKL(mp);
2807 2808 t_scalar_t alen, aoff, olen, ooff, err = 0;
2808 2809 tl_endpt_t *peer_tep = NULL;
2809 2810 mblk_t *ackmp;
2810 2811 mblk_t *dimp;
2811 2812 struct T_discon_ind *di;
2812 2813 soux_addr_t ux_addr;
2813 2814 tl_addr_t dst;
2814 2815
2815 2816 ASSERT(IS_COTS(tep));
2816 2817
2817 2818 if (tep->te_closing) {
2818 2819 freemsg(mp);
2819 2820 return;
2820 2821 }
2821 2822
2822 2823 /*
2823 2824 * preallocate memory for:
2824 2825 * 1. max of T_ERROR_ACK and T_OK_ACK
2825 2826 * ==> known max T_ERROR_ACK
2826 2827 * 2. max of T_DISCON_IND and T_CONN_IND
2827 2828 */
2828 2829 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2829 2830 if (! ackmp) {
2830 2831 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2831 2832 return;
2832 2833 }
2833 2834 /*
2834 2835 * memory committed for T_OK_ACK/T_ERROR_ACK now
2835 2836 * will be committed for T_DISCON_IND/T_CONN_IND later
2836 2837 */
2837 2838
          /*
           * From here on every rejection sends the error in ackmp and frees
           * the request mp separately.
           */
2838 2839 if (tep->te_state != TS_IDLE) {
2839 2840 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2840 2841 SL_TRACE|SL_ERROR,
2841 2842 "tl_wput:T_CONN_REQ:out of state, state=%d",
2842 2843 tep->te_state));
2843 2844 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2844 2845 freemsg(mp);
2845 2846 return;
2846 2847 }
2847 2848
2848 2849 /*
2849 2850 * validate the message
2850 2851 * Note: dereference fields in struct inside message only
2851 2852 * after validating the message length.
2852 2853 */
2853 2854 if (msz < sizeof (struct T_conn_req)) {
2854 2855 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2855 2856 "tl_conn_req:invalid message length"));
2856 2857 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2857 2858 freemsg(mp);
2858 2859 return;
2859 2860 }
2860 2861 alen = creq->DEST_length;
2861 2862 aoff = creq->DEST_offset;
2862 2863 olen = creq->OPT_length;
2863 2864 ooff = creq->OPT_offset;
2864 2865 if (olen == 0)
2865 2866 ooff = 0;
2866 2867
          /*
           * Sockets: the destination must be exactly TL_SOUX_ADDRLEN bytes
           * lying inside the message and must carry one of the two known
           * magic values. Note that OPT_length/OPT_offset are not range
           * checked on this path.
           * TLI: address and option ranges must lie inside the message, an
           * address is mandatory (TBADADDR) and options are refused (TBADOPT).
           */
2867 2868 if (IS_SOCKET(tep)) {
2868 2869 if ((alen != TL_SOUX_ADDRLEN) ||
2869 2870 (aoff < 0) ||
2870 2871 (aoff + alen > msz) ||
2871 2872 (alen > msz - sizeof (struct T_conn_req))) {
2872 2873 (void) (STRLOG(TL_ID, tep->te_minor,
2873 2874 1, SL_TRACE|SL_ERROR,
2874 2875 "tl_conn_req: invalid socket addr"));
2875 2876 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2876 2877 freemsg(mp);
2877 2878 return;
2878 2879 }
          /* copy the address out of the message before inspecting it */
2879 2880 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2880 2881 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2881 2882 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2882 2883 (void) (STRLOG(TL_ID, tep->te_minor,
2883 2884 1, SL_TRACE|SL_ERROR,
2884 2885 "tl_conn_req: invalid socket magic"));
2885 2886 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2886 2887 freemsg(mp);
2887 2888 return;
2888 2889 }
2889 2890 } else {
2890 2891 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2891 2892 (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2892 2893 ooff + olen < 0)) ||
2893 2894 olen < 0 || ooff < 0) {
2894 2895 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2895 2896 SL_TRACE|SL_ERROR,
2896 2897 "tl_conn_req:invalid message"));
2897 2898 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2898 2899 freemsg(mp);
2899 2900 return;
2900 2901 }
2901 2902
2902 2903 if (alen <= 0 || aoff < 0 ||
2903 2904 (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2904 2905 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2905 2906 SL_TRACE|SL_ERROR,
2906 2907 "tl_conn_req:bad addr in message, "
2907 2908 "alen=%d, msz=%ld",
2908 2909 alen, msz));
2909 2910 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2910 2911 freemsg(mp);
2911 2912 return;
2912 2913 }
2913 2914 #ifdef DEBUG
2914 2915 /*
2915 2916 * Mild form of ASSERT()ion to detect broken TPI apps.
2916 2917 * if (! assertion)
2917 2918 * log warning;
2918 2919 */
2919 2920 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2920 2921 (void) (STRLOG(TL_ID, tep->te_minor, 3,
2921 2922 SL_TRACE|SL_ERROR,
2922 2923 "tl_conn_req: addr overlaps TPI message"));
2923 2924 }
2924 2925 #endif
2925 2926 if (olen) {
2926 2927 /*
2927 2928 * no opts in connect req
2928 2929 * supported in this provider except for sockets.
2929 2930 */
2930 2931 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2931 2932 SL_TRACE|SL_ERROR,
2932 2933 "tl_conn_req:options not supported "
2933 2934 "in message"));
2934 2935 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2935 2936 freemsg(mp);
2936 2937 return;
2937 2938 }
2938 2939 }
2939 2940
2940 2941 /*
2941 2942 * Prevent tep from closing on us.
          * Every return below this point must be paired with tl_closeok().
2942 2943 */
2943 2944 if (! tl_noclose(tep)) {
2944 2945 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2945 2946 "tl_conn_req:endpoint is closing"));
2946 2947 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2947 2948 freemsg(mp);
2948 2949 return;
2949 2950 }
2950 2951
2951 2952 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2952 2953 /*
2953 2954 * get endpoint to connect to
2954 2955 * check that peer with DEST addr is bound to addr
2955 2956 * and has CONIND_number > 0
          * dst points into mp and is only used for the non-socket lookup;
          * sockets are looked up through the ux_addr copy made above.
2956 2957 */
2957 2958 dst.ta_alen = alen;
2958 2959 dst.ta_abuf = mp->b_rptr + aoff;
2959 2960 dst.ta_zoneid = tep->te_zoneid;
2960 2961
2961 2962 /*
2962 2963 * Verify if remote addr is in use
2963 2964 */
2964 2965 peer_tep = (IS_SOCKET(tep) ?
2965 2966 tl_sock_find_peer(tep, &ux_addr) :
2966 2967 tl_find_peer(tep, &dst));
2967 2968
2968 2969 if (peer_tep == NULL) {
2969 2970 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2970 2971 "tl_conn_req:no one at connect address"));
2971 2972 err = ECONNREFUSED;
2972 2973 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
2973 2974 /*
2974 2975 * validate that number of incoming connection is
2975 2976 * not to capacity on destination endpoint
2976 2977 */
2977 2978 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2978 2979 "tl_conn_req: qlen overflow connection refused"));
2979 2980 err = ECONNREFUSED;
2980 2981 }
2981 2982
2982 2983 /*
2983 2984 * Send T_DISCON_IND in case of error
2984 2985 */
2985 2986 if (err != 0) {
          /* a peer that was found but is full still holds a reference */
2986 2987 if (peer_tep != NULL)
2987 2988 tl_refrele(peer_tep);
2988 2989 /* We are still expected to send T_OK_ACK */
2989 2990 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2990 2991 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2991 2992 tl_closeok(tep);
          /*
           * The request mp is handed to tpi_ack_alloc() to build the
           * T_DISCON_IND. NOTE(review): mp is not touched again after
           * this call, so tpi_ack_alloc() presumably consumes it even
           * when it returns NULL - confirm.
           */
2992 2993 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2993 2994 M_PROTO, T_DISCON_IND);
2994 2995 if (dimp == NULL) {
2995 2996 tl_merror(wq, NULL, ENOSR);
2996 2997 return;
2997 2998 }
2998 2999 di = (struct T_discon_ind *)dimp->b_rptr;
2999 3000 di->DISCON_reason = err;
3000 3001 di->SEQ_number = BADSEQNUM;
3001 3002
3002 3003 tep->te_state = TS_IDLE;
3003 3004 /*
3004 3005 * send T_DISCON_IND message
3005 3006 */
3006 3007 putnext(tep->te_rq, dimp);
3007 3008 return;
3008 3009 }
3009 3010
3010 3011 ASSERT(IS_COTS(peer_tep));
3011 3012
3012 3013 /*
3013 3014 * Found the listener. At this point processing will continue on
3014 3015 * listener serializer. Close of the endpoint should be blocked while we
3015 3016 * switch serializers.
          * The new serializer is held before the old one is released. The
          * peer reference obtained by the lookup is kept and recorded in
          * te_oconp.
3016 3017 */
3017 3018 tl_serializer_refhold(peer_tep->te_ser);
3018 3019 tl_serializer_refrele(tep->te_ser);
3019 3020 tep->te_ser = peer_tep->te_ser;
3020 3021 ASSERT(tep->te_oconp == NULL);
3021 3022 tep->te_oconp = peer_tep;
3022 3023
3023 3024 /*
3024 3025 * It is safe to close now. Close may continue on listener serializer.
3025 3026 */
3026 3027 tl_closeok(tep);
3027 3028
3028 3029 /*
3029 3030 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3030 3031 * data, so we link mp to ackmp.
3031 3032 */
3032 3033 ackmp->b_cont = mp;
3033 3034 mp = ackmp;
3034 3035
          /*
           * Reference for the deferred call; tl_conn_req_ser() releases it on
           * its failure paths and keeps it on success.
           */
3035 3036 tl_refhold(tep);
3036 3037 tl_serializer_enter(tep, tl_conn_req_ser, mp);
3037 3038 }
3038 3039
3039 3040 /*
3040 3041 * Finish T_CONN_REQ processing on listener serializer.
          *
          * mp is the preallocated ack mblk with the original T_CONN_REQ linked
          * on b_cont (set up by tl_conn_req()). tep is the connecting endpoint
          * and the listener is tep->te_oconp.
          *
          * On success T_OK_ACK is sent on tep's write queue, T_CONN_IND is sent
          * up the listener's read queue and, for sockets with early connect
          * enabled, T_CONN_CON is sent up tep's read queue.
          * Every failure path undoes the pending connection with TL_UNCONNECT(),
          * exits the serializer and releases the reference on tep.
3041 3042 */
3042 3043 static void
3043 3044 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3044 3045 {
3045 3046 queue_t *wq;
3046 3047 tl_endpt_t *peer_tep = tep->te_oconp;
3047 3048 mblk_t *confmp, *cimp, *indmp;
3048 3049 void *opts = NULL;
3049 3050 mblk_t *ackmp = mp;
3050 3051 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3051 3052 struct T_conn_ind *ci;
3052 3053 tl_icon_t *tip;
3053 3054 void *addr_startp;
3054 3055 t_scalar_t olen = creq->OPT_length;
3055 3056 t_scalar_t ooff = creq->OPT_offset;
3056 3057 size_t ci_msz;
3057 3058 size_t size;
3058 3059 cred_t *cr = NULL;
3059 3060 pid_t cpid;
3060 3061
          /* mp still heads the ackmp -> request chain, so one freemsg frees both */
3061 3062 if (tep->te_closing) {
3062 3063 TL_UNCONNECT(tep->te_oconp);
3063 3064 tl_serializer_exit(tep);
3064 3065 tl_refrele(tep);
3065 3066 freemsg(mp);
3066 3067 return;
3067 3068 }
3068 3069
3069 3070 wq = tep->te_wq;
3070 3071 tep->te_flag |= TL_EAGER;
3071 3072
3072 3073 /*
3073 3074 * Extract preallocated ackmp from mp.
          * After this mp is the T_CONN_REQ again and ackmp stands alone.
3074 3075 */
3075 3076 mp = mp->b_cont;
3076 3077 ackmp->b_cont = NULL;
3077 3078
3078 3079 if (olen == 0)
3079 3080 ooff = 0;
3080 3081
          /*
           * The listener may have changed since tl_conn_req() looked it up.
           * Here the request mblk carries the error ack and the preallocated
           * ackmp is simply freed.
           */
3081 3082 if (peer_tep->te_closing ||
3082 3083 !((peer_tep->te_state == TS_IDLE) ||
3083 3084 (peer_tep->te_state == TS_WRES_CIND))) {
3084 3085 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3085 3086 "tl_conn_req:peer in bad state (%d)",
3086 3087 peer_tep->te_state));
3087 3088 TL_UNCONNECT(tep->te_oconp);
3088 3089 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3089 3090 freemsg(ackmp);
3090 3091 tl_serializer_exit(tep);
3091 3092 tl_refrele(tep);
3092 3093 return;
3093 3094 }
3094 3095
3095 3096 /*
3096 3097 * preallocate now for T_DISCON_IND or T_CONN_IND
3097 3098 */
3098 3099 /*
3099 3100 * calculate length of T_CONN_IND message
          * If the listener asked for credentials, the T_CONN_IND carries a
          * single credential option instead of the requester's options;
          * ooff is zeroed so the requester's options are not copied below.
3100 3101 */
3101 3102 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3102 3103 cr = msg_getcred(mp, &cpid);
3103 3104 ASSERT(cr != NULL);
3104 3105 if (peer_tep->te_flag & TL_SETCRED) {
3105 3106 ooff = 0;
3106 3107 olen = (t_scalar_t) sizeof (struct opthdr) +
3107 3108 OPTLEN(sizeof (tl_credopt_t));
3108 3109 /* 1 option only */
3109 3110 } else {
3110 3111 ooff = 0;
3111 3112 olen = (t_scalar_t)sizeof (struct opthdr) +
3112 3113 OPTLEN(ucredminsize(cr));
3113 3114 /* 1 option only */
3114 3115 }
3115 3116 }
          /*
           * T_CONN_IND = header + requester's address, aligned, + options.
           * size is the larger of that and a T_DISCON_IND.
           */
3116 3117 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3117 3118 ci_msz = T_ALIGN(ci_msz) + olen;
3118 3119 size = max(ci_msz, sizeof (struct T_discon_ind));
3119 3120
3120 3121 /*
3121 3122 * Save options from mp - we'll need them for T_CONN_IND.
          * (mp is resized further down, hence the temporary copy.)
3122 3123 */
3123 3124 if (ooff != 0) {
3124 3125 opts = kmem_alloc(olen, KM_NOSLEEP);
3125 3126 if (opts == NULL) {
3126 3127 /*
3127 3128 * roll back state changes
3128 3129 */
3129 3130 tep->te_state = TS_IDLE;
3130 3131 tl_memrecover(wq, mp, size);
3131 3132 freemsg(ackmp);
3132 3133 TL_UNCONNECT(tep->te_oconp);
3133 3134 tl_serializer_exit(tep);
3134 3135 tl_refrele(tep);
3135 3136 return;
3136 3137 }
3137 3138 /* Copy options to a temp buffer */
3138 3139 bcopy(mp->b_rptr + ooff, opts, olen);
3139 3140 }
3140 3141
3141 3142 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3142 3143 /*
3143 3144 * Generate a T_CONN_CON that has the identical address
3144 3145 * (and options) as the T_CONN_REQ.
3145 3146 * NOTE: assumes that the T_conn_req and T_conn_con structures
3146 3147 * are isomorphic.
3147 3148 */
3148 3149 confmp = copyb(mp);
3149 3150 if (! confmp) {
3150 3151 /*
3151 3152 * roll back state changes
3152 3153 */
3153 3154 tep->te_state = TS_IDLE;
3154 3155 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3155 3156 freemsg(ackmp);
3156 3157 if (opts != NULL)
3157 3158 kmem_free(opts, olen);
3158 3159 TL_UNCONNECT(tep->te_oconp);
3159 3160 tl_serializer_exit(tep);
3160 3161 tl_refrele(tep);
3161 3162 return;
3162 3163 }
3163 3164 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3164 3165 T_CONN_CON;
3165 3166 } else {
3166 3167 confmp = NULL;
3167 3168 }
          /* make the request mblk big enough to become the T_CONN_IND */
3168 3169 if ((indmp = reallocb(mp, size, 0)) == NULL) {
3169 3170 /*
3170 3171 * roll back state changes
3171 3172 */
3172 3173 tep->te_state = TS_IDLE;
3173 3174 tl_memrecover(wq, mp, size);
3174 3175 freemsg(ackmp);
3175 3176 if (opts != NULL)
3176 3177 kmem_free(opts, olen);
3177 3178 freemsg(confmp);
3178 3179 TL_UNCONNECT(tep->te_oconp);
3179 3180 tl_serializer_exit(tep);
3180 3181 tl_refrele(tep);
3181 3182 return;
3182 3183 }
3183 3184
          /* cell that will sit on the listener's list of incoming connections */
3184 3185 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3185 3186 if (tip == NULL) {
3186 3187 /*
3187 3188 * roll back state changes
3188 3189 */
3189 3190 tep->te_state = TS_IDLE;
3190 3191 tl_memrecover(wq, indmp, sizeof (*tip));
3191 3192 freemsg(ackmp);
3192 3193 if (opts != NULL)
3193 3194 kmem_free(opts, olen);
3194 3195 freemsg(confmp);
3195 3196 TL_UNCONNECT(tep->te_oconp);
3196 3197 tl_serializer_exit(tep);
3197 3198 tl_refrele(tep);
3198 3199 return;
3199 3200 }
          /* already zero from kmem_zalloc(); set explicitly for clarity */
3200 3201 tip->ti_mp = NULL;
3201 3202
3202 3203 /*
3203 3204 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3204 3205 * and tl_icon_t cell.
3205 3206 */
3206 3207
3207 3208 /*
3208 3209 * ack validity of request and send the peer credential in the ACK.
3209 3210 */
3210 3211 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3211 3212
          /*
           * peer_tep has already been dereferenced above, so the NULL test
           * here is purely defensive.
           */
3212 3213 if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3213 3214 confmp != NULL) {
3214 3215 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3215 3216 }
3216 3217
3217 3218 tl_ok_ack(wq, ackmp, T_CONN_REQ);
3218 3219
3219 3220 /*
3220 3221 * prepare message to send T_CONN_IND
3221 3222 */
3222 3223 /*
3223 3224 * allocate the message - original data blocks retained
3224 3225 * in the returned mblk
          * The T_OK_ACK has already gone out, so a failure here is reported
          * with tl_merror() rather than by rolling the state back.
3225 3226 */
3226 3227 cimp = tl_resizemp(indmp, size);
3227 3228 if (! cimp) {
3228 3229 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3229 3230 "tl_conn_req:con_ind:allocb failure"));
3230 3231 tl_merror(wq, indmp, ENOMEM);
3231 3232 TL_UNCONNECT(tep->te_oconp);
3232 3233 tl_serializer_exit(tep);
3233 3234 tl_refrele(tep);
3234 3235 if (opts != NULL)
3235 3236 kmem_free(opts, olen);
3236 3237 freemsg(confmp);
3237 3238 ASSERT(tip->ti_mp == NULL);
3238 3239 kmem_free(tip, sizeof (*tip));
3239 3240 return;
3240 3241 }
3241 3242
          /* fill in the T_CONN_IND: header, requester's address, options */
3242 3243 DB_TYPE(cimp) = M_PROTO;
3243 3244 ci = (struct T_conn_ind *)cimp->b_rptr;
3244 3245 ci->PRIM_type = T_CONN_IND;
3245 3246 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3246 3247 ci->SRC_length = tep->te_alen;
3247 3248 ci->SEQ_number = tep->te_seqno;
3248 3249
3249 3250 addr_startp = cimp->b_rptr + ci->SRC_offset;
3250 3251 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3251 3252 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3252 3253
3253 3254 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3254 3255 ci->SRC_length);
3255 3256 ci->OPT_length = olen; /* because only 1 option */
3256 3257 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3257 3258 cr, cpid,
3258 3259 peer_tep->te_flag, peer_tep->te_credp);
3259 3260 } else if (ooff != 0) {
3260 3261 /* Copy option from T_CONN_REQ */
3261 3262 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3262 3263 ci->SRC_length);
3263 3264 ci->OPT_length = olen;
3264 3265 ASSERT(opts != NULL);
3265 3266 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3266 3267 } else {
3267 3268 ci->OPT_offset = 0;
3268 3269 ci->OPT_length = 0;
3269 3270 }
3270 3271 if (opts != NULL)
3271 3272 kmem_free(opts, olen);
3272 3273
3273 3274 /*
3274 3275 * register connection request with server peer
3275 3276 * append to list of incoming connections
3276 3277 * increment references for both peer_tep and tep: peer_tep is placed on
3277 3278 * te_oconp and tep is placed on listeners queue.
3278 3279 */
3279 3280 tip->ti_tep = tep;
3280 3281 tip->ti_seqno = tep->te_seqno;
3281 3282 list_insert_tail(&peer_tep->te_iconp, tip);
3282 3283 peer_tep->te_nicon++;
3283 3284
3284 3285 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3285 3286 /*
3286 3287 * send the T_CONN_IND message
3287 3288 */
3288 3289 putnext(peer_tep->te_rq, cimp);
3289 3290
3290 3291 /*
3291 3292 * Send a T_CONN_CON message for sockets.
3292 3293 * Disable the queues until we have reached the correct state!
3293 3294 */
3294 3295 if (confmp != NULL) {
3295 3296 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3296 3297 noenable(wq);
3297 3298 putnext(tep->te_rq, confmp);
3298 3299 }
3299 3300 /*
3300 3301 * Now we need to increment tep reference because tep is referenced by
3301 3302 * server list of pending connections. We also need to decrement
3302 3303 * reference before exiting serializer. Two operations void each other
3303 3304 * so we don't modify reference at all.
3304 3305 */
3305 3306 ASSERT(tep->te_refcnt >= 2);
3306 3307 ASSERT(peer_tep->te_refcnt >= 2);
3307 3308 tl_serializer_exit(tep);
3308 3309 }
3309 3310
3310 3311
3311 3312
3312 3313 /*
3313 3314 * Handle T_conn_res on listener stream. Called on listener serializer.
3314 3315 * tl_conn_req has already generated the T_CONN_CON.
3315 3316 * tl_conn_res is called on listener serializer.
3316 3317 * No one accesses acceptor at this point, so it is safe to modify acceptor.
3317 3318 * Switch eager serializer to acceptor's.
3318 3319 *
3319 3320 * If TL_SET[U]CRED generate the credentials options.
3320 3321 * For sockets tl_conn_req has already generated the T_CONN_CON.
3321 3322 */
3322 3323 static void
3323 3324 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3324 3325 {
3325 3326 queue_t *wq;
3326 3327 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr;
3327 3328 ssize_t msz = MBLKL(mp);
3328 3329 t_scalar_t olen, ooff, err = 0;
3329 3330 t_scalar_t prim = cres->PRIM_type;
3330 3331 uchar_t *addr_startp;
3331 3332 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL;
3332 3333 tl_icon_t *tip;
3333 3334 size_t size;
3334 3335 mblk_t *ackmp, *respmp;
3335 3336 mblk_t *dimp, *ccmp = NULL;
3336 3337 struct T_discon_ind *di;
3337 3338 struct T_conn_con *cc;
3338 3339 boolean_t client_noclose_set = B_FALSE;
3339 3340 boolean_t switch_client_serializer = B_TRUE;
3340 3341
3341 3342 ASSERT(IS_COTS(tep));
3342 3343
3343 3344 if (tep->te_closing) {
3344 3345 freemsg(mp);
3345 3346 return;
3346 3347 }
3347 3348
3348 3349 wq = tep->te_wq;
3349 3350
3350 3351 /*
3351 3352 * preallocate memory for:
3352 3353 * 1. max of T_ERROR_ACK and T_OK_ACK
3353 3354 * ==> known max T_ERROR_ACK
3354 3355 * 2. max of T_DISCON_IND and T_CONN_CON
3355 3356 */
3356 3357 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3357 3358 if (! ackmp) {
3358 3359 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3359 3360 return;
3360 3361 }
3361 3362 /*
3362 3363 * memory committed for T_OK_ACK/T_ERROR_ACK now
3363 3364 * will be committed for T_DISCON_IND/T_CONN_CON later
3364 3365 */
3365 3366
3366 3367
3367 3368 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3368 3369
3369 3370 /*
3370 3371 * validate state
3371 3372 */
3372 3373 if (tep->te_state != TS_WRES_CIND) {
3373 3374 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3374 3375 SL_TRACE|SL_ERROR,
3375 3376 "tl_wput:T_CONN_RES:out of state, state=%d",
3376 3377 tep->te_state));
3377 3378 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3378 3379 freemsg(mp);
3379 3380 return;
3380 3381 }
3381 3382
3382 3383 /*
3383 3384 * validate the message
3384 3385 * Note: dereference fields in struct inside message only
3385 3386 * after validating the message length.
3386 3387 */
3387 3388 if (msz < sizeof (struct T_conn_res)) {
3388 3389 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3389 3390 "tl_conn_res:invalid message length"));
3390 3391 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3391 3392 freemsg(mp);
3392 3393 return;
3393 3394 }
3394 3395 olen = cres->OPT_length;
3395 3396 ooff = cres->OPT_offset;
3396 3397 if (((olen > 0) && ((ooff + olen) > msz))) {
3397 3398 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3398 3399 "tl_conn_res:invalid message"));
3399 3400 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3400 3401 freemsg(mp);
3401 3402 return;
3402 3403 }
3403 3404 if (olen) {
3404 3405 /*
3405 3406 * no opts in connect res
3406 3407 * supported in this provider
3407 3408 */
3408 3409 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3409 3410 "tl_conn_res:options not supported in message"));
3410 3411 tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3411 3412 freemsg(mp);
3412 3413 return;
3413 3414 }
3414 3415
3415 3416 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3416 3417 ASSERT(tep->te_state == TS_WACK_CRES);
3417 3418
3418 3419 if (cres->SEQ_number < TL_MINOR_START &&
3419 3420 cres->SEQ_number >= BADSEQNUM) {
3420 3421 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3421 3422 "tl_conn_res:remote endpoint sequence number bad"));
3422 3423 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3423 3424 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3424 3425 freemsg(mp);
3425 3426 return;
3426 3427 }
3427 3428
3428 3429 /*
3429 3430 * find accepting endpoint. Will have extra reference if found.
3430 3431 */
3431 3432 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3432 3433 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3433 3434 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3434 3435 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3435 3436 "tl_conn_res:bad accepting endpoint"));
3436 3437 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3437 3438 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3438 3439 freemsg(mp);
3439 3440 return;
3440 3441 }
3441 3442
3442 3443 /*
3443 3444 * Prevent acceptor from closing.
3444 3445 */
3445 3446 if (! tl_noclose(acc_ep)) {
3446 3447 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3447 3448 "tl_conn_res:bad accepting endpoint"));
3448 3449 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3449 3450 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3450 3451 tl_refrele(acc_ep);
3451 3452 freemsg(mp);
3452 3453 return;
3453 3454 }
3454 3455
3455 3456 acc_ep->te_flag |= TL_ACCEPTOR;
3456 3457
3457 3458 /*
3458 3459 * validate that accepting endpoint, if different from listening
3459 3460 * has address bound => state is TS_IDLE
3460 3461 * TROUBLE in XPG4 !!?
3461 3462 */
3462 3463 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3463 3464 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3464 3465 "tl_conn_res:accepting endpoint has no address bound,"
3465 3466 "state=%d", acc_ep->te_state));
3466 3467 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3467 3468 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3468 3469 freemsg(mp);
3469 3470 tl_closeok(acc_ep);
3470 3471 tl_refrele(acc_ep);
3471 3472 return;
3472 3473 }
3473 3474
3474 3475 /*
3475 3476 * validate if accepting endpt same as listening, then
3476 3477 * no other incoming connection should be on the queue
3477 3478 */
3478 3479
3479 3480 if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3480 3481 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3481 3482 "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3482 3483 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3483 3484 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3484 3485 freemsg(mp);
3485 3486 tl_closeok(acc_ep);
3486 3487 tl_refrele(acc_ep);
3487 3488 return;
3488 3489 }
3489 3490
3490 3491 /*
3491 3492 * Mark for deletion, the entry corresponding to client
3492 3493 * on list of pending connections made by the listener
3493 3494 * search list to see if client is one of the
3494 3495 * recorded as a listener.
3495 3496 */
3496 3497 tip = tl_icon_find(tep, cres->SEQ_number);
3497 3498 if (tip == NULL) {
3498 3499 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3499 3500 "tl_conn_res:no client in listener list"));
3500 3501 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3501 3502 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3502 3503 freemsg(mp);
3503 3504 tl_closeok(acc_ep);
3504 3505 tl_refrele(acc_ep);
3505 3506 return;
3506 3507 }
3507 3508
3508 3509 /*
3509 3510 * If ti_tep is NULL the client has already closed. In this case
3510 3511 * the code below will avoid any action on the client side
3511 3512 * but complete the server and acceptor state transitions.
3512 3513 */
3513 3514 ASSERT(tip->ti_tep == NULL ||
3514 3515 tip->ti_tep->te_seqno == cres->SEQ_number);
3515 3516 cl_ep = tip->ti_tep;
3516 3517
3517 3518 /*
3518 3519 * If the client is present it is switched from listener's to acceptor's
3519 3520 * serializer. We should block client closes while serializers are
3520 3521 * being switched.
3521 3522 *
3522 3523 * It is possible that the client is present but is currently being
3523 3524 * closed. There are two possible cases:
3524 3525 *
3525 3526 * 1) The client has already entered tl_close_finish_ser() and sent
3526 3527 * T_ORDREL_IND. In this case we can just ignore the client (but we
3527 3528 * still need to send all messages from tip->ti_mp to the acceptor).
3528 3529 *
3529 3530 * 2) The client started the close but has not entered
3530 3531 * tl_close_finish_ser() yet. In this case, the client is already
3531 3532 * proceeding asynchronously on the listener's serializer, so we're
3532 3533 * forced to change the acceptor to use the listener's serializer to
3533 3534 * ensure that any operations on the acceptor are serialized with
3534 3535 * respect to the close that's in-progress.
3535 3536 */
3536 3537 if (cl_ep != NULL) {
3537 3538 if (tl_noclose(cl_ep)) {
3538 3539 client_noclose_set = B_TRUE;
3539 3540 } else {
3540 3541 /*
2541 2542 * Client is closing. If it has sent the
2542 2543 * T_ORDREL_IND, we can simply ignore it - otherwise,
2543 2544 * we have to let the client continue until it is
2544 2545 * sent.
3545 3546 *
3546 3547 * If we do continue using the client, acceptor will
3547 3548 * switch to client's serializer which is used by client
3548 3549 * for its close.
3549 3550 */
3550 3551 tl_client_closing_when_accepting++;
3551 3552 switch_client_serializer = B_FALSE;
3552 3553 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3553 3554 cl_ep->te_state == -1)
3554 3555 cl_ep = NULL;
3555 3556 }
3556 3557 }
3557 3558
3558 3559 if (cl_ep != NULL) {
3559 3560 /*
3560 3561 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3561 3562 * (latter for sockets only)
3562 3563 */
3563 3564 if (cl_ep->te_state != TS_WCON_CREQ &&
3564 3565 (cl_ep->te_state != TS_DATA_XFER &&
3565 3566 IS_SOCKET(cl_ep))) {
3566 3567 err = ECONNREFUSED;
3567 3568 /*
3568 3569 * T_DISCON_IND sent later after committing memory
3569 3570 * and acking validity of request
3570 3571 */
3571 3572 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3572 3573 "tl_conn_res:peer in bad state"));
3573 3574 }
3574 3575
3575 3576 /*
2576 2577 * preallocate now for T_DISCON_IND or T_CONN_CON
3577 3578 * ack validity of request (T_OK_ACK) after memory committed
3578 3579 */
3579 3580
3580 3581 if (err)
3581 3582 size = sizeof (struct T_discon_ind);
3582 3583 else {
3583 3584 /*
3584 3585 * calculate length of T_CONN_CON message
3585 3586 */
3586 3587 olen = 0;
3587 3588 if (cl_ep->te_flag & TL_SETCRED) {
3588 3589 olen = (t_scalar_t)sizeof (struct opthdr) +
3589 3590 OPTLEN(sizeof (tl_credopt_t));
3590 3591 } else if (cl_ep->te_flag & TL_SETUCRED) {
3591 3592 olen = (t_scalar_t)sizeof (struct opthdr) +
3592 3593 OPTLEN(ucredminsize(acc_ep->te_credp));
3593 3594 }
3594 3595 size = T_ALIGN(sizeof (struct T_conn_con) +
3595 3596 acc_ep->te_alen) + olen;
3596 3597 }
3597 3598 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3598 3599 /*
3599 3600 * roll back state changes
3600 3601 */
3601 3602 tep->te_state = TS_WRES_CIND;
3602 3603 tl_memrecover(wq, mp, size);
3603 3604 freemsg(ackmp);
3604 3605 if (client_noclose_set)
3605 3606 tl_closeok(cl_ep);
3606 3607 tl_closeok(acc_ep);
3607 3608 tl_refrele(acc_ep);
3608 3609 return;
3609 3610 }
3610 3611 mp = NULL;
3611 3612 }
3612 3613
3613 3614 /*
3614 3615 * Now ack validity of request
3615 3616 */
3616 3617 if (tep->te_nicon == 1) {
3617 3618 if (tep == acc_ep)
3618 3619 tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3619 3620 else
3620 3621 tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3621 3622 } else
3622 3623 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3623 3624
3624 3625 /*
3625 3626 * send T_DISCON_IND now if client state validation failed earlier
3626 3627 */
3627 3628 if (err) {
3628 3629 tl_ok_ack(wq, ackmp, prim);
3629 3630 /*
3630 3631 * flush the queues - why always ?
3631 3632 */
3632 3633 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3633 3634
3634 3635 dimp = tl_resizemp(respmp, size);
3635 3636 if (! dimp) {
3636 3637 (void) (STRLOG(TL_ID, tep->te_minor, 3,
3637 3638 SL_TRACE|SL_ERROR,
3638 3639 "tl_conn_res:con_ind:allocb failure"));
3639 3640 tl_merror(wq, respmp, ENOMEM);
3640 3641 tl_closeok(acc_ep);
3641 3642 if (client_noclose_set)
3642 3643 tl_closeok(cl_ep);
3643 3644 tl_refrele(acc_ep);
3644 3645 return;
3645 3646 }
3646 3647 if (dimp->b_cont) {
3647 3648 /* no user data in provider generated discon ind */
3648 3649 freemsg(dimp->b_cont);
3649 3650 dimp->b_cont = NULL;
3650 3651 }
3651 3652
3652 3653 DB_TYPE(dimp) = M_PROTO;
3653 3654 di = (struct T_discon_ind *)dimp->b_rptr;
3654 3655 di->PRIM_type = T_DISCON_IND;
3655 3656 di->DISCON_reason = err;
3656 3657 di->SEQ_number = BADSEQNUM;
3657 3658
3658 3659 tep->te_state = TS_IDLE;
3659 3660 /*
3660 3661 * send T_DISCON_IND message
3661 3662 */
3662 3663 putnext(acc_ep->te_rq, dimp);
3663 3664 if (client_noclose_set)
3664 3665 tl_closeok(cl_ep);
3665 3666 tl_closeok(acc_ep);
3666 3667 tl_refrele(acc_ep);
3667 3668 return;
3668 3669 }
3669 3670
3670 3671 /*
3671 3672 * now start connecting the accepting endpoint
3672 3673 */
3673 3674 if (tep != acc_ep)
3674 3675 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3675 3676
3676 3677 if (cl_ep == NULL) {
3677 3678 /*
3678 3679 * The client has already closed. Send up any queued messages
3679 3680 * and change the state accordingly.
3680 3681 */
3681 3682 tl_ok_ack(wq, ackmp, prim);
3682 3683 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3683 3684
3684 3685 /*
3685 3686 * remove endpoint from incoming connection
3686 3687 * delete client from list of incoming connections
3687 3688 */
3688 3689 tl_freetip(tep, tip);
3689 3690 freemsg(mp);
3690 3691 tl_closeok(acc_ep);
3691 3692 tl_refrele(acc_ep);
3692 3693 return;
3693 3694 } else if (tip->ti_mp != NULL) {
3694 3695 /*
3695 3696 * The client could have queued a T_DISCON_IND which needs
3696 3697 * to be sent up.
3697 3698 * Note that t_discon_req can not operate the same as
3698 3699 * t_data_req since it is not possible for it to putbq
3699 3700 * the message and return -1 due to the use of qwriter.
3700 3701 */
3701 3702 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3702 3703 }
3703 3704
3704 3705 /*
3705 3706 * prepare connect confirm T_CONN_CON message
3706 3707 */
3707 3708
3708 3709 /*
3709 3710 * allocate the message - original data blocks
3710 3711 * retained in the returned mblk
3711 3712 */
3712 3713 if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3713 3714 ccmp = tl_resizemp(respmp, size);
3714 3715 if (ccmp == NULL) {
3715 3716 tl_ok_ack(wq, ackmp, prim);
3716 3717 (void) (STRLOG(TL_ID, tep->te_minor, 3,
3717 3718 SL_TRACE|SL_ERROR,
3718 3719 "tl_conn_res:conn_con:allocb failure"));
3719 3720 tl_merror(wq, respmp, ENOMEM);
3720 3721 tl_closeok(acc_ep);
3721 3722 if (client_noclose_set)
3722 3723 tl_closeok(cl_ep);
3723 3724 tl_refrele(acc_ep);
3724 3725 return;
3725 3726 }
3726 3727
3727 3728 DB_TYPE(ccmp) = M_PROTO;
3728 3729 cc = (struct T_conn_con *)ccmp->b_rptr;
3729 3730 cc->PRIM_type = T_CONN_CON;
3730 3731 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3731 3732 cc->RES_length = acc_ep->te_alen;
3732 3733 addr_startp = ccmp->b_rptr + cc->RES_offset;
3733 3734 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3734 3735 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3735 3736 cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3736 3737 cc->RES_length);
3737 3738 cc->OPT_length = olen;
3738 3739 tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3739 3740 acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3740 3741 cl_ep->te_credp);
3741 3742 } else {
3742 3743 cc->OPT_offset = 0;
3743 3744 cc->OPT_length = 0;
3744 3745 }
3745 3746 /*
3746 3747 * Forward the credential in the packet so it can be picked up
3747 3748 * at the higher layers for more complete credential processing
3748 3749 */
3749 3750 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3750 3751 } else {
3751 3752 freemsg(respmp);
3752 3753 respmp = NULL;
3753 3754 }
3754 3755
3755 3756 /*
3756 3757 * make connection linking
3757 3758 * accepting and client endpoints
3758 3759 * No need to increment references:
3759 3760 * on client: it should already have one from tip->ti_tep linkage.
3760 3761 * on acceptor it should already have one from the table lookup.
3761 3762 *
3762 3763 * At this point both client and acceptor can't close. Set client
3763 3764 * serializer to acceptor's.
3764 3765 */
3765 3766 ASSERT(cl_ep->te_refcnt >= 2);
3766 3767 ASSERT(acc_ep->te_refcnt >= 2);
3767 3768 ASSERT(cl_ep->te_conp == NULL);
3768 3769 ASSERT(acc_ep->te_conp == NULL);
3769 3770 cl_ep->te_conp = acc_ep;
3770 3771 acc_ep->te_conp = cl_ep;
3771 3772 ASSERT(cl_ep->te_ser == tep->te_ser);
3772 3773 if (switch_client_serializer) {
3773 3774 mutex_enter(&cl_ep->te_ser_lock);
3774 3775 if (cl_ep->te_ser_count > 0) {
3775 3776 switch_client_serializer = B_FALSE;
3776 3777 tl_serializer_noswitch++;
3777 3778 } else {
3778 3779 /*
3779 3780 * Move client to the acceptor's serializer.
3780 3781 */
3781 3782 tl_serializer_refhold(acc_ep->te_ser);
3782 3783 tl_serializer_refrele(cl_ep->te_ser);
3783 3784 cl_ep->te_ser = acc_ep->te_ser;
3784 3785 }
3785 3786 mutex_exit(&cl_ep->te_ser_lock);
3786 3787 }
3787 3788 if (!switch_client_serializer) {
3788 3789 /*
3789 3790 * It is not possible to switch client to use acceptor's.
3790 3791 * Move acceptor to client's serializer (which is the same as
3791 3792 * listener's).
3792 3793 */
3793 3794 tl_serializer_refhold(cl_ep->te_ser);
3794 3795 tl_serializer_refrele(acc_ep->te_ser);
3795 3796 acc_ep->te_ser = cl_ep->te_ser;
3796 3797 }
3797 3798
3798 3799 TL_REMOVE_PEER(cl_ep->te_oconp);
3799 3800 TL_REMOVE_PEER(acc_ep->te_oconp);
3800 3801
3801 3802 /*
3802 3803 * remove endpoint from incoming connection
3803 3804 * delete client from list of incoming connections
3804 3805 */
3805 3806 tip->ti_tep = NULL;
3806 3807 tl_freetip(tep, tip);
3807 3808 tl_ok_ack(wq, ackmp, prim);
3808 3809
3809 3810 /*
3810 3811 * data blocks already linked in reallocb()
3811 3812 */
3812 3813
3813 3814 /*
3814 3815 * link queues so that I_SENDFD will work
3815 3816 */
3816 3817 if (! IS_SOCKET(tep)) {
3817 3818 acc_ep->te_wq->q_next = cl_ep->te_rq;
3818 3819 cl_ep->te_wq->q_next = acc_ep->te_rq;
3819 3820 }
3820 3821
3821 3822 /*
3822 3823 * send T_CONN_CON up on client side unless it was already
3823 3824 * done (for a socket). In cases any data or ordrel req has been
3824 3825 * queued make sure that the service procedure runs.
3825 3826 */
3826 3827 if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3827 3828 enableok(cl_ep->te_wq);
3828 3829 TL_QENABLE(cl_ep);
3829 3830 if (ccmp != NULL)
3830 3831 freemsg(ccmp);
3831 3832 } else {
3832 3833 /*
3833 3834 * change client state on TE_CONN_CON event
3834 3835 */
3835 3836 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3836 3837 putnext(cl_ep->te_rq, ccmp);
3837 3838 }
3838 3839
3839 3840 /* Mark both endpoints as accepted */
3840 3841 cl_ep->te_flag |= TL_ACCEPTED;
3841 3842 acc_ep->te_flag |= TL_ACCEPTED;
3842 3843
3843 3844 /*
3844 3845 * Allow client and acceptor to close.
3845 3846 */
3846 3847 tl_closeok(acc_ep);
3847 3848 if (client_noclose_set)
3848 3849 tl_closeok(cl_ep);
3849 3850 }
3850 3851
3851 3852
3852 3853
3853 3854
/*
 * tl_discon_req: process a T_DISCON_REQ message (mp) received on endpoint tep.
 *
 * The request is dropped if tep is closing. Otherwise an ack mblk is
 * preallocated, the endpoint state and the message length are validated and,
 * when tep has pending incoming connections (te_nicon > 0), the connection
 * named by dr->SEQ_number is looked up with tl_icon_find(). Validation
 * failures are answered with tl_error_ack() (TOUTSTATE, TSYSERR/EINVAL or
 * TBADSEQ). On success a T_OK_ACK is sent and a T_DISCON_IND with reason
 * ECONNRESET is built in the mblk obtained from reallocb(mp, ...); it is
 * either passed up the peer's read queue or, for a socket client whose
 * connection is still pending (and early connect is enabled), queued on the
 * listener with tl_icon_queuemsg(). Finally, if tep was connected, the
 * te_conp linkage on both sides is torn down.
 *
 * Allocation failures for the ack or response are handed to tl_memrecover();
 * a tl_resizemp() failure is reported with tl_merror(..., ENOMEM).
 */
3854 3855 static void
3855 3856 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3856 3857 {
3857 3858 queue_t *wq;
3858 3859 struct T_discon_req *dr;
3859 3860 ssize_t msz;
3860 3861 tl_endpt_t *peer_tep = tep->te_conp;
3861 3862 tl_endpt_t *srv_tep = tep->te_oconp;
3862 3863 tl_icon_t *tip;
3863 3864 size_t size;
3864 3865 mblk_t *ackmp, *dimp, *respmp;
3865 3866 struct T_discon_ind *di;
3866 3867 t_scalar_t save_state, new_state;
3867 3868
3868 3869 if (tep->te_closing) {
3869 3870 freemsg(mp);
3870 3871 return;
3871 3872 }
3872 3873
/*
 * A connected peer or a listener that is already closing is treated as
 * absent: the corresponding pointer in tep is passed to TL_UNCONNECT() and
 * the local copy is cleared. (Presumably TL_UNCONNECT releases the hold and
 * NULLs the field -- confirm against the macro definition.)
 */
3873 3874 if ((peer_tep != NULL) && peer_tep->te_closing) {
3874 3875 TL_UNCONNECT(tep->te_conp);
3875 3876 peer_tep = NULL;
3876 3877 }
3877 3878 if ((srv_tep != NULL) && srv_tep->te_closing) {
3878 3879 TL_UNCONNECT(tep->te_oconp);
3879 3880 srv_tep = NULL;
3880 3881 }
3881 3882
3882 3883 wq = tep->te_wq;
3883 3884
3884 3885 /*
3885 3886 * preallocate memory for:
3886 3887 * 1. max of T_ERROR_ACK and T_OK_ACK
3887 3888 * ==> known max T_ERROR_ACK
3888 3889 * 2. for T_DISCON_IND
3889 3890 */
3890 3891 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3891 3892 if (! ackmp) {
3892 3893 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3893 3894 return;
3894 3895 }
3895 3896 /*
3896 3897 * memory committed for T_OK_ACK/T_ERROR_ACK now
3897 3898 * will be committed for T_DISCON_IND later
3898 3899 */
3899 3900
3900 3901 dr = (struct T_discon_req *)mp->b_rptr;
3901 3902 msz = MBLKL(mp);
3902 3903
3903 3904 /*
3904 3905 * validate the state
3905 3906 */
/* save_state keeps the pre-request state; it drives the flush decisions below */
3906 3907 save_state = new_state = tep->te_state;
3907 3908 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3908 3909 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3909 3910 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3910 3911 SL_TRACE|SL_ERROR,
3911 3912 "tl_wput:T_DISCON_REQ:out of state, state=%d",
3912 3913 tep->te_state));
3913 3914 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3914 3915 freemsg(mp);
3915 3916 return;
3916 3917 }
3917 3918 /*
3918 3919 * Defer committing the state change until it is determined if
3919 3920 * the message will be queued with the tl_icon or not.
3920 3921 */
3921 3922 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3922 3923
3923 3924 /* validate the message */
3924 3925 if (msz < sizeof (struct T_discon_req)) {
3925 3926 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3926 3927 "tl_discon_req:invalid message"));
3927 3928 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3928 3929 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3929 3930 freemsg(mp);
3930 3931 return;
3931 3932 }
3932 3933
3933 3934 /*
3934 3935 * if server, then validate that client exists
3935 3936 * by connection sequence number etc.
3936 3937 */
3937 3938 if (tep->te_nicon > 0) { /* server */
3938 3939
3939 3940 /*
3940 3941 * search server list for disconnect client
3941 3942 */
3942 3943 tip = tl_icon_find(tep, dr->SEQ_number);
3943 3944 if (tip == NULL) {
3944 3945 (void) (STRLOG(TL_ID, tep->te_minor, 2,
3945 3946 SL_TRACE|SL_ERROR,
3946 3947 "tl_discon_req:no disconnect endpoint"));
3947 3948 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3948 3949 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3949 3950 freemsg(mp);
3950 3951 return;
3951 3952 }
3952 3953 /*
3953 3954 * If ti_tep is NULL the client has already closed. In this case
3954 3955 * the code below will avoid any action on the client side.
3955 3956 */
3956 3957
3957 3958 IMPLY(tip->ti_tep != NULL,
3958 3959 tip->ti_tep->te_seqno == dr->SEQ_number);
3959 3960 peer_tep = tip->ti_tep;
3960 3961 }
3961 3962
3962 3963 /*
3963 3964 * preallocate now for T_DISCON_IND
3964 3965 * ack validity of request (T_OK_ACK) after memory committed
3965 3966 */
3966 3967 size = sizeof (struct T_discon_ind);
3967 3968 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3968 3969 tl_memrecover(wq, mp, size);
3969 3970 freemsg(ackmp);
3970 3971 return;
3971 3972 }
3972 3973
3973 3974 /*
3974 3975 * prepare message to ack validity of request
3975 3976 */
3976 3977 if (tep->te_nicon == 0)
3977 3978 new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3978 3979 else
3979 3980 if (tep->te_nicon == 1)
3980 3981 new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3981 3982 else
3982 3983 new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3983 3984
3984 3985 /*
3985 3986 * Flushing queues according to TPI. Using the old state.
3986 3987 */
3987 3988 if ((tep->te_nicon <= 1) &&
3988 3989 ((save_state == TS_DATA_XFER) ||
3989 3990 (save_state == TS_WIND_ORDREL) ||
3990 3991 (save_state == TS_WREQ_ORDREL)))
3991 3992 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3992 3993
3993 3994 /* send T_OK_ACK up */
3994 3995 tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3995 3996
3996 3997 /*
3997 3998 * now do disconnect business
3998 3999 */
3999 4000 if (tep->te_nicon > 0) { /* listener */
4000 4001 if (peer_tep != NULL && !peer_tep->te_closing) {
4001 4002 /*
4002 4003 * disconnect incoming connect request pending to tep
4003 4004 */
4004 4005 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4005 4006 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4006 4007 SL_TRACE|SL_ERROR,
4007 4008 "tl_discon_req: reallocb failed"));
4008 4009 tep->te_state = new_state;
4009 4010 tl_merror(wq, respmp, ENOMEM);
4010 4011 return;
4011 4012 }
4012 4013 di = (struct T_discon_ind *)dimp->b_rptr;
4013 4014 di->SEQ_number = BADSEQNUM;
/* from here on save_state is the PEER's previous state, not tep's */
4014 4015 save_state = peer_tep->te_state;
4015 4016 peer_tep->te_state = TS_IDLE;
4016 4017
4017 4018 TL_REMOVE_PEER(peer_tep->te_oconp);
4018 4019 enableok(peer_tep->te_wq);
4019 4020 TL_QENABLE(peer_tep);
4020 4021 } else {
4021 4022 freemsg(respmp);
4022 4023 dimp = NULL;
4023 4024 }
4024 4025
4025 4026 /*
4026 4027 * remove endpoint from incoming connection list
4027 4028 * - remove disconnect client from list on server
4028 4029 */
4029 4030 tl_freetip(tep, tip);
4030 4031 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4031 4032 /*
4032 4033 * disconnect an outgoing request pending from tep
4033 4034 */
4034 4035
4035 4036 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4036 4037 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4037 4038 SL_TRACE|SL_ERROR,
4038 4039 "tl_discon_req: reallocb failed"));
4039 4040 tep->te_state = new_state;
4040 4041 tl_merror(wq, respmp, ENOMEM);
4041 4042 return;
4042 4043 }
4043 4044 di = (struct T_discon_ind *)dimp->b_rptr;
4044 4045 DB_TYPE(dimp) = M_PROTO;
4045 4046 di->PRIM_type = T_DISCON_IND;
4046 4047 di->DISCON_reason = ECONNRESET;
4047 4048 di->SEQ_number = tep->te_seqno;
4048 4049
4049 4050 /*
4050 4051 * If this is a socket the T_DISCON_IND is queued with
4051 4052 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4052 4053 * from the list of pending connections.
4053 4054 * Note that when te_oconp is set the peer better have
4054 4055 * a t_connind_t for the client.
4055 4056 */
4056 4057 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4057 4058 /*
4058 4059 * No need to check that
4059 4060 * ti_tep == NULL since the T_DISCON_IND
4060 4061 * takes precedence over other queued
4061 4062 * messages.
4062 4063 */
4063 4064 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
/* dimp now belongs to the queue; clearing both makes the code below skip the putnext */
4064 4065 peer_tep = NULL;
4065 4066 dimp = NULL;
4066 4067 /*
4067 4068 * Can't clear te_oconp since tl_co_unconnect needs
4068 4069 * it as a hint not to free the tep.
4069 4070 * Keep the state unchanged since tl_conn_res inspects
4070 4071 * it.
4071 4072 */
4072 4073 new_state = tep->te_state;
4073 4074 } else {
4074 4075 /* Found - delete it */
4075 4076 tip = tl_icon_find(peer_tep, tep->te_seqno);
4076 4077 if (tip != NULL) {
4077 4078 ASSERT(tep == tip->ti_tep);
4078 4079 save_state = peer_tep->te_state;
4079 4080 if (peer_tep->te_nicon == 1)
4080 4081 peer_tep->te_state =
4081 4082 NEXTSTATE(TE_DISCON_IND2,
4082 4083 peer_tep->te_state);
4083 4084 else
4084 4085 peer_tep->te_state =
4085 4086 NEXTSTATE(TE_DISCON_IND3,
4086 4087 peer_tep->te_state);
4087 4088 tl_freetip(peer_tep, tip);
4088 4089 }
4089 4090 ASSERT(tep->te_oconp != NULL);
4090 4091 TL_UNCONNECT(tep->te_oconp);
4091 4092 }
4092 4093 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4093 4094 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4094 4095 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4095 4096 SL_TRACE|SL_ERROR,
4096 4097 "tl_discon_req: reallocb failed"));
4097 4098 tep->te_state = new_state;
4098 4099 tl_merror(wq, respmp, ENOMEM);
4099 4100 return;
4100 4101 }
4101 4102 di = (struct T_discon_ind *)dimp->b_rptr;
4102 4103 di->SEQ_number = BADSEQNUM;
4103 4104
4104 4105 save_state = peer_tep->te_state;
4105 4106 peer_tep->te_state = TS_IDLE;
4106 4107 } else {
4107 4108 /* Not connected */
4108 4109 tep->te_state = new_state;
4109 4110 freemsg(respmp);
4110 4111 return;
4111 4112 }
4112 4113
4113 4114 /* Commit state changes */
4114 4115 tep->te_state = new_state;
4115 4116
/*
 * NOTE(review): in the listener branch above, a non-NULL peer_tep that has
 * te_closing set leaves dimp == NULL and di unassigned while peer_tep stays
 * non-NULL. The check below only tests peer_tep == NULL, so that combination
 * would appear to reach DB_TYPE(dimp) with a NULL dimp -- confirm whether
 * a closing client can still be linked through tip->ti_tep at this point.
 */
4116 4117 if (peer_tep == NULL) {
4117 4118 ASSERT(dimp == NULL);
4118 4119 goto done;
4119 4120 }
4120 4121 /*
4121 4122 * Flush queues on peer before sending up
4122 4123 * T_DISCON_IND according to TPI
4123 4124 */
4124 4125
4125 4126 if ((save_state == TS_DATA_XFER) ||
4126 4127 (save_state == TS_WIND_ORDREL) ||
4127 4128 (save_state == TS_WREQ_ORDREL))
4128 4129 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4129 4130
4130 4131 DB_TYPE(dimp) = M_PROTO;
4131 4132 di->PRIM_type = T_DISCON_IND;
4132 4133 di->DISCON_reason = ECONNRESET;
4133 4134
4134 4135 /*
4135 4136 * data blocks already linked into dimp by reallocb()
4136 4137 */
4137 4138 /*
4138 4139 * send indication message to peer user module
4139 4140 */
4140 4141 ASSERT(dimp != NULL);
4141 4142 putnext(peer_tep->te_rq, dimp);
4142 4143 done:
4143 4144 if (tep->te_conp) { /* disconnect pointers if connected */
4144 4145 ASSERT(! peer_tep->te_closing);
4145 4146
4146 4147 /*
4147 4148 * Messages may be queued on peer's write queue
4148 4149 * waiting to be processed by its write service
4149 4150 * procedure. Before the pointer to the peer transport
4150 4151 * structure is set to NULL, qenable the peer's write
4151 4152 * queue so that the queued up messages are processed.
4152 4153 */
4153 4154 if ((save_state == TS_DATA_XFER) ||
4154 4155 (save_state == TS_WIND_ORDREL) ||
4155 4156 (save_state == TS_WREQ_ORDREL))
4156 4157 TL_QENABLE(peer_tep);
4157 4158 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4158 4159 TL_UNCONNECT(peer_tep->te_conp);
4159 4160 if (! IS_SOCKET(tep)) {
4160 4161 /*
4161 4162 * unlink the streams
4162 4163 */
4163 4164 tep->te_wq->q_next = NULL;
4164 4165 peer_tep->te_wq->q_next = NULL;
4165 4166 }
4166 4167 TL_UNCONNECT(tep->te_conp);
4167 4168 }
4168 4169 }
4169 4170
/*
 * tl_addr_req_ser: wrapper around tl_addr_req() that also leaves the
 * serializer. Processes mp unless tep is closing (in which case mp is
 * freed), then calls tl_serializer_exit() and tl_refrele() on tep.
 */
4170 4171 static void
4171 4172 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4172 4173 {
4173 4174 if (!tep->te_closing)
4174 4175 tl_addr_req(mp, tep);
4175 4176 else
4176 4177 freemsg(mp);
4177 4178
4178 4179 tl_serializer_exit(tep);
4179 4180 tl_refrele(tep);
4180 4181 }
4181 4182
/*
 * tl_addr_req: answer a T_ADDR_REQ (mp) on endpoint tep.
 *
 * The message is freed if tep is closing. For a connectionless endpoint, or
 * a connection-oriented one whose state lies outside
 * [TS_DATA_XFER, TS_WREQ_ORDREL], a T_ADDR_ACK is built in place (via
 * reallocb) carrying only the local address -- and only when the endpoint is
 * bound (te_state >= TS_IDLE) -- and is sent back with qreply() as
 * M_PCPROTO. Otherwise the request is passed to
 * tl_connected_cots_addr_req(). A reallocb() failure is logged and handed to
 * tl_memrecover().
 */
4182 4183 static void
4183 4184 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4184 4185 {
4185 4186 queue_t *wq;
4186 4187 size_t ack_sz;
4187 4188 mblk_t *ackmp;
4188 4189 struct T_addr_ack *taa;
4189 4190
4190 4191 if (tep->te_closing) {
4191 4192 freemsg(mp);
4192 4193 return;
4193 4194 }
4194 4195
4195 4196 wq = tep->te_wq;
4196 4197
4197 4198 /*
4198 4199 * Note: T_ADDR_REQ message has only PRIM_type field
4199 4200 * so it is already validated earlier.
4200 4201 */
4201 4202
4202 4203 if (IS_CLTS(tep) ||
4203 4204 (tep->te_state > TS_WREQ_ORDREL) ||
4204 4205 (tep->te_state < TS_DATA_XFER)) {
4205 4206 /*
4206 4207 * Either connectionless or connection oriented but not
4207 4208 * in connected data transfer state or half-closed states.
4208 4209 */
4209 4210 ack_sz = sizeof (struct T_addr_ack);
4210 4211 if (tep->te_state >= TS_IDLE)
4211 4212 /* is bound */
4212 4213 ack_sz += tep->te_alen;
4213 4214 ackmp = reallocb(mp, ack_sz, 0);
4214 4215 if (ackmp == NULL) {
4215 4216 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4216 4217 SL_TRACE|SL_ERROR,
4217 4218 "tl_addr_req: reallocb failed"));
4218 4219 tl_memrecover(wq, mp, ack_sz);
4219 4220 return;
4220 4221 }
4221 4222
4222 4223 taa = (struct T_addr_ack *)ackmp->b_rptr;
4223 4224
/* zeroing leaves every length/offset field at 0 unless filled in below */
4224 4225 bzero(taa, sizeof (struct T_addr_ack));
4225 4226
4226 4227 taa->PRIM_type = T_ADDR_ACK;
4227 4228 ackmp->b_datap->db_type = M_PCPROTO;
/* b_wptr points just past the fixed header; the address is appended there */
4228 4229 ackmp->b_wptr = (uchar_t *)&taa[1];
4229 4230
4230 4231 if (tep->te_state >= TS_IDLE) {
4231 4232 /* endpoint is bound */
4232 4233 taa->LOCADDR_length = tep->te_alen;
4233 4234 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4234 4235
4235 4236 bcopy(tep->te_abuf, ackmp->b_wptr,
4236 4237 tep->te_alen);
4237 4238 ackmp->b_wptr += tep->te_alen;
4238 4239 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4239 4240 }
4240 4241
4241 4242 (void) qreply(wq, ackmp);
4242 4243 } else {
4243 4244 ASSERT(tep->te_state == TS_DATA_XFER ||
4244 4245 tep->te_state == TS_WIND_ORDREL ||
↓ open down ↓ |
4208 lines elided |
↑ open up ↑ |
4245 4246 tep->te_state == TS_WREQ_ORDREL);
4246 4247 /* connection oriented in data transfer */
4247 4248 tl_connected_cots_addr_req(mp, tep);
4248 4249 }
4249 4250 }
4250 4251
4251 4252
/*
 * tl_connected_cots_addr_req: answer a T_ADDR_REQ (mp) on a connection-
 * oriented endpoint in a data-transfer or half-closed state by returning a
 * T_ADDR_ACK that carries both the local address (tep->te_abuf) and the
 * remote address (peer_tep->te_abuf), the latter placed at a T_ALIGN'ed
 * offset after the former. The ack is sent up tep's read queue.
 *
 * In this listing, rows marked '-' belong to the old revision and rows
 * marked '+' to the new one: the new revision reads tep->te_conp at
 * declaration and replies with tl_error_ack(TSYSERR, ECONNRESET) when the
 * peer is missing or closing, instead of dereferencing it unchecked.
 */
4252 4253 static void
4253 4254 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4254 4255 {
4255 - tl_endpt_t *peer_tep;
4256 + tl_endpt_t *peer_tep = tep->te_conp;
4256 4257 size_t ack_sz;
4257 4258 mblk_t *ackmp;
4258 4259 struct T_addr_ack *taa;
4259 4260 uchar_t *addr_startp;
4260 4261
4261 4262 if (tep->te_closing) {
4262 4263 freemsg(mp);
4263 4264 return;
4264 4265 }
4265 4266
4267 + if (peer_tep == NULL || peer_tep->te_closing) {
4268 + tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4269 + return;
4270 + }
4271 +
4266 4272 ASSERT(tep->te_state >= TS_IDLE);
4267 4273
/* ack size: fixed header + aligned local address + remote address */
4268 4274 ack_sz = sizeof (struct T_addr_ack);
4269 4275 ack_sz += T_ALIGN(tep->te_alen);
4270 - peer_tep = tep->te_conp;
4271 4276 ack_sz += peer_tep->te_alen;
4272 4277
4273 4278 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4274 4279 if (ackmp == NULL) {
4275 4280 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4276 4281 "tl_connected_cots_addr_req: reallocb failed"));
4277 4282 tl_memrecover(tep->te_wq, mp, ack_sz);
4278 4283 return;
4279 4284 }
4280 4285
4281 4286 taa = (struct T_addr_ack *)ackmp->b_rptr;
4282 4287
4283 4288 /* endpoint is bound */
4284 4289 taa->LOCADDR_length = tep->te_alen;
4285 4290 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4286 4291
4287 4292 addr_startp = (uchar_t *)&taa[1];
4288 4293
4289 4294 bcopy(tep->te_abuf, addr_startp,
4290 4295 tep->te_alen);
4291 4296
4292 4297 taa->REMADDR_length = peer_tep->te_alen;
4293 4298 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4294 4299 taa->LOCADDR_length);
4295 4300 addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4296 4301 bcopy(peer_tep->te_abuf, addr_startp,
4297 4302 peer_tep->te_alen);
4298 4303 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4299 4304 taa->REMADDR_offset + peer_tep->te_alen;
4300 4305 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4301 4306
4302 4307 putnext(tep->te_rq, ackmp);
4303 4308 }
4304 4309
/*
 * tl_copy_info: fill *ia with the T_info_ack contents for endpoint tep.
 * Starts from the tl_clts_info_ack or tl_cots_info_ack template depending on
 * IS_CLTS(tep), overrides SERV_type with T_COTS_ORD for orderly-release
 * endpoints, and always sets TIDU_size (tl_tidusz) and CURRENT_state
 * (tep->te_state).
 */
4305 4310 static void
4306 4311 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4307 4312 {
4308 4313 if (IS_CLTS(tep)) {
4309 4314 *ia = tl_clts_info_ack;
4310 4315 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4311 4316 } else {
4312 4317 *ia = tl_cots_info_ack;
4313 4318 if (IS_COTSORD(tep))
4314 4319 ia->SERV_type = T_COTS_ORD;
4315 4320 }
4316 4321 ia->TIDU_size = tl_tidusz;
4317 4322 ia->CURRENT_state = tep->te_state;
4318 4323 }
4319 4324
4320 4325 /*
4321 4326 * This routine responds to T_CAPABILITY_REQ messages. It is called by
4322 4327 * tl_wput.
4323 4328 */
/*
 * The reply is a T_CAPABILITY_ACK (M_PCPROTO) sent up tep's read queue. Its
 * CAP_bits1 echoes only the capabilities that were both requested and filled
 * in here: TC1_INFO (via tl_copy_info) and TC1_ACCEPTOR_ID. The request is
 * freed without a reply if tep is closing; an allocation failure is logged
 * and handed to tl_memrecover().
 */
4324 4329 static void
4325 4330 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4326 4331 {
4327 4332 mblk_t *ackmp;
4328 4333 t_uscalar_t cap_bits1;
4329 4334 struct T_capability_ack *tcap;
4330 4335
4331 4336 if (tep->te_closing) {
4332 4337 freemsg(mp);
4333 4338 return;
4334 4339 }
4335 4340
/*
 * Read the requested bits before mp is handed to tpi_ack_alloc()
 * (presumably because the ack may reuse mp's storage -- confirm).
 */
4336 4341 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4337 4342
4338 4343 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4339 4344 M_PCPROTO, T_CAPABILITY_ACK);
4340 4345 if (ackmp == NULL) {
4341 4346 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4342 4347 "tl_capability_req: reallocb failed"));
4343 4348 tl_memrecover(tep->te_wq, mp,
4344 4349 sizeof (struct T_capability_ack));
4345 4350 return;
4346 4351 }
4347 4352
4348 4353 tcap = (struct T_capability_ack *)ackmp->b_rptr;
4349 4354 tcap->CAP_bits1 = 0;
4350 4355
4351 4356 if (cap_bits1 & TC1_INFO) {
4352 4357 tl_copy_info(&tcap->INFO_ack, tep);
4353 4358 tcap->CAP_bits1 |= TC1_INFO;
4354 4359 }
4355 4360
4356 4361 if (cap_bits1 & TC1_ACCEPTOR_ID) {
4357 4362 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4358 4363 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4359 4364 }
4360 4365
4361 4366 putnext(tep->te_rq, ackmp);
4362 4367 }
4363 4368
/*
 * tl_info_req_ser: wrapper around tl_info_req() that also leaves the
 * serializer. Processes mp unless tep is closing (in which case mp is
 * freed), then calls tl_serializer_exit() and tl_refrele() on tep.
 */
4364 4369 static void
4365 4370 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4366 4371 {
4367 4372 if (! tep->te_closing)
4368 4373 tl_info_req(mp, tep);
4369 4374 else
4370 4375 freemsg(mp);
4371 4376
4372 4377 tl_serializer_exit(tep);
4373 4378 tl_refrele(tep);
4374 4379 }
4375 4380
/*
 * tl_info_req: answer a T_INFO_REQ (mp) on endpoint tep with a T_INFO_ACK
 * (M_PCPROTO) whose contents come from tl_copy_info(), sent up tep's read
 * queue. An allocation failure is logged and handed to tl_memrecover().
 * Unlike its siblings this routine does not itself test tep->te_closing.
 */
4376 4381 static void
4377 4382 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4378 4383 {
4379 4384 mblk_t *ackmp;
4380 4385
4381 4386 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4382 4387 M_PCPROTO, T_INFO_ACK);
4383 4388 if (ackmp == NULL) {
4384 4389 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4385 4390 "tl_info_req: reallocb failed"));
4386 4391 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4387 4392 return;
4388 4393 }
4389 4394
4390 4395 /*
4391 4396 * fill in T_INFO_ACK contents
4392 4397 */
4393 4398 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4394 4399
4395 4400 /*
4396 4401 * send ack message
4397 4402 */
4398 4403 putnext(tep->te_rq, ackmp);
4399 4404 }
4400 4405
/*
 * tl_data: forward a data message (mp) written on tep to its connected peer.
 *
 * Outcomes visible in this routine:
 *  - connectionless endpoint, or a malformed T_DATA_REQ/T_OPTDATA_REQ
 *    (T_OPTDATA_REQ is also rejected on non-sockets): tl_merror(EPROTO), or
 *    just freemsg() when tep is closing;
 *  - tep closing in a state other than TS_DATA_XFER/TS_WREQ_ORDREL, tep in
 *    TS_IDLE, or peer missing/closing: message is freed;
 *  - TS_DATA_XFER with no te_conp but a pending te_oconp: message is put
 *    back on the queue (TL_PUTBQ), or, if tep is closing, converted to an
 *    indication and queued on the listener with tl_icon_queuemsg();
 *  - TS_WREQ_ORDREL with no peer: message freed and tl_discon_ind(tep, 0);
 *  - peer flow-controlled and tep not closing: TL_PUTBQ;
 *  - otherwise the primitive type is rewritten from *_REQ to *_IND in place
 *    and the message is passed up the peer's read queue.
 */
4401 4406 /*
4402 4407 * Handle M_DATA, T_data_req and T_optdata_req.
4403 4408 * If this is a socket pass through T_optdata_req options unmodified.
4404 4409 */
4405 4410 static void
4406 4411 tl_data(mblk_t *mp, tl_endpt_t *tep)
4407 4412 {
4408 4413 queue_t *wq = tep->te_wq;
4409 4414 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4410 4415 ssize_t msz = MBLKL(mp);
4411 4416 tl_endpt_t *peer_tep;
4412 4417 queue_t *peer_rq;
4413 4418 boolean_t closing = tep->te_closing;
4414 4419
/* this path only serves connection-oriented endpoints */
4415 4420 if (IS_CLTS(tep)) {
4416 4421 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4417 4422 SL_TRACE|SL_ERROR,
4418 4423 "tl_wput:clts:unattached M_DATA"));
4419 4424 if (!closing) {
4420 4425 tl_merror(wq, mp, EPROTO);
4421 4426 } else {
4422 4427 freemsg(mp);
4423 4428 }
4424 4429 return;
4425 4430 }
4426 4431
4427 4432 /*
4428 4433 * If the endpoint is closing it should still forward any data to the
4429 4434 * peer (if it has one). If it is not allowed to forward it can just
4430 4435 * free the message.
4431 4436 */
4432 4437 if (closing &&
4433 4438 (tep->te_state != TS_DATA_XFER) &&
4434 4439 (tep->te_state != TS_WREQ_ORDREL)) {
4435 4440 freemsg(mp);
4436 4441 return;
4437 4442 }
4438 4443
/* M_PROTO messages must be large enough for the primitive they claim to be */
4439 4444 if (DB_TYPE(mp) == M_PROTO) {
4440 4445 if (prim->type == T_DATA_REQ &&
4441 4446 msz < sizeof (struct T_data_req)) {
4442 4447 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4443 4448 SL_TRACE|SL_ERROR,
4444 4449 "tl_data:T_DATA_REQ:invalid message"));
4445 4450 if (!closing) {
4446 4451 tl_merror(wq, mp, EPROTO);
4447 4452 } else {
4448 4453 freemsg(mp);
4449 4454 }
4450 4455 return;
4451 4456 } else if (prim->type == T_OPTDATA_REQ &&
4452 4457 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4453 4458 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4454 4459 SL_TRACE|SL_ERROR,
4455 4460 "tl_data:T_OPTDATA_REQ:invalid message"));
4456 4461 if (!closing) {
4457 4462 tl_merror(wq, mp, EPROTO);
4458 4463 } else {
4459 4464 freemsg(mp);
4460 4465 }
4461 4466 return;
4462 4467 }
4463 4468 }
4464 4469
4465 4470 /*
4466 4471 * connection oriented provider
4467 4472 */
4468 4473 switch (tep->te_state) {
4469 4474 case TS_IDLE:
4470 4475 /*
4471 4476 * Other end not here - do nothing.
4472 4477 */
4473 4478 freemsg(mp);
4474 4479 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4475 4480 "tl_data:cots with endpoint idle"));
4476 4481 return;
4477 4482
4478 4483 case TS_DATA_XFER:
4479 4484 /* valid states */
4480 4485 if (tep->te_conp != NULL)
4481 4486 break;
4482 4487
4483 4488 if (tep->te_oconp == NULL) {
4484 4489 if (!closing) {
4485 4490 tl_merror(wq, mp, EPROTO);
4486 4491 } else {
4487 4492 freemsg(mp);
4488 4493 }
4489 4494 return;
4490 4495 }
4491 4496 /*
4492 4497 * For a socket the T_CONN_CON is sent early thus
4493 4498 * the peer might not yet have accepted the connection.
4494 4499 * If we are closing queue the packet with the T_CONN_IND.
4495 4500 * Otherwise defer processing the packet until the peer
4496 4501 * accepts the connection.
4497 4502 * Note that the queue is noenabled when we go into this
4498 4503 * state.
4499 4504 */
4500 4505 if (!closing) {
4501 4506 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4502 4507 SL_TRACE|SL_ERROR,
4503 4508 "tl_data: ocon"));
4504 4509 TL_PUTBQ(tep, mp);
4505 4510 return;
4506 4511 }
/* closing: turn the request into an indication and park it on the listener */
4507 4512 if (DB_TYPE(mp) == M_PROTO) {
4508 4513 if (msz < sizeof (t_scalar_t)) {
4509 4514 freemsg(mp);
4510 4515 return;
4511 4516 }
4512 4517 /* reuse message block - just change REQ to IND */
4513 4518 if (prim->type == T_DATA_REQ)
4514 4519 prim->type = T_DATA_IND;
4515 4520 else
4516 4521 prim->type = T_OPTDATA_IND;
4517 4522 }
4518 4523 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4519 4524 return;
4520 4525
4521 4526 case TS_WREQ_ORDREL:
4522 4527 if (tep->te_conp == NULL) {
4523 4528 /*
4524 4529 * Other end closed - generate discon_ind
4525 4530 * with reason 0 to cause an EPIPE but no
4526 4531 * read side error on AF_UNIX sockets.
4527 4532 */
4528 4533 freemsg(mp);
4529 4534 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4530 4535 SL_TRACE|SL_ERROR,
4531 4536 "tl_data: WREQ_ORDREL and no peer"));
4532 4537 tl_discon_ind(tep, 0);
4533 4538 return;
4534 4539 }
4535 4540 break;
4536 4541
4537 4542 default:
4538 4543 /* invalid state for event TE_DATA_REQ */
4539 4544 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4540 4545 "tl_data:cots:out of state"));
4541 4546 tl_merror(wq, mp, EPROTO);
4542 4547 return;
4543 4548 }
4544 4549 /*
4545 4550 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4546 4551 * (State stays same on this event)
4547 4552 */
4548 4553
4549 4554 /*
4550 4555 * get connected endpoint
4551 4556 */
4552 4557 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4553 4558 freemsg(mp);
4554 4559 /* Peer closed */
4555 4560 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4556 4561 "tl_data: peer gone"));
4557 4562 return;
4558 4563 }
4559 4564
4560 4565 ASSERT(tep->te_serializer == peer_tep->te_serializer);
4561 4566 peer_rq = peer_tep->te_rq;
4562 4567
4563 4568 /*
4564 4569 * Put it back if flow controlled
4565 4570 * Note: Messages already on queue when we are closing is bounded
4566 4571 * so we can ignore flow control.
4567 4572 */
4568 4573 if (!canputnext(peer_rq) && !closing) {
4569 4574 TL_PUTBQ(tep, mp);
4570 4575 return;
4571 4576 }
4572 4577
4573 4578 /*
4574 4579 * validate peer state
4575 4580 */
4576 4581 switch (peer_tep->te_state) {
4577 4582 case TS_DATA_XFER:
4578 4583 case TS_WIND_ORDREL:
4579 4584 /* valid states */
4580 4585 break;
4581 4586 default:
4582 4587 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4583 4588 "tl_data:rx side:invalid state"));
4584 4589 tl_merror(peer_tep->te_wq, mp, EPROTO);
4585 4590 return;
4586 4591 }
4587 4592 if (DB_TYPE(mp) == M_PROTO) {
4588 4593 /* reuse message block - just change REQ to IND */
4589 4594 if (prim->type == T_DATA_REQ)
4590 4595 prim->type = T_DATA_IND;
4591 4596 else
4592 4597 prim->type = T_OPTDATA_IND;
4593 4598 }
4594 4599 /*
4595 4600 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4596 4601 * (peer state stays same on this event)
4597 4602 */
4598 4603 /*
4599 4604 * send data to connected peer
4600 4605 */
4601 4606 putnext(peer_rq, mp);
4602 4607 }
4603 4608
4604 4609
4605 4610
4606 4611 static void
4607 4612 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4608 4613 {
4609 4614 queue_t *wq = tep->te_wq;
4610 4615 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4611 4616 ssize_t msz = MBLKL(mp);
4612 4617 tl_endpt_t *peer_tep;
4613 4618 queue_t *peer_rq;
4614 4619 boolean_t closing = tep->te_closing;
4615 4620
4616 4621 if (msz < sizeof (struct T_exdata_req)) {
4617 4622 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4618 4623 "tl_exdata:invalid message"));
4619 4624 if (!closing) {
4620 4625 tl_merror(wq, mp, EPROTO);
4621 4626 } else {
4622 4627 freemsg(mp);
4623 4628 }
4624 4629 return;
4625 4630 }
4626 4631
4627 4632 /*
4628 4633 * If the endpoint is closing it should still forward any data to the
4629 4634 * peer (if it has one). If it is not allowed to forward it can just
4630 4635 * free the message.
4631 4636 */
4632 4637 if (closing &&
4633 4638 (tep->te_state != TS_DATA_XFER) &&
4634 4639 (tep->te_state != TS_WREQ_ORDREL)) {
4635 4640 freemsg(mp);
4636 4641 return;
4637 4642 }
4638 4643
4639 4644 /*
4640 4645 * validate state
4641 4646 */
4642 4647 switch (tep->te_state) {
4643 4648 case TS_IDLE:
4644 4649 /*
4645 4650 * Other end not here - do nothing.
4646 4651 */
4647 4652 freemsg(mp);
4648 4653 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4649 4654 "tl_exdata:cots with endpoint idle"));
4650 4655 return;
4651 4656
4652 4657 case TS_DATA_XFER:
4653 4658 /* valid states */
4654 4659 if (tep->te_conp != NULL)
4655 4660 break;
4656 4661
4657 4662 if (tep->te_oconp == NULL) {
4658 4663 if (!closing) {
4659 4664 tl_merror(wq, mp, EPROTO);
4660 4665 } else {
4661 4666 freemsg(mp);
4662 4667 }
4663 4668 return;
4664 4669 }
4665 4670 /*
4666 4671 * For a socket the T_CONN_CON is sent early thus
4667 4672 * the peer might not yet have accepted the connection.
4668 4673 * If we are closing queue the packet with the T_CONN_IND.
4669 4674 * Otherwise defer processing the packet until the peer
4670 4675 * accepts the connection.
4671 4676 * Note that the queue is noenabled when we go into this
4672 4677 * state.
4673 4678 */
4674 4679 if (!closing) {
4675 4680 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4676 4681 SL_TRACE|SL_ERROR,
4677 4682 "tl_exdata: ocon"));
4678 4683 TL_PUTBQ(tep, mp);
4679 4684 return;
4680 4685 }
4681 4686 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4682 4687 "tl_exdata: closing socket ocon"));
4683 4688 prim->type = T_EXDATA_IND;
4684 4689 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4685 4690 return;
4686 4691
4687 4692 case TS_WREQ_ORDREL:
4688 4693 if (tep->te_conp == NULL) {
4689 4694 /*
4690 4695 * Other end closed - generate discon_ind
4691 4696 * with reason 0 to cause an EPIPE but no
4692 4697 * read side error on AF_UNIX sockets.
4693 4698 */
4694 4699 freemsg(mp);
4695 4700 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4696 4701 SL_TRACE|SL_ERROR,
4697 4702 "tl_exdata: WREQ_ORDREL and no peer"));
4698 4703 tl_discon_ind(tep, 0);
4699 4704 return;
4700 4705 }
4701 4706 break;
4702 4707
4703 4708 default:
4704 4709 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4705 4710 SL_TRACE|SL_ERROR,
4706 4711 "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4707 4712 tep->te_state));
4708 4713 tl_merror(wq, mp, EPROTO);
4709 4714 return;
4710 4715 }
4711 4716 /*
4712 4717 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4713 4718 * (state stays same on this event)
4714 4719 */
4715 4720
4716 4721 /*
4717 4722 * get connected endpoint
4718 4723 */
4719 4724 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4720 4725 freemsg(mp);
4721 4726 /* Peer closed */
4722 4727 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4723 4728 "tl_exdata: peer gone"));
4724 4729 return;
4725 4730 }
4726 4731
4727 4732 peer_rq = peer_tep->te_rq;
4728 4733
4729 4734 /*
4730 4735 * Put it back if flow controlled
4731 4736 * Note: Messages already on queue when we are closing is bounded
4732 4737 * so we can ignore flow control.
4733 4738 */
4734 4739 if (!canputnext(peer_rq) && !closing) {
4735 4740 TL_PUTBQ(tep, mp);
4736 4741 return;
4737 4742 }
4738 4743
4739 4744 /*
4740 4745 * validate state on peer
4741 4746 */
4742 4747 switch (peer_tep->te_state) {
4743 4748 case TS_DATA_XFER:
4744 4749 case TS_WIND_ORDREL:
4745 4750 /* valid states */
4746 4751 break;
4747 4752 default:
4748 4753 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4749 4754 "tl_exdata:rx side:invalid state"));
4750 4755 tl_merror(peer_tep->te_wq, mp, EPROTO);
4751 4756 return;
4752 4757 }
4753 4758 /*
4754 4759 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4755 4760 * (peer state stays same on this event)
4756 4761 */
4757 4762 /*
4758 4763 * reuse message block
4759 4764 */
4760 4765 prim->type = T_EXDATA_IND;
4761 4766
4762 4767 /*
4763 4768 * send data to connected peer
4764 4769 */
4765 4770 putnext(peer_rq, mp);
4766 4771 }
4767 4772
4768 4773
4769 4774
4770 4775 static void
4771 4776 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4772 4777 {
4773 4778 queue_t *wq = tep->te_wq;
4774 4779 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4775 4780 ssize_t msz = MBLKL(mp);
4776 4781 tl_endpt_t *peer_tep;
4777 4782 queue_t *peer_rq;
4778 4783 boolean_t closing = tep->te_closing;
4779 4784
4780 4785 if (msz < sizeof (struct T_ordrel_req)) {
4781 4786 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4782 4787 "tl_ordrel:invalid message"));
4783 4788 if (!closing) {
4784 4789 tl_merror(wq, mp, EPROTO);
4785 4790 } else {
4786 4791 freemsg(mp);
4787 4792 }
4788 4793 return;
4789 4794 }
4790 4795
4791 4796 /*
4792 4797 * validate state
4793 4798 */
4794 4799 switch (tep->te_state) {
4795 4800 case TS_DATA_XFER:
4796 4801 case TS_WREQ_ORDREL:
4797 4802 /* valid states */
4798 4803 if (tep->te_conp != NULL)
4799 4804 break;
4800 4805
4801 4806 if (tep->te_oconp == NULL)
4802 4807 break;
4803 4808
4804 4809 /*
4805 4810 * For a socket the T_CONN_CON is sent early thus
4806 4811 * the peer might not yet have accepted the connection.
4807 4812 * If we are closing queue the packet with the T_CONN_IND.
4808 4813 * Otherwise defer processing the packet until the peer
4809 4814 * accepts the connection.
4810 4815 * Note that the queue is noenabled when we go into this
4811 4816 * state.
4812 4817 */
4813 4818 if (!closing) {
4814 4819 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4815 4820 SL_TRACE|SL_ERROR,
4816 4821 "tl_ordlrel: ocon"));
4817 4822 TL_PUTBQ(tep, mp);
4818 4823 return;
4819 4824 }
4820 4825 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4821 4826 "tl_ordlrel: closing socket ocon"));
4822 4827 prim->type = T_ORDREL_IND;
4823 4828 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4824 4829 return;
4825 4830
4826 4831 default:
4827 4832 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4828 4833 SL_TRACE|SL_ERROR,
4829 4834 "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4830 4835 tep->te_state));
4831 4836 if (!closing) {
4832 4837 tl_merror(wq, mp, EPROTO);
4833 4838 } else {
4834 4839 freemsg(mp);
4835 4840 }
4836 4841 return;
4837 4842 }
4838 4843 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4839 4844
4840 4845 /*
4841 4846 * get connected endpoint
4842 4847 */
4843 4848 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4844 4849 /* Peer closed */
4845 4850 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4846 4851 "tl_ordrel: peer gone"));
4847 4852 freemsg(mp);
4848 4853 return;
4849 4854 }
4850 4855
4851 4856 peer_rq = peer_tep->te_rq;
4852 4857
4853 4858 /*
4854 4859 * Put it back if flow controlled except when we are closing.
4855 4860 * Note: Messages already on queue when we are closing is bounded
4856 4861 * so we can ignore flow control.
4857 4862 */
4858 4863 if (! canputnext(peer_rq) && !closing) {
4859 4864 TL_PUTBQ(tep, mp);
4860 4865 return;
4861 4866 }
4862 4867
4863 4868 /*
4864 4869 * validate state on peer
4865 4870 */
4866 4871 switch (peer_tep->te_state) {
4867 4872 case TS_DATA_XFER:
4868 4873 case TS_WIND_ORDREL:
4869 4874 /* valid states */
4870 4875 break;
4871 4876 default:
4872 4877 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4873 4878 "tl_ordrel:rx side:invalid state"));
4874 4879 tl_merror(peer_tep->te_wq, mp, EPROTO);
4875 4880 return;
4876 4881 }
4877 4882 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4878 4883
4879 4884 /*
4880 4885 * reuse message block
4881 4886 */
4882 4887 prim->type = T_ORDREL_IND;
4883 4888 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4884 4889 "tl_ordrel: send ordrel_ind"));
4885 4890
4886 4891 /*
4887 4892 * send data to connected peer
4888 4893 */
4889 4894 putnext(peer_rq, mp);
4890 4895 }
4891 4896
4892 4897
4893 4898 /*
4894 4899 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4895 4900 */
4896 4901 static void
4897 4902 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4898 4903 {
4899 4904 size_t err_sz;
4900 4905 tl_endpt_t *tep;
4901 4906 struct T_unitdata_req *udreq;
4902 4907 mblk_t *err_mp;
4903 4908 t_scalar_t alen;
4904 4909 t_scalar_t olen;
4905 4910 struct T_uderror_ind *uderr;
4906 4911 uchar_t *addr_startp;
4907 4912
4908 4913 err_sz = sizeof (struct T_uderror_ind);
4909 4914 tep = (tl_endpt_t *)wq->q_ptr;
4910 4915 udreq = (struct T_unitdata_req *)mp->b_rptr;
4911 4916 alen = udreq->DEST_length;
4912 4917 olen = udreq->OPT_length;
4913 4918
4914 4919 if (alen > 0)
4915 4920 err_sz = T_ALIGN(err_sz + alen);
4916 4921 if (olen > 0)
4917 4922 err_sz += olen;
4918 4923
4919 4924 err_mp = allocb(err_sz, BPRI_MED);
4920 4925 if (! err_mp) {
4921 4926 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4922 4927 "tl_uderr:allocb failure"));
4923 4928 /*
4924 4929 * Note: no rollback of state needed as it does
4925 4930 * not change in connectionless transport
4926 4931 */
4927 4932 tl_memrecover(wq, mp, err_sz);
4928 4933 return;
4929 4934 }
4930 4935
4931 4936 DB_TYPE(err_mp) = M_PROTO;
4932 4937 err_mp->b_wptr = err_mp->b_rptr + err_sz;
4933 4938 uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4934 4939 uderr->PRIM_type = T_UDERROR_IND;
4935 4940 uderr->ERROR_type = err;
4936 4941 uderr->DEST_length = alen;
4937 4942 uderr->OPT_length = olen;
4938 4943 if (alen <= 0) {
4939 4944 uderr->DEST_offset = 0;
4940 4945 } else {
4941 4946 uderr->DEST_offset =
4942 4947 (t_scalar_t)sizeof (struct T_uderror_ind);
4943 4948 addr_startp = mp->b_rptr + udreq->DEST_offset;
4944 4949 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4945 4950 (size_t)alen);
4946 4951 }
4947 4952 if (olen <= 0) {
4948 4953 uderr->OPT_offset = 0;
4949 4954 } else {
4950 4955 uderr->OPT_offset =
4951 4956 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4952 4957 uderr->DEST_length);
4953 4958 addr_startp = mp->b_rptr + udreq->OPT_offset;
4954 4959 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4955 4960 (size_t)olen);
4956 4961 }
4957 4962 freemsg(mp);
4958 4963
4959 4964 /*
4960 4965 * send indication message
4961 4966 */
4962 4967 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4963 4968
4964 4969 qreply(wq, err_mp);
4965 4970 }
4966 4971
4967 4972 static void
4968 4973 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4969 4974 {
4970 4975 queue_t *wq = tep->te_wq;
4971 4976
4972 4977 if (!tep->te_closing && (wq->q_first != NULL)) {
4973 4978 TL_PUTQ(tep, mp);
4974 4979 } else if (tep->te_rq != NULL)
4975 4980 tl_unitdata(mp, tep);
4976 4981 else
4977 4982 freemsg(mp);
4978 4983
4979 4984 tl_serializer_exit(tep);
4980 4985 tl_refrele(tep);
4981 4986 }
4982 4987
4983 4988 /*
4984 4989 * Handle T_unitdata_req.
4985 4990 * If TL_SET[U]CRED or TL_SOCKUCRED generate the credentials options.
4986 4991 * If this is a socket pass through options unmodified.
4987 4992 */
4988 4993 static void
4989 4994 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4990 4995 {
4991 4996 queue_t *wq = tep->te_wq;
4992 4997 soux_addr_t ux_addr;
4993 4998 tl_addr_t destaddr;
4994 4999 uchar_t *addr_startp;
4995 5000 tl_endpt_t *peer_tep;
4996 5001 struct T_unitdata_ind *udind;
4997 5002 struct T_unitdata_req *udreq;
4998 5003 ssize_t msz, ui_sz;
4999 5004 t_scalar_t alen, aoff, olen, ooff;
5000 5005 t_scalar_t oldolen = 0;
5001 5006 cred_t *cr = NULL;
5002 5007 pid_t cpid;
5003 5008
5004 5009 udreq = (struct T_unitdata_req *)mp->b_rptr;
5005 5010 msz = MBLKL(mp);
5006 5011
5007 5012 /*
5008 5013 * validate the state
5009 5014 */
5010 5015 if (tep->te_state != TS_IDLE) {
5011 5016 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5012 5017 SL_TRACE|SL_ERROR,
5013 5018 "tl_wput:T_CONN_REQ:out of state"));
5014 5019 tl_merror(wq, mp, EPROTO);
5015 5020 return;
5016 5021 }
5017 5022 /*
5018 5023 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5019 5024 * (state does not change on this event)
5020 5025 */
5021 5026
5022 5027 /*
5023 5028 * validate the message
5024 5029 * Note: dereference fields in struct inside message only
5025 5030 * after validating the message length.
5026 5031 */
5027 5032 if (msz < sizeof (struct T_unitdata_req)) {
5028 5033 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5029 5034 "tl_unitdata:invalid message length"));
5030 5035 tl_merror(wq, mp, EINVAL);
5031 5036 return;
5032 5037 }
5033 5038 alen = udreq->DEST_length;
5034 5039 aoff = udreq->DEST_offset;
5035 5040 oldolen = olen = udreq->OPT_length;
5036 5041 ooff = udreq->OPT_offset;
5037 5042 if (olen == 0)
5038 5043 ooff = 0;
5039 5044
5040 5045 if (IS_SOCKET(tep)) {
5041 5046 if ((alen != TL_SOUX_ADDRLEN) ||
5042 5047 (aoff < 0) ||
5043 5048 (aoff + alen > msz) ||
5044 5049 (olen < 0) || (ooff < 0) ||
5045 5050 ((olen > 0) && ((ooff + olen) > msz))) {
5046 5051 (void) (STRLOG(TL_ID, tep->te_minor,
5047 5052 1, SL_TRACE|SL_ERROR,
5048 5053 "tl_unitdata_req: invalid socket addr "
5049 5054 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5050 5055 (int)msz, alen, aoff, olen, ooff));
5051 5056 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5052 5057 return;
5053 5058 }
5054 5059 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5055 5060
5056 5061 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5057 5062 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5058 5063 (void) (STRLOG(TL_ID, tep->te_minor,
5059 5064 1, SL_TRACE|SL_ERROR,
5060 5065 "tl_conn_req: invalid socket magic"));
5061 5066 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5062 5067 return;
5063 5068 }
5064 5069 } else {
5065 5070 if ((alen < 0) ||
5066 5071 (aoff < 0) ||
5067 5072 ((alen > 0) && ((aoff + alen) > msz)) ||
5068 5073 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5069 5074 ((aoff + alen) < 0) ||
5070 5075 ((olen > 0) && ((ooff + olen) > msz)) ||
5071 5076 (olen < 0) ||
5072 5077 (ooff < 0) ||
5073 5078 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5074 5079 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5075 5080 SL_TRACE|SL_ERROR,
5076 5081 "tl_unitdata:invalid unit data message"));
5077 5082 tl_merror(wq, mp, EINVAL);
5078 5083 return;
5079 5084 }
5080 5085 }
5081 5086
5082 5087 /* Options not supported unless it's a socket */
5083 5088 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5084 5089 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5085 5090 "tl_unitdata:option use(unsupported) or zero len addr"));
5086 5091 tl_uderr(wq, mp, EPROTO);
5087 5092 return;
5088 5093 }
5089 5094 #ifdef DEBUG
5090 5095 /*
5091 5096 * Mild form of ASSERT()ion to detect broken TPI apps.
5092 5097 * if (! assertion)
5093 5098 * log warning;
5094 5099 */
5095 5100 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5096 5101 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5097 5102 "tl_unitdata:addr overlaps TPI message"));
5098 5103 }
5099 5104 #endif
5100 5105 /*
5101 5106 * get destination endpoint
5102 5107 */
5103 5108 destaddr.ta_alen = alen;
5104 5109 destaddr.ta_abuf = mp->b_rptr + aoff;
5105 5110 destaddr.ta_zoneid = tep->te_zoneid;
5106 5111
5107 5112 /*
5108 5113 * Check whether the destination is the same that was used previously
5109 5114 * and the destination endpoint is in the right state. If something is
5110 5115 * wrong, find destination again and cache it.
5111 5116 */
5112 5117 peer_tep = tep->te_lastep;
5113 5118
5114 5119 if ((peer_tep == NULL) || peer_tep->te_closing ||
5115 5120 (peer_tep->te_state != TS_IDLE) ||
5116 5121 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5117 5122 /*
5118 5123 * Not the same as cached destination , need to find the right
5119 5124 * destination.
5120 5125 */
5121 5126 peer_tep = (IS_SOCKET(tep) ?
5122 5127 tl_sock_find_peer(tep, &ux_addr) :
5123 5128 tl_find_peer(tep, &destaddr));
5124 5129
5125 5130 if (peer_tep == NULL) {
5126 5131 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5127 5132 SL_TRACE|SL_ERROR,
5128 5133 "tl_unitdata:no one at destination address"));
5129 5134 tl_uderr(wq, mp, ECONNRESET);
5130 5135 return;
5131 5136 }
5132 5137
5133 5138 /*
5134 5139 * Cache the new peer.
5135 5140 */
5136 5141 if (tep->te_lastep != NULL)
5137 5142 tl_refrele(tep->te_lastep);
5138 5143
5139 5144 tep->te_lastep = peer_tep;
5140 5145 }
5141 5146
5142 5147 if (peer_tep->te_state != TS_IDLE) {
5143 5148 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5144 5149 "tl_unitdata:provider in invalid state"));
5145 5150 tl_uderr(wq, mp, EPROTO);
5146 5151 return;
5147 5152 }
5148 5153
5149 5154 ASSERT(peer_tep->te_rq != NULL);
5150 5155
5151 5156 /*
5152 5157 * Put it back if flow controlled except when we are closing.
5153 5158 * Note: Messages already on queue when we are closing is bounded
5154 5159 * so we can ignore flow control.
5155 5160 */
5156 5161 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5157 5162 /* record what we are flow controlled on */
5158 5163 if (tep->te_flowq != NULL) {
5159 5164 list_remove(&tep->te_flowq->te_flowlist, tep);
5160 5165 }
5161 5166 list_insert_head(&peer_tep->te_flowlist, tep);
5162 5167 tep->te_flowq = peer_tep;
5163 5168 TL_PUTBQ(tep, mp);
5164 5169 return;
5165 5170 }
5166 5171 /*
5167 5172 * prepare indication message
5168 5173 */
5169 5174
5170 5175 /*
5171 5176 * calculate length of message
5172 5177 */
5173 5178 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5174 5179 cr = msg_getcred(mp, &cpid);
5175 5180 ASSERT(cr != NULL);
5176 5181
5177 5182 if (peer_tep->te_flag & TL_SETCRED) {
5178 5183 ASSERT(olen == 0);
5179 5184 olen = (t_scalar_t)sizeof (struct opthdr) +
5180 5185 OPTLEN(sizeof (tl_credopt_t));
5181 5186 /* 1 option only */
5182 5187 } else if (peer_tep->te_flag & TL_SETUCRED) {
5183 5188 ASSERT(olen == 0);
5184 5189 olen = (t_scalar_t)sizeof (struct opthdr) +
5185 5190 OPTLEN(ucredminsize(cr));
5186 5191 /* 1 option only */
5187 5192 } else {
5188 5193 /* Possibly more than one option */
5189 5194 olen += (t_scalar_t)sizeof (struct T_opthdr) +
5190 5195 OPTLEN(ucredminsize(cr));
5191 5196 }
5192 5197 }
5193 5198
5194 5199 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) +
5195 5200 olen;
5196 5201 /*
5197 5202 * If the unitdata_ind fits and we are not adding options
5198 5203 * reuse the udreq mblk.
5199 5204 */
5200 5205 if (msz >= ui_sz && alen >= tep->te_alen &&
5201 5206 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5202 5207 /*
5203 5208 * Reuse the original mblk. Leave options in place.
5204 5209 */
5205 5210 udind = (struct T_unitdata_ind *)mp->b_rptr;
5206 5211 udind->PRIM_type = T_UNITDATA_IND;
5207 5212 udind->SRC_length = tep->te_alen;
5208 5213 addr_startp = mp->b_rptr + udind->SRC_offset;
5209 5214 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5210 5215 } else {
5211 5216 /* Allocate a new T_unidata_ind message */
5212 5217 mblk_t *ui_mp;
5213 5218
5214 5219 ui_mp = allocb(ui_sz, BPRI_MED);
5215 5220 if (! ui_mp) {
5216 5221 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5217 5222 "tl_unitdata:allocb failure:message queued"));
5218 5223 tl_memrecover(wq, mp, ui_sz);
5219 5224 return;
5220 5225 }
5221 5226
5222 5227 /*
5223 5228 * fill in T_UNITDATA_IND contents
5224 5229 */
5225 5230 DB_TYPE(ui_mp) = M_PROTO;
5226 5231 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5227 5232 udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5228 5233 udind->PRIM_type = T_UNITDATA_IND;
5229 5234 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5230 5235 udind->SRC_length = tep->te_alen;
5231 5236 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5232 5237 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5233 5238 udind->OPT_offset =
5234 5239 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5235 5240 udind->OPT_length = olen;
5236 5241 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5237 5242
5238 5243 if (oldolen != 0) {
5239 5244 bcopy((void *)((uintptr_t)udreq + ooff),
5240 5245 (void *)((uintptr_t)udind +
5241 5246 udind->OPT_offset),
5242 5247 oldolen);
5243 5248 }
5244 5249 ASSERT(cr != NULL);
5245 5250
5246 5251 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5247 5252 oldolen, cr, cpid,
5248 5253 peer_tep->te_flag, peer_tep->te_credp);
5249 5254 } else {
5250 5255 bcopy((void *)((uintptr_t)udreq + ooff),
5251 5256 (void *)((uintptr_t)udind + udind->OPT_offset),
5252 5257 olen);
5253 5258 }
5254 5259
5255 5260 /*
5256 5261 * relink data blocks from mp to ui_mp
5257 5262 */
5258 5263 ui_mp->b_cont = mp->b_cont;
5259 5264 freeb(mp);
5260 5265 mp = ui_mp;
5261 5266 }
5262 5267 /*
5263 5268 * send indication message
5264 5269 */
5265 5270 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5266 5271 putnext(peer_tep->te_rq, mp);
5267 5272 }
5268 5273
5269 5274
5270 5275
5271 5276 /*
5272 5277 * Check if a given addr is in use.
5273 5278 * Endpoint ptr returned or NULL if not found.
5274 5279 * The name space is separate for each mode. This implies that
5275 5280 * sockets get their own name space.
5276 5281 */
5277 5282 static tl_endpt_t *
5278 5283 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5279 5284 {
5280 5285 tl_endpt_t *peer_tep = NULL;
5281 5286 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5282 5287 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5283 5288
5284 5289 ASSERT(! IS_SOCKET(tep));
5285 5290
5286 5291 ASSERT(ap != NULL && ap->ta_alen > 0);
5287 5292 ASSERT(ap->ta_zoneid == tep->te_zoneid);
5288 5293 ASSERT(ap->ta_abuf != NULL);
5289 5294 EQUIV(rc == 0, peer_tep != NULL);
5290 5295 IMPLY(rc == 0,
5291 5296 (tep->te_zoneid == peer_tep->te_zoneid) &&
5292 5297 (tep->te_transport == peer_tep->te_transport));
5293 5298
5294 5299 if ((rc == 0) && (peer_tep->te_closing)) {
5295 5300 tl_refrele(peer_tep);
5296 5301 peer_tep = NULL;
5297 5302 }
5298 5303
5299 5304 return (peer_tep);
5300 5305 }
5301 5306
5302 5307 /*
5303 5308 * Find peer for a socket based on unix domain address.
5304 5309 * For implicit addresses our peer can be found by minor number in ai hash. For
5305 5310 * explicit binds we look vnode address at addr_hash.
5306 5311 */
5307 5312 static tl_endpt_t *
5308 5313 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5309 5314 {
5310 5315 tl_endpt_t *peer_tep = NULL;
5311 5316 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5312 5317 tep->te_aihash : tep->te_addrhash;
5313 5318 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5314 5319 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5315 5320
5316 5321 ASSERT(IS_SOCKET(tep));
5317 5322 EQUIV(rc == 0, peer_tep != NULL);
5318 5323 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5319 5324
5320 5325 if (peer_tep != NULL) {
5321 5326 /* Don't attempt to use closing peer. */
5322 5327 if (peer_tep->te_closing)
5323 5328 goto errout;
5324 5329
5325 5330 /*
5326 5331 * Cross-zone unix sockets are permitted, but for Trusted
5327 5332 * Extensions only, the "server" for these must be in the
5328 5333 * global zone.
5329 5334 */
5330 5335 if ((peer_tep->te_zoneid != tep->te_zoneid) &&
5331 5336 is_system_labeled() &&
5332 5337 (peer_tep->te_zoneid != GLOBAL_ZONEID))
5333 5338 goto errout;
5334 5339 }
5335 5340
5336 5341 return (peer_tep);
5337 5342
5338 5343 errout:
5339 5344 tl_refrele(peer_tep);
5340 5345 return (NULL);
5341 5346 }
5342 5347
5343 5348 /*
5344 5349 * Generate a free addr and return it in struct pointed by ap
5345 5350 * but allocating space for address buffer.
5346 5351 * The generated address will be at least 4 bytes long and, if req->ta_alen
5347 5352 * exceeds 4 bytes, be req->ta_alen bytes long.
5348 5353 *
5349 5354 * If address is found it will be inserted in the hash.
5350 5355 *
5351 5356 * If req->ta_alen is larger than the default alen (4 bytes) the last
5352 5357 * alen-4 bytes will always be the same as in req.
5353 5358 *
5354 5359 * Return 0 for failure.
5355 5360 * Return non-zero for success.
5356 5361 */
5357 5362 static boolean_t
5358 5363 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5359 5364 {
5360 5365 t_scalar_t alen;
5361 5366 uint32_t loopcnt; /* Limit loop to 2^32 */
5362 5367
5363 5368 ASSERT(tep->te_hash_hndl != NULL);
5364 5369 ASSERT(! IS_SOCKET(tep));
5365 5370
5366 5371 if (tep->te_hash_hndl == NULL)
5367 5372 return (B_FALSE);
5368 5373
5369 5374 /*
5370 5375 * check if default addr is in use
5371 5376 * if it is - bump it and try again
5372 5377 */
5373 5378 if (req == NULL) {
5374 5379 alen = sizeof (uint32_t);
5375 5380 } else {
5376 5381 alen = max(req->ta_alen, sizeof (uint32_t));
5377 5382 ASSERT(tep->te_zoneid == req->ta_zoneid);
5378 5383 }
5379 5384
5380 5385 if (tep->te_alen < alen) {
5381 5386 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5382 5387
5383 5388 /*
5384 5389 * Not enough space in tep->ta_ap to hold the address,
5385 5390 * allocate a bigger space.
5386 5391 */
5387 5392 if (abuf == NULL)
5388 5393 return (B_FALSE);
5389 5394
5390 5395 if (tep->te_alen > 0)
5391 5396 kmem_free(tep->te_abuf, tep->te_alen);
5392 5397
5393 5398 tep->te_alen = alen;
5394 5399 tep->te_abuf = abuf;
5395 5400 }
5396 5401
5397 5402 /* Copy in the address in req */
5398 5403 if (req != NULL) {
5399 5404 ASSERT(alen >= req->ta_alen);
5400 5405 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5401 5406 }
5402 5407
5403 5408 /*
5404 5409 * First try minor number then try default addresses.
5405 5410 */
5406 5411 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5407 5412
5408 5413 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5409 5414 if (mod_hash_insert_reserve(tep->te_addrhash,
5410 5415 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5411 5416 tep->te_hash_hndl) == 0) {
5412 5417 /*
5413 5418 * found free address
5414 5419 */
5415 5420 tep->te_flag |= TL_ADDRHASHED;
5416 5421 tep->te_hash_hndl = NULL;
5417 5422
5418 5423 return (B_TRUE); /* successful return */
5419 5424 }
5420 5425 /*
5421 5426 * Use default address.
5422 5427 */
5423 5428 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5424 5429 atomic_add_32(&tep->te_defaddr, 1);
5425 5430 }
5426 5431
5427 5432 /*
5428 5433 * Failed to find anything.
5429 5434 */
5430 5435 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5431 5436 "tl_get_any_addr:looped 2^32 times"));
5432 5437 return (B_FALSE);
5433 5438 }
5434 5439
5435 5440 /*
5436 5441 * reallocb + set r/w ptrs to reflect size.
5437 5442 */
5438 5443 static mblk_t *
5439 5444 tl_resizemp(mblk_t *mp, ssize_t new_size)
5440 5445 {
5441 5446 if ((mp = reallocb(mp, new_size, 0)) == NULL)
5442 5447 return (NULL);
5443 5448
5444 5449 mp->b_rptr = DB_BASE(mp);
5445 5450 mp->b_wptr = mp->b_rptr + new_size;
5446 5451 return (mp);
5447 5452 }
5448 5453
5449 5454 static void
5450 5455 tl_cl_backenable(tl_endpt_t *tep)
5451 5456 {
5452 5457 list_t *l = &tep->te_flowlist;
5453 5458 tl_endpt_t *elp;
5454 5459
5455 5460 ASSERT(IS_CLTS(tep));
5456 5461
5457 5462 for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5458 5463 ASSERT(tep->te_ser == elp->te_ser);
5459 5464 ASSERT(elp->te_flowq == tep);
5460 5465 if (! elp->te_closing)
5461 5466 TL_QENABLE(elp);
5462 5467 elp->te_flowq = NULL;
5463 5468 list_remove(l, elp);
5464 5469 }
5465 5470 }
5466 5471
5467 5472 /*
5468 5473 * Unconnect endpoints.
5469 5474 */
5470 5475 static void
5471 5476 tl_co_unconnect(tl_endpt_t *tep)
5472 5477 {
5473 5478 tl_endpt_t *peer_tep = tep->te_conp;
5474 5479 tl_endpt_t *srv_tep = tep->te_oconp;
5475 5480 list_t *l;
5476 5481 tl_icon_t *tip;
5477 5482 tl_endpt_t *cl_tep;
5478 5483 mblk_t *d_mp;
5479 5484
5480 5485 ASSERT(IS_COTS(tep));
5481 5486 /*
5482 5487 * If our peer is closing, don't use it.
5483 5488 */
5484 5489 if ((peer_tep != NULL) && peer_tep->te_closing) {
5485 5490 TL_UNCONNECT(tep->te_conp);
5486 5491 peer_tep = NULL;
5487 5492 }
5488 5493 if ((srv_tep != NULL) && srv_tep->te_closing) {
5489 5494 TL_UNCONNECT(tep->te_oconp);
5490 5495 srv_tep = NULL;
5491 5496 }
5492 5497
5493 5498 if (tep->te_nicon > 0) {
5494 5499 l = &tep->te_iconp;
5495 5500 /*
5496 5501 * If incoming requests pending, change state
5497 5502 * of clients on disconnect ind event and send
5498 5503 * discon_ind pdu to modules above them
5499 5504 * for server: all clients get disconnect
5500 5505 */
5501 5506
5502 5507 while (tep->te_nicon > 0) {
5503 5508 tip = list_head(l);
5504 5509 cl_tep = tip->ti_tep;
5505 5510
5506 5511 if (cl_tep == NULL) {
5507 5512 tl_freetip(tep, tip);
5508 5513 continue;
5509 5514 }
5510 5515
5511 5516 if (cl_tep->te_oconp != NULL) {
5512 5517 ASSERT(cl_tep != cl_tep->te_oconp);
5513 5518 TL_UNCONNECT(cl_tep->te_oconp);
5514 5519 }
5515 5520
5516 5521 if (cl_tep->te_closing) {
5517 5522 tl_freetip(tep, tip);
5518 5523 continue;
5519 5524 }
5520 5525
5521 5526 enableok(cl_tep->te_wq);
5522 5527 TL_QENABLE(cl_tep);
5523 5528 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5524 5529 if (d_mp != NULL) {
5525 5530 cl_tep->te_state = TS_IDLE;
5526 5531 putnext(cl_tep->te_rq, d_mp);
5527 5532 } else {
5528 5533 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5529 5534 SL_TRACE|SL_ERROR,
5530 5535 "tl_co_unconnect:icmng: "
5531 5536 "allocb failure"));
5532 5537 }
5533 5538 tl_freetip(tep, tip);
5534 5539 }
5535 5540 } else if (srv_tep != NULL) {
5536 5541 /*
5537 5542 * If outgoing request pending, change state
5538 5543 * of server on discon ind event
5539 5544 */
5540 5545
5541 5546 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5542 5547 IS_COTSORD(srv_tep) &&
5543 5548 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5544 5549 /*
5545 5550 * Queue ordrel_ind for server to be picked up
5546 5551 * when the connection is accepted.
5547 5552 */
5548 5553 d_mp = tl_ordrel_ind_alloc();
5549 5554 } else {
5550 5555 /*
5551 5556 * send discon_ind to server
5552 5557 */
5553 5558 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5554 5559 }
5555 5560 if (d_mp == NULL) {
5556 5561 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5557 5562 SL_TRACE|SL_ERROR,
5558 5563 "tl_co_unconnect:outgoing:allocb failure"));
5559 5564 TL_UNCONNECT(tep->te_oconp);
5560 5565 goto discon_peer;
5561 5566 }
5562 5567
5563 5568 /*
5564 5569 * If this is a socket the T_DISCON_IND is queued with
5565 5570 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5566 5571 * from the list of pending connections.
5567 5572 * Note that when te_oconp is set the peer better have
5568 5573 * a t_connind_t for the client.
5569 5574 */
5570 5575 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5571 5576 /*
5572 5577 * Queue the disconnection message.
5573 5578 */
5574 5579 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5575 5580 } else {
5576 5581 tip = tl_icon_find(srv_tep, tep->te_seqno);
5577 5582 if (tip == NULL) {
5578 5583 freemsg(d_mp);
5579 5584 } else {
5580 5585 ASSERT(tep == tip->ti_tep);
5581 5586 ASSERT(tep->te_ser == srv_tep->te_ser);
5582 5587 /*
5583 5588 * Delete tip from the server list.
5584 5589 */
5585 5590 if (srv_tep->te_nicon == 1) {
5586 5591 srv_tep->te_state =
5587 5592 NEXTSTATE(TE_DISCON_IND2,
5588 5593 srv_tep->te_state);
5589 5594 } else {
5590 5595 srv_tep->te_state =
5591 5596 NEXTSTATE(TE_DISCON_IND3,
5592 5597 srv_tep->te_state);
5593 5598 }
5594 5599 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5595 5600 T_DISCON_IND);
5596 5601 putnext(srv_tep->te_rq, d_mp);
5597 5602 tl_freetip(srv_tep, tip);
5598 5603 }
5599 5604 TL_UNCONNECT(tep->te_oconp);
5600 5605 srv_tep = NULL;
5601 5606 }
5602 5607 } else if (peer_tep != NULL) {
5603 5608 /*
5604 5609 * unconnect existing connection
5605 5610 * If connected, change state of peer on
5606 5611 * discon ind event and send discon ind pdu
5607 5612 * to module above it
5608 5613 */
5609 5614
5610 5615 ASSERT(tep->te_ser == peer_tep->te_ser);
5611 5616 if (IS_COTSORD(peer_tep) &&
5612 5617 (peer_tep->te_state == TS_WIND_ORDREL ||
5613 5618 peer_tep->te_state == TS_DATA_XFER)) {
5614 5619 /*
5615 5620 * send ordrel ind
5616 5621 */
5617 5622 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5618 5623 "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5619 5624 peer_tep->te_state,
5620 5625 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5621 5626 d_mp = tl_ordrel_ind_alloc();
5622 5627 if (! d_mp) {
5623 5628 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5624 5629 SL_TRACE|SL_ERROR,
5625 5630 "tl_co_unconnect:connected:"
5626 5631 "allocb failure"));
5627 5632 /*
5628 5633 * Continue with cleaning up peer as
5629 5634 * this side may go away with the close
5630 5635 */
5631 5636 TL_QENABLE(peer_tep);
5632 5637 goto discon_peer;
5633 5638 }
5634 5639 peer_tep->te_state =
5635 5640 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5636 5641
5637 5642 putnext(peer_tep->te_rq, d_mp);
5638 5643 /*
5639 5644 * Handle flow control case. This will generate
5640 5645 * a t_discon_ind message with reason 0 if there
5641 5646 * is data queued on the write side.
5642 5647 */
5643 5648 TL_QENABLE(peer_tep);
5644 5649 } else if (IS_COTSORD(peer_tep) &&
5645 5650 peer_tep->te_state == TS_WREQ_ORDREL) {
5646 5651 /*
5647 5652 * Sent an ordrel_ind. We send a discon with
5648 5653 * error 0 to inform that the peer is gone.
5649 5654 */
5650 5655 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5651 5656 SL_TRACE|SL_ERROR,
5652 5657 "tl_co_unconnect: discon in state %d",
5653 5658 tep->te_state));
5654 5659 tl_discon_ind(peer_tep, 0);
5655 5660 } else {
5656 5661 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5657 5662 SL_TRACE|SL_ERROR,
5658 5663 "tl_co_unconnect: state %d", tep->te_state));
5659 5664 tl_discon_ind(peer_tep, ECONNRESET);
5660 5665 }
5661 5666
5662 5667 discon_peer:
5663 5668 /*
5664 5669 * Disconnect cross-pointers only for close
5665 5670 */
5666 5671 if (tep->te_closing) {
5667 5672 peer_tep = tep->te_conp;
5668 5673 TL_REMOVE_PEER(peer_tep->te_conp);
5669 5674 TL_REMOVE_PEER(tep->te_conp);
5670 5675 }
5671 5676 }
5672 5677 }
5673 5678
5674 5679 /*
5675 5680 * Note: The following routine does not recover from allocb()
5676 5681 * failures
5677 5682 * The reason should be from the <sys/errno.h> space.
5678 5683 */
5679 5684 static void
5680 5685 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5681 5686 {
5682 5687 mblk_t *d_mp;
5683 5688
5684 5689 if (tep->te_closing)
5685 5690 return;
5686 5691
5687 5692 /*
5688 5693 * flush the queues.
5689 5694 */
5690 5695 flushq(tep->te_rq, FLUSHDATA);
5691 5696 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5692 5697
5693 5698 /*
5694 5699 * send discon ind
5695 5700 */
5696 5701 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5697 5702 if (! d_mp) {
5698 5703 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5699 5704 "tl_discon_ind:allocb failure"));
5700 5705 return;
5701 5706 }
5702 5707 tep->te_state = TS_IDLE;
5703 5708 putnext(tep->te_rq, d_mp);
5704 5709 }
5705 5710
5706 5711 /*
5707 5712 * Note: The following routine does not recover from allocb()
5708 5713 * failures
5709 5714 * The reason should be from the <sys/errno.h> space.
5710 5715 */
5711 5716 static mblk_t *
5712 5717 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5713 5718 {
5714 5719 mblk_t *mp;
5715 5720 struct T_discon_ind *tdi;
5716 5721
5717 5722 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5718 5723 DB_TYPE(mp) = M_PROTO;
5719 5724 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5720 5725 tdi = (struct T_discon_ind *)mp->b_rptr;
5721 5726 tdi->PRIM_type = T_DISCON_IND;
5722 5727 tdi->DISCON_reason = reason;
5723 5728 tdi->SEQ_number = seqnum;
5724 5729 }
5725 5730 return (mp);
5726 5731 }
5727 5732
5728 5733
5729 5734 /*
5730 5735 * Note: The following routine does not recover from allocb()
5731 5736 * failures
5732 5737 */
5733 5738 static mblk_t *
5734 5739 tl_ordrel_ind_alloc(void)
5735 5740 {
5736 5741 mblk_t *mp;
5737 5742 struct T_ordrel_ind *toi;
5738 5743
5739 5744 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5740 5745 DB_TYPE(mp) = M_PROTO;
5741 5746 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5742 5747 toi = (struct T_ordrel_ind *)mp->b_rptr;
5743 5748 toi->PRIM_type = T_ORDREL_IND;
5744 5749 }
5745 5750 return (mp);
5746 5751 }
5747 5752
5748 5753
5749 5754 /*
5750 5755 * Lookup the seqno in the list of queued connections.
5751 5756 */
5752 5757 static tl_icon_t *
5753 5758 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5754 5759 {
5755 5760 list_t *l = &tep->te_iconp;
5756 5761 tl_icon_t *tip = list_head(l);
5757 5762
5758 5763 ASSERT(seqno != 0);
5759 5764
5760 5765 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5761 5766 ;
5762 5767
5763 5768 return (tip);
5764 5769 }
5765 5770
5766 5771 /*
5767 5772 * Queue data for a given T_CONN_IND while verifying that redundant
5768 5773 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5769 5774 * Used when the originator of the connection closes.
5770 5775 */
5771 5776 static void
5772 5777 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5773 5778 {
5774 5779 tl_icon_t *tip;
5775 5780 mblk_t **mpp, *mp;
5776 5781 int prim, nprim;
5777 5782
5778 5783 if (nmp->b_datap->db_type == M_PROTO)
5779 5784 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5780 5785 else
5781 5786 nprim = -1; /* M_DATA */
5782 5787
5783 5788 tip = tl_icon_find(tep, seqno);
5784 5789 if (tip == NULL) {
5785 5790 freemsg(nmp);
5786 5791 return;
5787 5792 }
5788 5793
5789 5794 ASSERT(tip->ti_seqno != 0);
5790 5795 mpp = &tip->ti_mp;
5791 5796 while (*mpp != NULL) {
5792 5797 mp = *mpp;
5793 5798
5794 5799 if (mp->b_datap->db_type == M_PROTO)
5795 5800 prim = ((union T_primitives *)mp->b_rptr)->type;
5796 5801 else
5797 5802 prim = -1; /* M_DATA */
5798 5803
5799 5804 /*
5800 5805 * Allow nothing after a T_DISCON_IND
5801 5806 */
5802 5807 if (prim == T_DISCON_IND) {
5803 5808 freemsg(nmp);
5804 5809 return;
5805 5810 }
5806 5811 /*
5807 5812 * Only allow a T_DISCON_IND after an T_ORDREL_IND
5808 5813 */
5809 5814 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5810 5815 freemsg(nmp);
5811 5816 return;
5812 5817 }
5813 5818 mpp = &(mp->b_next);
5814 5819 }
5815 5820 *mpp = nmp;
5816 5821 }
5817 5822
5818 5823 /*
5819 5824 * Verify if a certain TPI primitive exists on the connind queue.
5820 5825 * Use prim -1 for M_DATA.
5821 5826 * Return non-zero if found.
5822 5827 */
5823 5828 static boolean_t
5824 5829 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5825 5830 {
5826 5831 tl_icon_t *tip = tl_icon_find(tep, seqno);
5827 5832 boolean_t found = B_FALSE;
5828 5833
5829 5834 if (tip != NULL) {
5830 5835 mblk_t *mp;
5831 5836 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5832 5837 found = (DB_TYPE(mp) == M_PROTO &&
5833 5838 ((union T_primitives *)mp->b_rptr)->type == prim);
5834 5839 }
5835 5840 }
5836 5841 return (found);
5837 5842 }
5838 5843
5839 5844 /*
5840 5845 * Send the b_next mblk chain that has accumulated before the connection
5841 5846 * was accepted. Perform the necessary state transitions.
5842 5847 */
5843 5848 static void
5844 5849 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5845 5850 {
5846 5851 mblk_t *mp;
5847 5852 union T_primitives *primp;
5848 5853
5849 5854 if (tep->te_closing) {
5850 5855 tl_icon_freemsgs(mpp);
5851 5856 return;
5852 5857 }
5853 5858
5854 5859 ASSERT(tep->te_state == TS_DATA_XFER);
5855 5860 ASSERT(tep->te_rq->q_first == NULL);
5856 5861
5857 5862 while ((mp = *mpp) != NULL) {
5858 5863 *mpp = mp->b_next;
5859 5864 mp->b_next = NULL;
5860 5865
5861 5866 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5862 5867 switch (DB_TYPE(mp)) {
5863 5868 default:
5864 5869 freemsg(mp);
5865 5870 break;
5866 5871 case M_DATA:
5867 5872 putnext(tep->te_rq, mp);
5868 5873 break;
5869 5874 case M_PROTO:
5870 5875 primp = (union T_primitives *)mp->b_rptr;
5871 5876 switch (primp->type) {
5872 5877 case T_UNITDATA_IND:
5873 5878 case T_DATA_IND:
5874 5879 case T_OPTDATA_IND:
5875 5880 case T_EXDATA_IND:
5876 5881 putnext(tep->te_rq, mp);
5877 5882 break;
5878 5883 case T_ORDREL_IND:
5879 5884 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5880 5885 tep->te_state);
5881 5886 putnext(tep->te_rq, mp);
5882 5887 break;
5883 5888 case T_DISCON_IND:
5884 5889 tep->te_state = TS_IDLE;
5885 5890 putnext(tep->te_rq, mp);
5886 5891 break;
5887 5892 default:
5888 5893 #ifdef DEBUG
5889 5894 cmn_err(CE_PANIC,
5890 5895 "tl_icon_sendmsgs: unknown primitive");
5891 5896 #endif /* DEBUG */
5892 5897 freemsg(mp);
5893 5898 break;
5894 5899 }
5895 5900 break;
5896 5901 }
5897 5902 }
5898 5903 }
5899 5904
5900 5905 /*
5901 5906 * Free the b_next mblk chain that has accumulated before the connection
5902 5907 * was accepted.
5903 5908 */
5904 5909 static void
5905 5910 tl_icon_freemsgs(mblk_t **mpp)
5906 5911 {
5907 5912 mblk_t *mp;
5908 5913
5909 5914 while ((mp = *mpp) != NULL) {
5910 5915 *mpp = mp->b_next;
5911 5916 mp->b_next = NULL;
5912 5917 freemsg(mp);
5913 5918 }
5914 5919 }
5915 5920
5916 5921 /*
5917 5922 * Send M_ERROR
5918 5923 * Note: assumes caller ensured enough space in mp or enough
5919 5924 * memory available. Does not attempt recovery from allocb()
5920 5925 * failures
5921 5926 */
5922 5927
5923 5928 static void
5924 5929 tl_merror(queue_t *wq, mblk_t *mp, int error)
5925 5930 {
5926 5931 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5927 5932
5928 5933 if (tep->te_closing) {
5929 5934 freemsg(mp);
5930 5935 return;
5931 5936 }
5932 5937
5933 5938 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5934 5939 SL_TRACE|SL_ERROR,
5935 5940 "tl_merror: tep=%p, err=%d", (void *)tep, error));
5936 5941
5937 5942 /*
5938 5943 * flush all messages on queue. we are shutting
5939 5944 * the stream down on fatal error
5940 5945 */
5941 5946 flushq(wq, FLUSHALL);
5942 5947 if (IS_COTS(tep)) {
5943 5948 /* connection oriented - unconnect endpoints */
5944 5949 tl_co_unconnect(tep);
5945 5950 }
5946 5951 if (mp->b_cont) {
5947 5952 freemsg(mp->b_cont);
5948 5953 mp->b_cont = NULL;
5949 5954 }
5950 5955
5951 5956 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5952 5957 freemsg(mp);
5953 5958 mp = allocb(1, BPRI_HI);
5954 5959 if (!mp) {
5955 5960 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5956 5961 SL_TRACE|SL_ERROR,
5957 5962 "tl_merror:M_PROTO: out of memory"));
5958 5963 return;
5959 5964 }
5960 5965 }
5961 5966 if (mp) {
5962 5967 DB_TYPE(mp) = M_ERROR;
5963 5968 mp->b_rptr = DB_BASE(mp);
5964 5969 *mp->b_rptr = (char)error;
5965 5970 mp->b_wptr = mp->b_rptr + sizeof (char);
5966 5971 qreply(wq, mp);
5967 5972 } else {
5968 5973 (void) putnextctl1(tep->te_rq, M_ERROR, error);
5969 5974 }
5970 5975 }
5971 5976
5972 5977 static void
5973 5978 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
5974 5979 {
5975 5980 ASSERT(cr != NULL);
5976 5981
5977 5982 if (flag & TL_SETCRED) {
5978 5983 struct opthdr *opt = (struct opthdr *)buf;
5979 5984 tl_credopt_t *tlcred;
5980 5985
5981 5986 opt->level = TL_PROT_LEVEL;
5982 5987 opt->name = TL_OPT_PEER_CRED;
5983 5988 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
5984 5989
5985 5990 tlcred = (tl_credopt_t *)(opt + 1);
5986 5991 tlcred->tc_uid = crgetuid(cr);
5987 5992 tlcred->tc_gid = crgetgid(cr);
5988 5993 tlcred->tc_ruid = crgetruid(cr);
5989 5994 tlcred->tc_rgid = crgetrgid(cr);
5990 5995 tlcred->tc_suid = crgetsuid(cr);
5991 5996 tlcred->tc_sgid = crgetsgid(cr);
5992 5997 tlcred->tc_ngroups = crgetngroups(cr);
5993 5998 } else if (flag & TL_SETUCRED) {
5994 5999 struct opthdr *opt = (struct opthdr *)buf;
5995 6000
5996 6001 opt->level = TL_PROT_LEVEL;
5997 6002 opt->name = TL_OPT_PEER_UCRED;
5998 6003 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
5999 6004
6000 6005 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6001 6006 } else {
6002 6007 struct T_opthdr *topt = (struct T_opthdr *)buf;
6003 6008 ASSERT(flag & TL_SOCKUCRED);
6004 6009
6005 6010 topt->level = SOL_SOCKET;
6006 6011 topt->name = SCM_UCRED;
6007 6012 topt->len = ucredminsize(cr) + sizeof (*topt);
6008 6013 topt->status = 0;
6009 6014 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
6010 6015 }
6011 6016 }
6012 6017
6013 6018 /* ARGSUSED */
6014 6019 static int
6015 6020 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6016 6021 {
6017 6022 /* no default value processed in protocol specific code currently */
6018 6023 return (-1);
6019 6024 }
6020 6025
6021 6026 /* ARGSUSED */
6022 6027 static int
6023 6028 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6024 6029 {
6025 6030 int len;
6026 6031 tl_endpt_t *tep;
6027 6032 int *valp;
6028 6033
6029 6034 tep = (tl_endpt_t *)wq->q_ptr;
6030 6035
6031 6036 len = 0;
6032 6037
6033 6038 /*
6034 6039 * Assumes: option level and name sanity check done elsewhere
6035 6040 */
6036 6041
6037 6042 switch (level) {
6038 6043 case SOL_SOCKET:
6039 6044 if (! IS_SOCKET(tep))
6040 6045 break;
6041 6046 switch (name) {
6042 6047 case SO_RECVUCRED:
6043 6048 len = sizeof (int);
6044 6049 valp = (int *)ptr;
6045 6050 *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6046 6051 break;
6047 6052 default:
6048 6053 break;
6049 6054 }
6050 6055 break;
6051 6056 case TL_PROT_LEVEL:
6052 6057 switch (name) {
6053 6058 case TL_OPT_PEER_CRED:
6054 6059 case TL_OPT_PEER_UCRED:
6055 6060 /*
6056 6061 * option not supposed to retrieved directly
6057 6062 * Only sent in T_CON_{IND,CON}, T_UNITDATA_IND
6058 6063 * when some internal flags set by other options
6059 6064 * Direct retrieval always designed to fail(ignored)
6060 6065 * for this option.
6061 6066 */
6062 6067 break;
6063 6068 }
6064 6069 }
6065 6070 return (len);
6066 6071 }
6067 6072
6068 6073 /* ARGSUSED */
6069 6074 static int
6070 6075 tl_set_opt(
6071 6076 queue_t *wq,
6072 6077 uint_t mgmt_flags,
6073 6078 int level,
6074 6079 int name,
6075 6080 uint_t inlen,
6076 6081 uchar_t *invalp,
6077 6082 uint_t *outlenp,
6078 6083 uchar_t *outvalp,
6079 6084 void *thisdg_attrs,
6080 6085 cred_t *cr)
6081 6086 {
6082 6087 int error;
6083 6088 tl_endpt_t *tep;
6084 6089
6085 6090 tep = (tl_endpt_t *)wq->q_ptr;
6086 6091
6087 6092 error = 0; /* NOERROR */
6088 6093
6089 6094 /*
6090 6095 * Assumes: option level and name sanity checks done elsewhere
6091 6096 */
6092 6097
6093 6098 switch (level) {
6094 6099 case SOL_SOCKET:
6095 6100 if (! IS_SOCKET(tep)) {
6096 6101 error = EINVAL;
6097 6102 break;
6098 6103 }
6099 6104 /*
6100 6105 * TBD: fill in other AF_UNIX socket options and then stop
6101 6106 * returning error.
6102 6107 */
6103 6108 switch (name) {
6104 6109 case SO_RECVUCRED:
6105 6110 /*
6106 6111 * We only support this for datagram sockets;
6107 6112 * getpeerucred handles the connection oriented
6108 6113 * transports.
6109 6114 */
6110 6115 if (! IS_CLTS(tep)) {
6111 6116 error = EINVAL;
6112 6117 break;
6113 6118 }
6114 6119 if (*(int *)invalp == 0)
6115 6120 tep->te_flag &= ~TL_SOCKUCRED;
6116 6121 else
6117 6122 tep->te_flag |= TL_SOCKUCRED;
6118 6123 break;
6119 6124 default:
6120 6125 error = EINVAL;
6121 6126 break;
6122 6127 }
6123 6128 break;
6124 6129 case TL_PROT_LEVEL:
6125 6130 switch (name) {
6126 6131 case TL_OPT_PEER_CRED:
6127 6132 case TL_OPT_PEER_UCRED:
6128 6133 /*
6129 6134 * option not supposed to be set directly
6130 6135 * Its value in initialized for each endpoint at
6131 6136 * driver open time.
6132 6137 * Direct setting always designed to fail for this
6133 6138 * option.
6134 6139 */
6135 6140 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6136 6141 SL_TRACE|SL_ERROR,
6137 6142 "tl_set_opt: option is not supported"));
6138 6143 error = EPROTO;
6139 6144 break;
6140 6145 }
6141 6146 }
6142 6147 return (error);
6143 6148 }
6144 6149
6145 6150
6146 6151 static void
6147 6152 tl_timer(void *arg)
6148 6153 {
6149 6154 queue_t *wq = arg;
6150 6155 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6151 6156
6152 6157 ASSERT(tep);
6153 6158
6154 6159 tep->te_timoutid = 0;
6155 6160
6156 6161 enableok(wq);
6157 6162 /*
6158 6163 * Note: can call wsrv directly here and save context switch
6159 6164 * Consider change when qtimeout (not timeout) is active
6160 6165 */
6161 6166 qenable(wq);
6162 6167 }
6163 6168
6164 6169 static void
6165 6170 tl_buffer(void *arg)
6166 6171 {
6167 6172 queue_t *wq = arg;
6168 6173 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6169 6174
6170 6175 ASSERT(tep);
6171 6176
6172 6177 tep->te_bufcid = 0;
6173 6178 tep->te_nowsrv = B_FALSE;
6174 6179
6175 6180 enableok(wq);
6176 6181 /*
6177 6182 * Note: can call wsrv directly here and save context switch
6178 6183 * Consider change when qbufcall (not bufcall) is active
6179 6184 */
6180 6185 qenable(wq);
6181 6186 }
6182 6187
6183 6188 static void
6184 6189 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6185 6190 {
6186 6191 tl_endpt_t *tep;
6187 6192
6188 6193 tep = (tl_endpt_t *)wq->q_ptr;
6189 6194
6190 6195 if (tep->te_closing) {
6191 6196 freemsg(mp);
6192 6197 return;
6193 6198 }
6194 6199 noenable(wq);
6195 6200
6196 6201 (void) insq(wq, wq->q_first, mp);
6197 6202
6198 6203 if (tep->te_bufcid || tep->te_timoutid) {
6199 6204 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6200 6205 "tl_memrecover:recover %p pending", (void *)wq));
6201 6206 return;
6202 6207 }
6203 6208
6204 6209 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6205 6210 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6206 6211 drv_usectohz(TL_BUFWAIT));
6207 6212 }
6208 6213 }
6209 6214
6210 6215 static void
6211 6216 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6212 6217 {
6213 6218 ASSERT(tip->ti_seqno != 0);
6214 6219
6215 6220 if (tip->ti_mp != NULL) {
6216 6221 tl_icon_freemsgs(&tip->ti_mp);
6217 6222 tip->ti_mp = NULL;
6218 6223 }
6219 6224 if (tip->ti_tep != NULL) {
6220 6225 tl_refrele(tip->ti_tep);
6221 6226 tip->ti_tep = NULL;
6222 6227 }
6223 6228 list_remove(&tep->te_iconp, tip);
6224 6229 kmem_free(tip, sizeof (tl_icon_t));
6225 6230 tep->te_nicon--;
6226 6231 }
6227 6232
6228 6233 /*
6229 6234 * Remove address from address hash.
6230 6235 */
6231 6236 static void
6232 6237 tl_addr_unbind(tl_endpt_t *tep)
6233 6238 {
6234 6239 tl_endpt_t *elp;
6235 6240
6236 6241 if (tep->te_flag & TL_ADDRHASHED) {
6237 6242 if (IS_SOCKET(tep)) {
6238 6243 (void) mod_hash_remove(tep->te_addrhash,
6239 6244 (mod_hash_key_t)tep->te_vp,
6240 6245 (mod_hash_val_t *)&elp);
6241 6246 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6242 6247 tep->te_magic = SOU_MAGIC_IMPLICIT;
6243 6248 } else {
6244 6249 (void) mod_hash_remove(tep->te_addrhash,
6245 6250 (mod_hash_key_t)&tep->te_ap,
6246 6251 (mod_hash_val_t *)&elp);
6247 6252 (void) kmem_free(tep->te_abuf, tep->te_alen);
6248 6253 tep->te_alen = -1;
6249 6254 tep->te_abuf = NULL;
6250 6255 }
6251 6256 tep->te_flag &= ~TL_ADDRHASHED;
6252 6257 }
6253 6258 }
↓ open down ↓ |
1973 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX