Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/clients/rds/rdsddi.c
+++ new/usr/src/uts/common/io/ib/clients/rds/rdsddi.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/conf.h>
28 28 #include <sys/modctl.h>
29 29 #include <sys/stat.h>
30 30 #include <sys/stream.h>
31 31 #include <sys/strsun.h>
32 32 #include <sys/ddi.h>
33 33 #include <sys/sunddi.h>
34 34 #include <sys/priv_names.h>
35 35 #include <inet/common.h>
36 36
37 37 #define _SUN_TPI_VERSION 2
38 38 #include <sys/tihdr.h>
39 39 #include <sys/timod.h>
40 40 #include <sys/tiuser.h>
41 41 #include <sys/suntpi.h>
42 42 #include <inet/common.h>
43 43 #include <inet/ip.h>
44 44 #include <inet/mi.h>
45 45 #include <inet/proto_set.h>
46 46 #include <sys/ib/clients/rds/rds.h>
47 47 #include <sys/policy.h>
48 48 #include <inet/ipclassifier.h>
49 49 #include <sys/ib/clients/rds/rds_kstat.h>
50 50 #include "sys/random.h"
51 51 #include <sys/ib/clients/rds/rds_transport.h>
52 52 #include <sys/ib/ibtl/ibti.h>
53 53
54 54
55 55 #define RDS_NAME "rds"
56 56 #define RDS_STRTAB rdsinfo
57 57 #define RDS_DEVDESC "RDS STREAMS driver"
58 58 #define RDS_DEVMINOR 0
59 59 #define RDS_DEVMTFLAGS D_MP | D_SYNCSTR
60 60 #define RDS_DEFAULT_PRIV_MODE 0666
61 61
62 62 #define rds_smallest_port 1
63 63 #define rds_largest_port 65535
64 64
65 65 #define RDS_RECV_HIWATER (56 * 1024)
66 66 #define RDS_RECV_LOWATER 128
67 67 #define RDS_XMIT_HIWATER (56 * 1024)
68 68 #define RDS_XMIT_LOWATER 1024
69 69
70 70 #define RDS_DPRINTF2 0 &&
71 71 #define LABEL "RDS"
72 72
73 73 typedef struct rdsahdr_s {
74 74 in_port_t uha_src_port; /* Source port */
75 75 in_port_t uha_dst_port; /* Destination port */
76 76 } rdsha_t;
77 77
78 78 #define RDSH_SIZE 4
79 79
80 80 int rds_recv_hiwat = RDS_RECV_HIWATER;
81 81 int rds_recv_lowat = RDS_RECV_LOWATER;
82 82 int rds_xmit_hiwat = RDS_XMIT_HIWATER;
83 83 int rds_xmit_lowat = RDS_XMIT_LOWATER;
84 84
85 85 int rdsdebug;
86 86
87 87 static dev_info_t *rds_dev_info;
88 88
89 89 /* Hint not protected by any lock */
90 90 static in_port_t rds_next_port_to_try;
91 91
92 92 ldi_ident_t rds_li;
93 93 static int loopmax = rds_largest_port - rds_smallest_port + 1;
94 94
95 95 /* global configuration variables */
96 96 uint_t UserBufferSize;
97 97 uint_t rds_rx_pkts_pending_hwm;
98 98
99 99 extern void rds_ioctl(queue_t *, mblk_t *);
100 100 extern void rds_ioctl_copyin_done(queue_t *q, mblk_t *mp);
101 101
102 102 int rds_open_transport_driver();
103 103 int rds_close_transport_driver();
104 104
105 105 #define RDS_CURRENT_PORT_QUOTA() \
106 106 (rds_rx_pkts_pending_hwm/RDS_GET_NPORT())
107 107
108 108 krwlock_t rds_transport_lock;
109 109 ldi_handle_t rds_transport_handle = NULL;
110 110 rds_transport_ops_t *rds_transport_ops = NULL;
111 111
112 112 static int
113 113 rds_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
114 114 {
115 115 int ret;
116 116
117 117 if (cmd != DDI_ATTACH)
118 118 return (DDI_FAILURE);
119 119
120 120 rds_dev_info = devi;
121 121
122 122 ret = ddi_create_minor_node(devi, RDS_NAME, S_IFCHR,
123 123 RDS_DEVMINOR, DDI_PSEUDO, 0);
124 124 if (ret != DDI_SUCCESS) {
125 125 return (ret);
126 126 }
127 127
128 128 return (DDI_SUCCESS);
129 129 }
130 130
131 131 static int
132 132 rds_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
133 133 {
134 134 if (cmd != DDI_DETACH)
135 135 return (DDI_FAILURE);
136 136
137 137 ASSERT(devi == rds_dev_info);
138 138
139 139 ddi_remove_minor_node(devi, NULL);
140 140
141 141 return (DDI_SUCCESS);
142 142 }
143 143
144 144 /* ARGSUSED */
145 145 static int
146 146 rds_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
147 147 {
148 148 int error = DDI_FAILURE;
149 149
150 150 switch (cmd) {
151 151 case DDI_INFO_DEVT2DEVINFO:
152 152 if (rds_dev_info != NULL) {
153 153 *result = (void *)rds_dev_info;
154 154 error = DDI_SUCCESS;
155 155 }
156 156 break;
157 157
158 158 case DDI_INFO_DEVT2INSTANCE:
159 159 *result = NULL;
160 160 error = DDI_SUCCESS;
161 161 break;
162 162
163 163 default:
164 164 break;
165 165 }
166 166
167 167 return (error);
168 168 }
169 169
170 170
171 171 /*ARGSUSED*/
172 172 static int
173 173 rds_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
174 174 {
175 175 rds_t *rds;
176 176 int ret;
177 177
178 178 if (is_system_labeled()) {
179 179 /*
180 180 * RDS socket is not supported on labeled systems
181 181 */
182 182 return (ESOCKTNOSUPPORT);
183 183 }
184 184
185 185 /* Open the transport driver if IB HW is present */
186 186 rw_enter(&rds_transport_lock, RW_READER);
187 187 if (rds_transport_handle == NULL) {
188 188 rw_exit(&rds_transport_lock);
189 189 ret = rds_open_transport_driver();
190 190 rw_enter(&rds_transport_lock, RW_READER);
191 191
192 192 if (ret != 0) {
193 193 /* Transport driver failed to load */
194 194 rw_exit(&rds_transport_lock);
195 195 return (ret);
196 196 }
197 197 }
198 198 rw_exit(&rds_transport_lock);
199 199
200 200 if (sflag == MODOPEN) {
201 201 return (EINVAL);
202 202 }
203 203
204 204 /* Reopen not supported */
205 205 if (q->q_ptr != NULL) {
206 206 dprint(2, ("%s: Reopen is not supported: %p", LABEL, q->q_ptr));
207 207 return (0);
208 208 }
209 209
210 210 rds = rds_create(q, credp);
211 211 if (rds == NULL) {
212 212 dprint(2, ("%s: rds_create failed", LABEL));
213 213 return (0);
214 214 }
215 215
216 216 q->q_ptr = WR(q)->q_ptr = rds;
217 217 rds->rds_state = TS_UNBND;
218 218 rds->rds_family = AF_INET_OFFLOAD;
219 219
220 220 q->q_hiwat = rds_recv_hiwat;
221 221 q->q_lowat = rds_recv_lowat;
222 222
223 223 qprocson(q);
224 224
225 225 WR(q)->q_hiwat = rds_xmit_hiwat;
226 226 WR(q)->q_lowat = rds_xmit_lowat;
227 227
228 228 /* Set the Stream head watermarks */
229 229 (void) proto_set_rx_hiwat(q, NULL, rds_recv_hiwat);
230 230 (void) proto_set_rx_lowat(q, NULL, rds_recv_lowat);
231 231
232 232 return (0);
233 233 }
234 234
235 235 static int
236 236 rds_close(queue_t *q)
237 237 {
238 238 rds_t *rdsp = (rds_t *)q->q_ptr;
239 239
240 240 qprocsoff(q);
241 241
242 242 /*
243 243 * NPORT should be decremented only if this socket was previously
244 244 * bound to an RDS port.
245 245 */
246 246 if (rdsp->rds_state >= TS_IDLE) {
247 247 RDS_DECR_NPORT();
248 248 RDS_SET_PORT_QUOTA(RDS_CURRENT_PORT_QUOTA());
249 249 rds_transport_ops->
250 250 rds_transport_resume_port(ntohs(rdsp->rds_port));
251 251 }
252 252
253 253 /* close the transport driver if this is the last socket */
254 254 if (RDS_GET_NPORT() == 1) {
255 255 (void) rds_close_transport_driver();
256 256 }
257 257
258 258 /*
259 259 * We set the flags without holding a lock as this is
260 260 * just a hint for the fanout lookup to skip this rds.
261 261 * We dont free the struct until it's out of the hash and
262 262 * the ref count goes down.
263 263 */
264 264 rdsp->rds_flags |= RDS_CLOSING;
265 265 rds_bind_hash_remove(rdsp, B_FALSE);
266 266 mutex_enter(&rdsp->rds_lock);
267 267 ASSERT(rdsp->rds_refcnt > 0);
268 268 if (rdsp->rds_refcnt != 1) {
269 269 cv_wait(&rdsp->rds_refcv, &rdsp->rds_lock);
270 270 }
271 271 mutex_exit(&rdsp->rds_lock);
272 272 RDS_DEC_REF_CNT(rdsp);
273 273 RD(q)->q_ptr = NULL;
274 274 WR(q)->q_ptr = NULL;
275 275 return (0);
276 276 }
277 277
278 278 /*
279 279 * Add a new message to the socket
280 280 */
281 281 int
282 282 rds_deliver_new_msg(mblk_t *mp, ipaddr_t local_addr, ipaddr_t rem_addr,
283 283 in_port_t local_port, in_port_t rem_port, zoneid_t zoneid)
284 284 {
285 285 rds_t *rds;
286 286 struct T_unitdata_ind *tudi;
287 287 int udi_size; /* Size of T_unitdata_ind */
288 288 mblk_t *mp1;
289 289 sin_t *sin;
290 290 int error = 0;
291 291
292 292 local_port = htons(local_port);
293 293 rem_port = htons(rem_port);
294 294
295 295 ASSERT(mp->b_datap->db_type == M_DATA);
296 296 rds = rds_fanout(local_addr, rem_addr, local_port, rem_port, zoneid);
297 297 if (rds == NULL) {
298 298 dprint(2, ("%s: rds_fanout failed: (0x%x 0x%x %d %d)", LABEL,
299 299 local_addr, rem_addr, ntohs(local_port), ntohs(rem_port)));
300 300 freemsg(mp);
301 301 return (error);
302 302 }
303 303
304 304 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
305 305
306 306 /* Allocate a message block for the T_UNITDATA_IND structure. */
307 307 mp1 = allocb(udi_size, BPRI_MED);
308 308 if (mp1 == NULL) {
309 309 dprint(2, ("%s: allocb failed", LABEL));
310 310 freemsg(mp);
311 311 return (ENOMEM);
312 312 }
313 313
314 314 mp1->b_cont = mp;
315 315 mp = mp1;
316 316 mp->b_datap->db_type = M_PROTO;
317 317 tudi = (struct T_unitdata_ind *)(uintptr_t)mp->b_rptr;
318 318 mp->b_wptr = (uchar_t *)tudi + udi_size;
319 319 tudi->PRIM_type = T_UNITDATA_IND;
320 320 tudi->SRC_length = sizeof (sin_t);
321 321 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
322 322 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
323 323 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
324 324 tudi->OPT_length = udi_size;
325 325 sin = (sin_t *)&tudi[1];
326 326 sin->sin_addr.s_addr = rem_addr;
327 327 sin->sin_port = ntohs(rem_port);
328 328 sin->sin_family = rds->rds_family;
329 329 *(uint32_t *)(uintptr_t)&sin->sin_zero[0] = 0;
330 330 *(uint32_t *)(uintptr_t)&sin->sin_zero[4] = 0;
331 331
332 332 putnext(rds->rds_ulpd, mp);
333 333
334 334 /* check port quota */
335 335 if (RDS_GET_RXPKTS_PEND() > rds_rx_pkts_pending_hwm) {
336 336 ulong_t current_port_quota = RDS_GET_PORT_QUOTA();
337 337 if (rds->rds_port_quota > current_port_quota) {
338 338 /* this may result in stalling the port */
339 339 rds->rds_port_quota = current_port_quota;
340 340 (void) proto_set_rx_hiwat(rds->rds_ulpd, NULL,
341 341 rds->rds_port_quota * UserBufferSize);
342 342 RDS_INCR_PORT_QUOTA_ADJUSTED();
343 343 }
344 344 }
345 345
346 346 /*
347 347 * canputnext() check is done after putnext as the protocol does
348 348 * not allow dropping any received packet.
349 349 */
350 350 if (!canputnext(rds->rds_ulpd)) {
351 351 error = ENOSPC;
352 352 }
353 353
354 354 RDS_DEC_REF_CNT(rds);
355 355 return (error);
356 356 }
357 357
358 358
359 359 /* Default structure copied into T_INFO_ACK messages */
360 360 static struct T_info_ack rds_g_t_info_ack_ipv4 = {
361 361 T_INFO_ACK,
362 362 65535, /* TSDU_size. Excl. headers */
363 363 T_INVALID, /* ETSDU_size. rds does not support expedited data. */
364 364 T_INVALID, /* CDATA_size. rds does not support connect data. */
365 365 T_INVALID, /* DDATA_size. rds does not support disconnect data. */
366 366 sizeof (sin_t), /* ADDR_size. */
367 367 0, /* OPT_size. Placeholder; rds_info_req() overwrites it in its reply. */
368 368 65535, /* TIDU_size. Excl. headers */
369 369 T_CLTS, /* SERV_type. rds supports connection-less. */
370 370 TS_UNBND, /* CURRENT_state. rds_info_req() replaces this with rds_state. */
371 371 (XPG4_1|SENDZERO) /* PROVIDER_flag */
372 372 };
373 373
374 374 static in_port_t
375 375 rds_update_next_port(in_port_t port)
376 376 {
377 377 (void) random_get_pseudo_bytes((uint8_t *)&port, sizeof (in_port_t));
378 378 if (port < rds_smallest_port)
379 379 port = rds_smallest_port;
380 380 return (port);
381 381 }
382 382
383 383 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
384 384 static void
385 385 rds_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
386 386 {
387 387 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
388 388 qreply(q, mp);
389 389 }
390 390
391 391 static void
392 392 rds_capability_req(queue_t *q, mblk_t *mp)
393 393 {
394 394 t_uscalar_t cap_bits1;
395 395 struct T_capability_ack *tcap;
396 396
397 397 cap_bits1 =
398 398 ((struct T_capability_req *)(uintptr_t)mp->b_rptr)->CAP_bits1;
399 399
400 400 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
401 401 mp->b_datap->db_type, T_CAPABILITY_ACK);
402 402 if (mp == NULL)
403 403 return;
404 404 tcap = (struct T_capability_ack *)(uintptr_t)mp->b_rptr;
405 405 tcap->CAP_bits1 = 0;
406 406
407 407 if (cap_bits1 & TC1_INFO) {
408 408 tcap->CAP_bits1 |= TC1_INFO;
409 409 *(&tcap->INFO_ack) = rds_g_t_info_ack_ipv4;
410 410 }
411 411
412 412 qreply(q, mp);
413 413 }
414 414
415 415 static void
416 416 rds_info_req(queue_t *q, mblk_t *omp)
417 417 {
418 418 rds_t *rds = (rds_t *)q->q_ptr;
419 419 struct T_info_ack *tap;
420 420 mblk_t *mp;
421 421
422 422 /* Create a T_INFO_ACK message. */
423 423 mp = tpi_ack_alloc(omp, sizeof (struct T_info_ack), M_PCPROTO,
424 424 T_INFO_ACK);
425 425 if (mp == NULL)
426 426 return;
427 427 tap = (struct T_info_ack *)(uintptr_t)mp->b_rptr;
428 428 *tap = rds_g_t_info_ack_ipv4;
429 429 tap->CURRENT_state = rds->rds_state;
430 430 tap->OPT_size = 128;
431 431 qreply(q, mp);
432 432 }
433 433
434 434 /*
435 435 * NO locking protection here as sockfs will only send down
436 436 * one bind operation at a time.
437 437 */
438 438 static void
439 439 rds_bind(queue_t *q, mblk_t *mp)
440 440 {
441 441 sin_t *sin;
442 442 rds_t *rds;
443 443 struct T_bind_req *tbr;
444 444 in_port_t port; /* Host byte order */
445 445 in_port_t requested_port; /* Host byte order */
446 446 struct T_bind_ack *tba;
447 447 int count;
448 448 rds_bf_t *rdsbf;
449 449 in_port_t lport; /* Network byte order */
450 450
451 451 rds = (rds_t *)q->q_ptr;
452 452 if (((uintptr_t)mp->b_wptr - (uintptr_t)mp->b_rptr) < sizeof (*tbr)) {
453 453 rds_err_ack(q, mp, TPROTO, 0);
454 454 return;
455 455 }
456 456
457 457 /*
458 458 * We don't allow multiple binds
459 459 */
460 460 if (rds->rds_state != TS_UNBND) {
461 461 rds_err_ack(q, mp, TOUTSTATE, 0);
462 462 return;
463 463 }
464 464
465 465 tbr = (struct T_bind_req *)(uintptr_t)mp->b_rptr;
466 466 switch (tbr->ADDR_length) {
467 467 case sizeof (sin_t): /* Complete IPv4 address */
468 468 sin = (sin_t *)(uintptr_t)mi_offset_param(mp, tbr->ADDR_offset,
469 469 sizeof (sin_t));
470 470 if (sin == NULL || !OK_32PTR((char *)sin)) {
471 471 rds_err_ack(q, mp, TSYSERR, EINVAL);
472 472 return;
473 473 }
474 474 if (rds->rds_family != AF_INET_OFFLOAD ||
475 475 sin->sin_family != AF_INET_OFFLOAD) {
476 476 rds_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
477 477 return;
478 478 }
479 479 if (sin->sin_addr.s_addr == INADDR_ANY) {
480 480 rds_err_ack(q, mp, TBADADDR, 0);
481 481 return;
482 482 }
483 483
484 484 /*
485 485 * verify that the address is hosted on IB
486 486 * only exception is the loopback address.
487 487 */
488 488 if ((sin->sin_addr.s_addr != INADDR_LOOPBACK) &&
489 489 !rds_verify_bind_address(sin->sin_addr.s_addr)) {
490 490 rds_err_ack(q, mp, TBADADDR, 0);
491 491 return;
492 492 }
493 493
494 494 port = ntohs(sin->sin_port);
495 495 break;
496 496 default: /* Invalid request */
497 497 rds_err_ack(q, mp, TBADADDR, 0);
498 498 return;
499 499 }
500 500
501 501 requested_port = port;
502 502
503 503 /*
504 504 * TPI only sends down T_BIND_REQ for AF_INET and AF_INET6
505 505 * since RDS socket is of type AF_INET_OFFLOAD a O_T_BIND_REQ
506 506 * will be sent down. Treat O_T_BIND_REQ as T_BIND_REQ
507 507 */
508 508
509 509 if (requested_port == 0) {
510 510 /*
511 511 * If the application passed in zero for the port number, it
512 512 * doesn't care which port number we bind to. Get one in the
513 513 * valid range.
514 514 */
515 515 port = rds_update_next_port(rds_next_port_to_try);
516 516 }
517 517
518 518 ASSERT(port != 0);
519 519 count = 0;
520 520 for (;;) {
521 521 rds_t *rds1;
522 522 ASSERT(sin->sin_addr.s_addr != INADDR_ANY);
523 523 /*
524 524 * Walk through the list of rds streams bound to
525 525 * requested port with the same IP address.
526 526 */
527 527 lport = htons(port);
528 528 rdsbf = &rds_bind_fanout[RDS_BIND_HASH(lport)];
529 529 mutex_enter(&rdsbf->rds_bf_lock);
530 530 for (rds1 = rdsbf->rds_bf_rds; rds1 != NULL;
531 531 rds1 = rds1->rds_bind_hash) {
532 532 if (lport != rds1->rds_port ||
533 533 rds1->rds_src != sin->sin_addr.s_addr ||
534 534 rds1->rds_zoneid != rds->rds_zoneid)
535 535
536 536 continue;
537 537 break;
538 538 }
539 539
540 540 if (rds1 == NULL) {
541 541 /*
542 542 * No other stream has this IP address
543 543 * and port number. We can use it.
544 544 */
545 545 break;
546 546 }
547 547 mutex_exit(&rdsbf->rds_bf_lock);
548 548 if (requested_port != 0) {
549 549 /*
550 550 * We get here only when requested port
551 551 * is bound (and only first of the for()
552 552 * loop iteration).
553 553 *
554 554 * The semantics of this bind request
555 555 * require it to fail so we return from
556 556 * the routine (and exit the loop).
557 557 *
558 558 */
559 559 rds_err_ack(q, mp, TADDRBUSY, 0);
560 560 return;
561 561 }
562 562
563 563 port = rds_update_next_port(port + 1);
564 564
565 565 if (++count >= loopmax) {
566 566 /*
567 567 * We've tried every possible port number and
568 568 * there are none available, so send an error
569 569 * to the user.
570 570 */
571 571 rds_err_ack(q, mp, TNOADDR, 0);
572 572 return;
573 573 }
574 574 }
575 575
576 576 /*
577 577 * Copy the source address into our rds structure.
578 578 */
579 579 rds->rds_src = sin->sin_addr.s_addr;
580 580 rds->rds_port = lport;
581 581
582 582 /*
583 583 * reset the next port if we choose the port
584 584 */
585 585 if (requested_port == 0) {
586 586 rds_next_port_to_try = port + 1;
587 587 }
588 588
589 589 rds->rds_state = TS_IDLE;
590 590 rds_bind_hash_insert(rdsbf, rds);
591 591 mutex_exit(&rdsbf->rds_bf_lock);
592 592
593 593 /* Reset the message type in preparation for shipping it back. */
594 594 mp->b_datap->db_type = M_PCPROTO;
595 595 tba = (struct T_bind_ack *)(uintptr_t)mp->b_rptr;
596 596 tba->PRIM_type = T_BIND_ACK;
597 597
598 598 /* Increment the number of ports and set the port quota */
599 599 RDS_INCR_NPORT();
600 600 rds->rds_port_quota = RDS_CURRENT_PORT_QUOTA();
601 601 RDS_SET_PORT_QUOTA(rds->rds_port_quota);
602 602 (void) proto_set_rx_hiwat(RD(q), NULL,
603 603 rds->rds_port_quota * UserBufferSize);
604 604
605 605 qreply(q, mp);
606 606 }
607 607
608 608 static void
609 609 rds_wput_other(queue_t *q, mblk_t *mp)
610 610 {
611 611 uchar_t *rptr = mp->b_rptr;
612 612 struct datab *db;
613 613 cred_t *cr;
614 614
615 615 db = mp->b_datap;
616 616 switch (db->db_type) {
617 617 case M_DATA:
618 618 /* Not connected */
619 619 freemsg(mp);
620 620 return;
621 621 case M_PROTO:
622 622 case M_PCPROTO:
623 623 if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr <
624 624 sizeof (t_scalar_t)) {
625 625 freemsg(mp);
626 626 return;
627 627 }
628 628 switch (((union T_primitives *)(uintptr_t)rptr)->type) {
629 629 case T_CAPABILITY_REQ:
630 630 rds_capability_req(q, mp);
631 631 return;
632 632
633 633 case T_INFO_REQ:
634 634 rds_info_req(q, mp);
635 635 return;
636 636 case O_T_BIND_REQ:
637 637 case T_BIND_REQ:
638 638 rds_bind(q, mp);
639 639 return;
640 640 case T_SVR4_OPTMGMT_REQ:
641 641 case T_OPTMGMT_REQ:
642 642 /*
643 643 * All Solaris components should pass a db_credp
644 644 * for this TPI message, hence we ASSERT.
645 645 * But in case there is some other M_PROTO that looks
646 646 * like a TPI message sent by some other kernel
647 647 * component, we check and return an error.
648 648 */
649 649 cr = msg_getcred(mp, NULL);
650 650 ASSERT(cr != NULL);
651 651 if (cr == NULL) {
652 652 rds_err_ack(q, mp, TSYSERR, EINVAL);
653 653 return;
654 654 }
655 655 if (((union T_primitives *)(uintptr_t)rptr)->type ==
656 656 T_SVR4_OPTMGMT_REQ) {
657 657 svr4_optcom_req(q, mp, cr, &rds_opt_obj);
658 658 } else {
659 659 tpi_optcom_req(q, mp, cr, &rds_opt_obj);
660 660 }
661 661 return;
662 662 case T_CONN_REQ:
663 663 /*
664 664 * We should not receive T_CONN_REQ as sockfs only
665 665 * sends down T_CONN_REQ if family == AF_INET/AF_INET6
666 666 * and type == SOCK_DGRAM/SOCK_RAW. For all others
667 667 * it simply calls soisconnected. see sotpi_connect()
668 668 * for details.
669 669 */
670 670 /* FALLTHRU */
671 671 default:
672 672 cmn_err(CE_PANIC, "type %d \n",
673 673 ((union T_primitives *)(uintptr_t)rptr)->type);
674 674 }
675 675 break;
676 676 case M_FLUSH:
677 677 if (*rptr & FLUSHW)
678 678 flushq(q, FLUSHDATA);
679 679 break;
680 680 case M_IOCTL:
681 681 rds_ioctl(q, mp);
682 682 break;
683 683 case M_IOCDATA:
684 684 /* IOCTL continuation following copyin or copyout. */
685 685 if (mi_copy_state(q, mp, NULL) == -1) {
686 686 /*
687 687 * The copy operation failed. mi_copy_state already
688 688 * cleaned up, so we're out of here.
689 689 */
690 690 return;
691 691 }
692 692 /*
693 693 * If we just completed a copy in, continue processing
694 694 * in rds_ioctl_copyin_done. If it was a copy out, we call
695 695 * mi_copyout again. If there is nothing more to copy out,
696 696 * it will complete the IOCTL.
697 697 */
698 698
699 699 if (MI_COPY_DIRECTION(mp) == MI_COPY_IN)
700 700 rds_ioctl_copyin_done(q, mp);
701 701 else
702 702 mi_copyout(q, mp);
703 703 return;
704 704
705 705 default:
706 706 cmn_err(CE_PANIC, "types %d \n", db->db_type);
707 707 }
708 708 }
709 709
710 710 static int
711 711 rds_wput(queue_t *q, mblk_t *mp)
712 712 {
713 713 struct datab *db;
714 714 uchar_t *rptr = mp->b_rptr;
715 715
716 716 db = mp->b_datap;
717 717 switch (db->db_type) {
718 718 case M_PROTO:
719 719 case M_PCPROTO:
720 720 ASSERT(((uintptr_t)mp->b_wptr - (uintptr_t)rptr) <=
721 721 (uintptr_t)INT_MAX);
722 722 if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr >=
723 723 sizeof (struct T_unitdata_req)) {
724 724 if (((union T_primitives *)(uintptr_t)rptr)->type
725 725 == T_UNITDATA_REQ) {
726 726 /*
727 727 * We should never come here for T_UNITDATA_REQ
728 728 */
729 729 cmn_err(CE_PANIC, "rds_wput T_UNITDATA_REQ \n");
730 730 }
731 731 }
732 732 /* FALLTHRU */
733 733 default:
734 734 rds_wput_other(q, mp);
735 735 return (0);
736 736 }
737 737 }
738 738
739 739 static int
740 740 rds_wput_data(queue_t *q, mblk_t *mp, uio_t *uiop)
741 741 {
742 742 uchar_t *rptr = mp->b_rptr;
743 743 rds_t *rds;
744 744 mblk_t *mp1;
745 745 sin_t *sin;
746 746 ipaddr_t dst;
747 747 uint16_t port;
748 748 int ret = 0;
749 749
750 750 #define tudr ((struct T_unitdata_req *)(uintptr_t)rptr)
751 751
752 752 rds = (rds_t *)q->q_ptr;
753 753 /* Handle UNITDATA_REQ messages here */
754 754 if (rds->rds_state == TS_UNBND) {
755 755 /* If a port has not been bound to the stream, fail. */
756 756 dprint(2, ("%s: socket is not bound to a port", LABEL));
757 757 freemsg(mp);
758 758 return (EPROTO);
759 759 }
760 760
761 761 mp1 = mp->b_cont;
762 762 mp->b_cont = NULL;
763 763 if (mp1 == NULL) {
764 764 dprint(2, ("%s: No message to send", LABEL));
765 765 freemsg(mp);
766 766 return (EPROTO);
767 767 }
768 768
769 769 /*
770 770 * No options allowed
771 771 */
772 772 if (tudr->OPT_length != 0) {
773 773 ret = EINVAL;
774 774 goto done;
775 775 }
776 776
777 777 ASSERT(mp1->b_datap->db_ref == 1);
778 778
779 779 if ((rptr + tudr->DEST_offset + tudr->DEST_length) >
780 780 mp->b_wptr) {
781 781 ret = EDESTADDRREQ;
782 782 goto done;
783 783 }
784 784
785 785 sin = (sin_t *)(uintptr_t)&rptr[tudr->DEST_offset];
786 786 if (!OK_32PTR((char *)sin) || tudr->DEST_length !=
787 787 sizeof (sin_t) || sin->sin_family != AF_INET_OFFLOAD) {
788 788 ret = EDESTADDRREQ;
789 789 goto done;
790 790 }
791 791 /* Extract port and ipaddr */
792 792 port = sin->sin_port;
793 793 dst = sin->sin_addr.s_addr;
794 794
795 795 if (port == 0 || dst == INADDR_ANY) {
796 796 ret = EDESTADDRREQ;
797 797 goto done;
798 798 }
799 799
800 800 ASSERT(rds_transport_ops != NULL);
801 801 ret = rds_transport_ops->rds_transport_sendmsg(uiop, rds->rds_src, dst,
802 802 ntohs(rds->rds_port), ntohs(port), rds->rds_zoneid);
803 803 if (ret != 0) {
804 804 if ((ret != ENOBUFS) && (ret != ENOMEM)) {
805 805 /* ENOMEM is actually EWOULDBLOCK */
806 806 dprint(2, ("%s: rds_sendmsg returned %d", LABEL, ret));
807 807 goto done;
808 808 }
809 809 }
810 810 done:
811 811 freemsg(mp1);
812 812 freemsg(mp);
813 813 return (ret);
814 814 }
815 815
816 816 /*
817 817 * Make sure we dont return EINVAL and EWOULDBLOCK as it has
818 818 * special meanings for the synchronous streams (rwnext()).
819 819 * We should return ENOMEM which is changed to EWOULDBLOCK by kstrputmsg()
820 820 */
821 821 static int
822 822 rds_wrw(queue_t *q, struiod_t *dp)
823 823 {
824 824 mblk_t *mp = dp->d_mp;
825 825 int error = 0;
826 826 struct datab *db;
827 827 uchar_t *rptr;
828 828
829 829 db = mp->b_datap;
830 830 rptr = mp->b_rptr;
831 831 switch (db->db_type) {
832 832 case M_PROTO:
833 833 case M_PCPROTO:
834 834 ASSERT(((uintptr_t)mp->b_wptr - (uintptr_t)rptr) <=
835 835 (uintptr_t)INT_MAX);
836 836 if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr >=
837 837 sizeof (struct T_unitdata_req)) {
838 838 /* Detect valid T_UNITDATA_REQ here */
839 839 if (((union T_primitives *)(uintptr_t)rptr)->type
840 840 == T_UNITDATA_REQ)
841 841 break;
842 842 }
843 843 /* FALLTHRU */
844 844 default:
845 845
846 846 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) {
847 847 /*
848 848 * Uio error of some sort, so just return the error.
849 849 */
850 850 goto done;
851 851 }
852 852 dp->d_mp = 0;
853 853 rds_wput_other(q, mp);
854 854 return (0);
855 855 }
856 856
857 857 dp->d_mp = 0;
858 858 error = rds_wput_data(q, mp, &dp->d_uio);
859 859 done:
860 860 if (error == EWOULDBLOCK || error == EINVAL)
861 861 error = EIO;
862 862
863 863 return (error);
864 864 }
865 865
866 866 static void
867 867 rds_rsrv(queue_t *q)
868 868 {
869 869 rds_t *rds = (rds_t *)q->q_ptr;
870 870 ulong_t current_port_quota;
871 871
872 872 /* update the port quota to the current level */
873 873 current_port_quota = RDS_GET_PORT_QUOTA();
874 874 if (rds->rds_port_quota != current_port_quota) {
875 875 rds->rds_port_quota = current_port_quota;
876 876 (void) proto_set_rx_hiwat(q, NULL,
877 877 rds->rds_port_quota * UserBufferSize);
878 878 }
879 879
880 880 /* No more messages in the q, unstall the socket */
881 881 rds_transport_ops->rds_transport_resume_port(ntohs(rds->rds_port));
882 882 }
883 883
884 884 int
885 885 rds_close_transport_driver()
886 886 {
887 887 ASSERT(rds_transport_ops != NULL);
888 888
889 889 rw_enter(&rds_transport_lock, RW_WRITER);
890 890 if (rds_transport_handle != NULL) {
891 891 rds_transport_ops->rds_transport_close_ib();
892 892 (void) ldi_close(rds_transport_handle, FNDELAY, kcred);
893 893 rds_transport_handle = NULL;
894 894 }
895 895 rw_exit(&rds_transport_lock);
896 896
897 897 return (0);
898 898 }
899 899
900 900
901 901 int
902 902 rds_open_transport_driver()
903 903 {
904 904 int ret = 0;
905 905
906 906 rw_enter(&rds_transport_lock, RW_WRITER);
907 907 if (rds_transport_handle != NULL) {
908 908 /*
909 909 * Someone beat us to it.
910 910 */
911 911 goto done;
912 912 }
913 913
914 914 if (ibt_hw_is_present() == 0) {
915 915 ret = ENODEV;
916 916 goto done;
917 917 }
918 918
919 919 if (rds_li == NULL) {
920 920 ret = EPROTONOSUPPORT;
921 921 goto done;
922 922 }
923 923
924 924 ret = ldi_open_by_name("/devices/ib/rdsib@0:rdsib",
925 925 FREAD | FWRITE, kcred, &rds_transport_handle, rds_li);
926 926 if (ret != 0) {
927 927 ret = EPROTONOSUPPORT;
928 928 rds_transport_handle = NULL;
929 929 goto done;
930 930 }
931 931
932 932 ret = rds_transport_ops->rds_transport_open_ib();
933 933 if (ret != 0) {
934 934 (void) ldi_close(rds_transport_handle, FNDELAY, kcred);
935 935 rds_transport_handle = NULL;
936 936 }
937 937 done:
938 938 rw_exit(&rds_transport_lock);
939 939 return (ret);
940 940 }
941 941
942 942 static struct module_info info = {
943 943 0, "rds", 1, INFPSZ, 65536, 1024
944 944 };
945 945
946 946 static struct qinit rinit = {
947 947 NULL, (pfi_t)rds_rsrv, rds_open, rds_close, NULL, &info
948 948 };
949 949
950 950 static struct qinit winit = {
951 951 (pfi_t)rds_wput, NULL, rds_open, rds_close, NULL, &info,
952 952 NULL, rds_wrw, NULL, STRUIOT_STANDARD
953 953 };
954 954
955 955 struct streamtab rdsinfo = {
956 956 &rinit, &winit, NULL, NULL
957 957 };
958 958
959 959 DDI_DEFINE_STREAM_OPS(rds_devops, nulldev, nulldev, rds_attach, rds_detach,
960 960 nulldev, rds_info, RDS_DEVMTFLAGS, &RDS_STRTAB, ddi_quiesce_not_supported);
961 961
962 962 /*
↓ open down ↓ |
962 lines elided |
↑ open up ↑ |
963 963 * Module linkage information for the kernel.
964 964 */
965 965 static struct modldrv modldrv = {
966 966 &mod_driverops,
967 967 RDS_DEVDESC,
968 968 &rds_devops
969 969 };
970 970
971 971 static struct modlinkage modlinkage = {
972 972 MODREV_1,
973 - &modldrv,
974 - NULL
973 + { &modldrv, NULL }
975 974 };
976 975
977 976 int
978 977 _init(void)
979 978 {
980 979 int ret;
981 980
982 981 rds_init();
983 982
984 983 ret = mod_install(&modlinkage);
985 984 if (ret != 0)
986 985 goto done;
987 986 ret = ldi_ident_from_mod(&modlinkage, &rds_li);
988 987 if (ret != 0)
989 988 rds_li = NULL;
990 989 done:
991 990 return (ret);
992 991 }
993 992
994 993 int
995 994 _fini(void)
996 995 {
997 996 int ret;
998 997
999 998 ret = mod_remove(&modlinkage);
1000 999 if (ret != 0) {
1001 1000 return (ret);
1002 1001 }
1003 1002
1004 1003 rds_fini();
1005 1004
1006 1005 ldi_ident_release(rds_li);
1007 1006 return (0);
1008 1007 }
1009 1008
1010 1009 int
1011 1010 _info(struct modinfo *modinfop)
1012 1011 {
1013 1012 return (mod_info(&modlinkage, modinfop));
1014 1013 }
↓ open down ↓ |
30 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX