Print this page
8368 remove warlock leftovers from usr/src/uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/clients/rdsv3/af_rds.c
+++ new/usr/src/uts/common/io/ib/clients/rdsv3/af_rds.c
1 1 /*
2 2 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3 3 */
4 4
5 5 /*
6 6 * This file contains code imported from the OFED rds source file af_rds.c
7 7 * Oracle elects to have and use the contents of af_rds.c under and governed
8 8 * by the OpenIB.org BSD license (see below for full license text). However,
9 9 * the following notice accompanied the original version of this file:
10 10 */
11 11
12 12 /*
13 13 * Copyright (c) 2006 Oracle. All rights reserved.
14 14 *
15 15 * This software is available to you under a choice of one of two
16 16 * licenses. You may choose to be licensed under the terms of the GNU
17 17 * General Public License (GPL) Version 2, available from the file
18 18 * COPYING in the main directory of this source tree, or the
19 19 * OpenIB.org BSD license below:
20 20 *
21 21 * Redistribution and use in source and binary forms, with or
22 22 * without modification, are permitted provided that the following
23 23 * conditions are met:
24 24 *
25 25 * - Redistributions of source code must retain the above
26 26 * copyright notice, this list of conditions and the following
27 27 * disclaimer.
28 28 *
29 29 * - Redistributions in binary form must reproduce the above
30 30 * copyright notice, this list of conditions and the following
31 31 * disclaimer in the documentation and/or other materials
32 32 * provided with the distribution.
33 33 *
34 34 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35 35 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36 36 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37 37 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38 38 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39 39 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40 40 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41 41 * SOFTWARE.
42 42 *
43 43 */
44 44 #include <sys/types.h>
45 45 #include <sys/stat.h>
46 46 #include <sys/conf.h>
47 47 #include <sys/ddi.h>
48 48 #include <sys/sunddi.h>
49 49 #include <sys/modctl.h>
50 50 #include <sys/rds.h>
51 51 #include <sys/stropts.h>
52 52 #include <sys/socket.h>
53 53 #include <sys/socketvar.h>
54 54 #include <sys/sockio.h>
55 55 #include <sys/sysmacros.h>
56 56
57 57 #include <inet/ip.h>
58 58 #include <net/if_types.h>
59 59
60 60 #include <sys/ib/clients/rdsv3/rdsv3.h>
61 61 #include <sys/ib/clients/rdsv3/rdma.h>
62 62 #include <sys/ib/clients/rdsv3/rdma_transport.h>
63 63 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
64 64
65 65 extern void rdsv3_remove_bound(struct rdsv3_sock *rds);
66 66 extern int rdsv3_verify_bind_address(ipaddr_t addr);
67 67
68 68 extern ddi_taskq_t *rdsv3_taskq;
69 69 extern struct rdma_cm_id *rdsv3_rdma_listen_id;
70 70
71 71 /* this is just used for stats gathering :/ */
72 72 kmutex_t rdsv3_sock_lock;
73 73 static unsigned long rdsv3_sock_count;
74 74 list_t rdsv3_sock_list;
75 75
76 76 /*
77 77 * This is called as the final descriptor referencing this socket is closed.
78 78 * We have to unbind the socket so that another socket can be bound to the
79 79 * address it was using.
80 80 *
81 81 * We have to be careful about racing with the incoming path. sock_orphan()
82 82 * sets SOCK_DEAD and we use that as an indicator to the rx path that new
83 83 * messages shouldn't be queued.
84 84 */
85 85 /* ARGSUSED */
86 86 static int
87 87 rdsv3_release(sock_lower_handle_t proto_handle, int flgs, cred_t *cr)
88 88 {
89 89 struct rsock *sk = (struct rsock *)proto_handle;
90 90 struct rdsv3_sock *rs;
91 91
92 92 if (!sk)
93 93 goto out;
94 94
95 95 rs = rdsv3_sk_to_rs(sk);
96 96 RDSV3_DPRINTF4("rdsv3_release", "Enter(rs: %p, sk: %p)", rs, sk);
97 97
98 98 rdsv3_sk_sock_orphan(sk);
99 99 rdsv3_cong_remove_socket(rs);
100 100 rdsv3_remove_bound(rs);
101 101
102 102 /*
103 103 * Note - rdsv3_clear_recv_queue grabs rs_recv_lock, so
104 104 * that ensures the recv path has completed messing
105 105 * with the socket.
106 106 *
107 107 * Note2 - rdsv3_clear_recv_queue(rs) should be called first
108 108 * to prevent some race conditions, which is different from
109 109 * the Linux code.
110 110 */
111 111 rdsv3_clear_recv_queue(rs);
112 112 rdsv3_send_drop_to(rs, NULL);
113 113 rdsv3_rdma_drop_keys(rs);
114 114 (void) rdsv3_notify_queue_get(rs, NULL);
115 115
116 116 mutex_enter(&rdsv3_sock_lock);
117 117 list_remove_node(&rs->rs_item);
118 118 rdsv3_sock_count--;
119 119 mutex_exit(&rdsv3_sock_lock);
120 120
121 121 while (sk->sk_refcount > 1) {
122 122 /* wait for 1 sec and try again */
123 123 delay(drv_usectohz(1000000));
124 124 }
125 125
126 126 /* this will free the rs and sk */
127 127 rdsv3_sk_sock_put(sk);
128 128
129 129 RDSV3_DPRINTF4("rdsv3_release", "Return (rds: %p)", rs);
130 130 out:
131 131 return (0);
132 132 }
133 133
134 134 void
135 135 __rdsv3_wake_sk_sleep(struct rsock *sk)
136 136 {
137 137 /* wakup anyone waiting in recvmsg */
138 138 if (!rdsv3_sk_sock_flag(sk, SOCK_DEAD) && sk->sk_sleep)
139 139 rdsv3_wake_up(sk->sk_sleep);
140 140 }
141 141
142 142 /*
143 143 * Careful not to race with rdsv3_release -> sock_orphan which clears sk_sleep.
144 144 * _bh() isn't OK here, we're called from interrupt handlers. It's probably OK
145 145 * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but
146 146 * this seems more conservative.
147 147 * NB - normally, one would use sk_callback_lock for this, but we can
148 148 * get here from interrupts, whereas the network code grabs sk_callback_lock
149 149 * with _lock_bh only - so relying on sk_callback_lock introduces livelocks.
150 150 */
151 151 void
152 152 rdsv3_wake_sk_sleep(struct rdsv3_sock *rs)
153 153 {
154 154 RDSV3_DPRINTF4("rdsv3_wake_sk_sleep", "Enter(rs: %p)", rs);
155 155
156 156 rw_enter(&rs->rs_recv_lock, RW_READER);
157 157 __rdsv3_wake_sk_sleep(rdsv3_rs_to_sk(rs));
158 158 rw_exit(&rs->rs_recv_lock);
159 159 }
160 160
161 161 /*ARGSUSED*/
162 162 static int
163 163 rdsv3_getname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
164 164 socklen_t *addr_len, cred_t *cr)
165 165 {
166 166 struct rsock *sk = (struct rsock *)proto_handle;
167 167 struct sockaddr_in *sin = (struct sockaddr_in *)addr;
168 168 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
169 169
170 170 RDSV3_DPRINTF4("rdsv3_getname", "Enter(rs: %p, port: %d)", rs,
171 171 rs->rs_bound_port);
172 172
173 173 sin->sin_port = rs->rs_bound_port;
174 174 sin->sin_addr.s_addr = rs->rs_bound_addr;
175 175
176 176 sin->sin_family = AF_INET_OFFLOAD;
177 177
178 178 *addr_len = sizeof (*sin);
179 179 return (0);
180 180 }
181 181
182 182 /*
183 183 * RDS' poll is without a doubt the least intuitive part of the interface,
184 184 * as POLLIN and POLLOUT do not behave entirely as you would expect from
185 185 * a network protocol.
186 186 *
187 187 * POLLIN is asserted if
188 188 * - there is data on the receive queue.
189 189 * - to signal that a previously congested destination may have become
190 190 * uncongested
191 191 * - A notification has been queued to the socket (this can be a congestion
192 192 * update, or a RDMA completion).
193 193 *
194 194 * POLLOUT is asserted if there is room on the send queue. This does not mean
195 195 * however, that the next sendmsg() call will succeed. If the application tries
196 196 * to send to a congested destination, the system call may still fail (and
197 197 * return ENOBUFS).
198 198 */
199 199 /* ARGSUSED */
200 200 static short
201 201 rdsv3_poll(sock_lower_handle_t proto_handle, short events, int anyyet,
202 202 cred_t *cr)
203 203 {
204 204 struct rsock *sk = (struct rsock *)proto_handle;
205 205 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
206 206 unsigned short mask = 0;
207 207
208 208 #if 0
209 209 RDSV3_DPRINTF4("rdsv3_poll", "enter(%p %x %d)", rs, events, anyyet);
210 210 #endif
211 211
212 212 /*
213 213 * If rs_seen_congestion is on, wait until it's off.
214 214 * This is implemented for the following OFED code.
215 215 * if (rs->rs_seen_congestion)
216 216 * poll_wait(file, &rds_poll_waitq, wait);
217 217 */
218 218 mutex_enter(&rs->rs_congested_lock);
219 219 while (rs->rs_seen_congestion) {
220 220 cv_wait(&rs->rs_congested_cv,
221 221 &rs->rs_congested_lock);
222 222 }
223 223 mutex_exit(&rs->rs_congested_lock);
224 224
225 225 rw_enter(&rs->rs_recv_lock, RW_READER);
226 226 if (!rs->rs_cong_monitor) {
227 227 /*
228 228 * When a congestion map was updated, we signal POLLIN for
229 229 * "historical" reasons. Applications can also poll for
230 230 * WRBAND instead.
231 231 */
232 232 if (rdsv3_cong_updated_since(&rs->rs_cong_track))
233 233 mask |= (POLLIN | POLLRDNORM | POLLWRBAND);
234 234 } else {
235 235 mutex_enter(&rs->rs_lock);
236 236 if (rs->rs_cong_notify)
237 237 mask |= (POLLIN | POLLRDNORM);
238 238 mutex_exit(&rs->rs_lock);
239 239 }
240 240 if (!list_is_empty(&rs->rs_recv_queue) ||
241 241 !list_is_empty(&rs->rs_notify_queue))
242 242 mask |= (POLLIN | POLLRDNORM);
243 243 if (rs->rs_snd_bytes < rdsv3_sk_sndbuf(rs))
244 244 mask |= (POLLOUT | POLLWRNORM);
245 245
246 246 /* clear state any time we wake a seen-congested socket */
247 247 if (mask) {
248 248 mutex_enter(&rs->rs_congested_lock);
249 249 rs->rs_seen_congestion = 0;
250 250 mutex_exit(&rs->rs_congested_lock);
251 251 }
252 252
253 253 rw_exit(&rs->rs_recv_lock);
254 254
255 255 #if 0
256 256 RDSV3_DPRINTF4("rdsv3_poll", "return(%p %x)", rs, mask);
257 257 #endif
258 258
259 259 return (mask);
260 260 }
261 261
262 262 /* ARGSUSED */
263 263 static int
264 264 rdsv3_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
265 265 int mode, int32_t *rvalp, cred_t *cr)
266 266 {
267 267 ksocket_t so4;
268 268 struct lifconf lifc;
269 269 struct lifreq lifr, *lifrp;
270 270 struct ifconf ifc;
271 271 struct ifreq ifr;
272 272 int rval = 0, rc, len;
273 273 int numifs;
274 274 int bufsize;
275 275 void *buf;
276 276
277 277 RDSV3_DPRINTF4("rdsv3_ioctl", "enter: cmd: %d", cmd);
278 278
279 279 /* Only ipv4 for now */
280 280 rval = ksocket_socket(&so4, PF_INET, SOCK_DGRAM, 0, KSOCKET_NOSLEEP,
281 281 CRED());
282 282 if (rval != 0) {
283 283 RDSV3_DPRINTF2("rdsv3_ioctl", "ksocket_socket returned %d",
284 284 rval);
285 285 return (rval);
286 286 }
287 287
288 288 switch (cmd) {
289 289 case SIOCGLIFNUM :
290 290 case SIOCGIFNUM :
291 291 rval = rdsv3_do_ip_ioctl(so4, &buf, &bufsize, &numifs);
292 292 if (rval != 0) break;
293 293 if (cmd == SIOCGLIFNUM) {
294 294 struct lifnum lifn;
295 295 lifn.lifn_family = AF_INET_OFFLOAD;
296 296 lifn.lifn_flags = 0;
297 297 lifn.lifn_count = numifs;
298 298 (void) ddi_copyout(&lifn, (void *)arg,
299 299 sizeof (struct lifnum), 0);
300 300 } else {
301 301 len = 0;
302 302 for (lifrp = (struct lifreq *)buf, rc = 0; rc < numifs;
303 303 rc++, lifrp++) {
304 304 if (strlen(lifrp->lifr_name) <= IFNAMSIZ) {
305 305 len++;
306 306 }
307 307 }
308 308 (void) ddi_copyout(&len, (void *)arg,
309 309 sizeof (int), 0);
310 310 }
311 311 kmem_free(buf, bufsize);
312 312 break;
313 313
314 314 case SIOCGLIFCONF :
315 315 if (ddi_copyin((void *)arg, &lifc, sizeof (struct lifconf), 0)
316 316 != 0) {
317 317 RDSV3_DPRINTF2("rdsv3_ioctl", "ddi_copyin failed lifc");
318 318 rval = EFAULT;
319 319 break;
320 320 }
321 321
322 322 rval = rdsv3_do_ip_ioctl(so4, &buf, &bufsize, &numifs);
323 323 if (rval != 0) {
324 324 RDSV3_DPRINTF2("rdsv3_ioctl",
325 325 "rdsv3_do_ip_ioctl failed: %d", rval);
326 326 break;
327 327 }
328 328
329 329 if ((lifc.lifc_len > 0) && (numifs > 0)) {
330 330 if (ddi_copyout(buf, (void *)lifc.lifc_req,
331 331 (lifc.lifc_len < bufsize) ? lifc.lifc_len :
332 332 bufsize, 0) != 0) {
333 333 RDSV3_DPRINTF2("rdsv3_ioctl",
334 334 "copyout of records failed");
335 335 rval = EFAULT;
336 336 }
337 337
338 338 }
339 339
340 340 lifc.lifc_len = bufsize;
341 341 if (ddi_copyout(&lifc, (void *)arg, sizeof (struct lifconf),
342 342 0) != 0) {
343 343 RDSV3_DPRINTF2("rdsv3_ioctl",
344 344 "copyout of lifconf failed");
345 345 rval = EFAULT;
346 346 }
347 347
348 348 kmem_free(buf, bufsize);
349 349 break;
350 350
351 351 case SIOCGIFCONF :
352 352 case O_SIOCGIFCONF :
353 353 if (ddi_copyin((void *)arg, &ifc, sizeof (struct ifconf), 0)
354 354 != 0) {
355 355 RDSV3_DPRINTF2("rdsv3_ioctl", "ddi_copyin failed ifc");
356 356 rval = EFAULT;
357 357 break;
358 358 }
359 359
360 360 RDSV3_DPRINTF2("rdsv3_ioctl",
361 361 "O_SIOCGIFCONF: ifc_len: %d, req: %p",
362 362 ifc.ifc_len, ifc.ifc_req);
363 363
364 364 rval = rdsv3_do_ip_ioctl_old(so4, &buf, &bufsize, &numifs);
365 365 if (rval != 0) {
366 366 RDSV3_DPRINTF2("rdsv3_ioctl",
367 367 "rdsv3_do_ip_ioctl_old failed: %d", rval);
368 368 break;
369 369 }
370 370
371 371 if ((ifc.ifc_len > 0) && (numifs > 0)) {
372 372 if (ddi_copyout(buf, (void *)ifc.ifc_req,
373 373 (ifc.ifc_len < bufsize) ? ifc.ifc_len :
374 374 bufsize, 0) != 0) {
375 375 RDSV3_DPRINTF2("rdsv3_ioctl",
376 376 "copyout of records failed");
377 377 rval = EFAULT;
378 378 }
379 379
380 380 }
381 381
382 382 ifc.ifc_len = bufsize;
383 383 if (ddi_copyout(&ifc, (void *)arg, sizeof (struct ifconf),
384 384 0) != 0) {
385 385 RDSV3_DPRINTF2("rdsv3_ioctl",
386 386 "copyout of ifconf failed");
387 387 rval = EFAULT;
388 388 }
389 389
390 390 kmem_free(buf, bufsize);
391 391 break;
392 392
393 393 case SIOCGLIFFLAGS :
394 394 case SIOCSLIFFLAGS :
395 395 case SIOCGLIFMTU :
396 396 case SIOCGLIFNETMASK :
397 397 case SIOCGLIFINDEX :
398 398 if (ddi_copyin((void *)arg, &lifr, sizeof (struct lifreq), 0)
399 399 != 0) {
400 400 RDSV3_DPRINTF2("rdsv3_ioctl", "ddi_copyin failed lifr");
401 401 rval = EFAULT;
402 402 break;
403 403 }
404 404
405 405 rc = ksocket_ioctl(so4, cmd, (intptr_t)&lifr, &rval, CRED());
406 406 if (rc != 0) {
407 407 RDSV3_DPRINTF2("rdsv3_ioctl",
408 408 "ksocket_ioctl failed: %d, name: %s cmd: 0x%x",
409 409 rc, lifr.lifr_name, cmd);
410 410 break;
411 411 }
412 412
413 413 (void) ddi_copyout(&lifr, (void *)arg,
414 414 sizeof (struct lifreq), 0);
415 415 break;
416 416
417 417 case SIOCGIFFLAGS :
418 418 case SIOCSIFFLAGS :
419 419 case SIOCGIFMTU :
420 420 case SIOCGIFNETMASK :
421 421 case SIOCGIFINDEX :
422 422 if (ddi_copyin((void *)arg, &ifr, sizeof (struct ifreq), 0)
423 423 != 0) {
424 424 RDSV3_DPRINTF2("rdsv3_ioctl", "ddi_copyin failed ifr");
425 425 rval = EFAULT;
426 426 break;
427 427 }
428 428
429 429 RDSV3_DPRINTF2("rdsv3_ioctl", "1. name: %s", ifr.ifr_name);
430 430
431 431 rc = ksocket_ioctl(so4, cmd, (intptr_t)&ifr, &rval, CRED());
432 432 if (rc != 0) {
433 433 RDSV3_DPRINTF2("rdsv3_ioctl",
434 434 "ksocket_ioctl failed: %d, name: %s cmd: 0x%x",
435 435 rc, ifr.ifr_name, cmd);
436 436
437 437 break;
438 438 }
439 439
440 440 RDSV3_DPRINTF2("rdsv3_ioctl", "2. name: %s", ifr.ifr_name);
441 441
442 442 (void) ddi_copyout(&ifr, (void *)arg,
443 443 sizeof (struct ifreq), 0);
444 444 break;
445 445
446 446 default:
447 447 if ((cmd >= RDS_INFO_FIRST) &&
448 448 (cmd <= RDS_INFO_LAST)) {
449 449 return (rdsv3_info_ioctl((struct rsock *)proto_handle,
450 450 cmd, (char *)arg, rvalp));
451 451 }
452 452 RDSV3_DPRINTF2("rdsv3_ioctl", "Unknown ioctl cmd: %d", cmd);
453 453 cmn_err(CE_CONT, "unsupported IOCTL cmd: %d \n", cmd);
454 454 rval = EOPNOTSUPP;
455 455 }
456 456
457 457 (void) ksocket_close(so4, CRED());
458 458
459 459 RDSV3_DPRINTF4("rdsv3_ioctl", "return: %d cmd: %d", rval, cmd);
460 460
461 461 *rvalp = rval;
462 462 return (rval);
463 463 }
464 464
465 465 static int
466 466 rdsv3_cancel_sent_to(struct rdsv3_sock *rs, char *optval, int len)
467 467 {
468 468 struct sockaddr_in sin;
469 469
470 470 /* racing with another thread binding seems ok here */
471 471 if (rs->rs_bound_addr == 0)
472 472 return (-ENOTCONN); /* XXX not a great errno */
473 473
474 474 if (len < sizeof (struct sockaddr_in))
475 475 return (-EINVAL);
476 476
477 477 if (ddi_copyin((void *)optval, &sin, sizeof (struct sockaddr_in),
478 478 0) != 0) {
479 479 RDSV3_DPRINTF2("rdsv3_cancel_sent_to", "ddi_copyin failed sin");
480 480 return (-EFAULT);
481 481 }
482 482
483 483 rdsv3_send_drop_to(rs, &sin);
484 484
485 485 return (0);
486 486 }
487 487
/*
 * Store a boolean socket option.
 *
 * optval must point to at least sizeof (int) bytes holding an int; any
 * non-zero value sets *optvar to 1, zero clears it.
 *
 * Returns 0 on success, or -EINVAL when optval is NULL or optlen is
 * shorter than an int (negative lengths included).  *optvar is left
 * untouched on failure.
 */
static int
rdsv3_set_bool_option(unsigned char *optvar, char *optval, int optlen)
{
	int value;

	/*
	 * Validate before touching the buffer.  The cast keeps the
	 * comparison signed so that a negative optlen is rejected rather
	 * than being converted to a huge unsigned value.
	 */
	if (optval == NULL || optlen < (int)sizeof (int))
		return (-EINVAL);

	/*
	 * Read the whole int, not just its first byte: a single-byte read
	 * gives the wrong answer on big-endian machines and for values
	 * whose low byte is zero.  memcpy avoids alignment assumptions.
	 */
	(void) memcpy(&value, optval, sizeof (value));
	*optvar = (value != 0);
	return (0);
}
498 498
499 499 static int
500 500 rdsv3_cong_monitor(struct rdsv3_sock *rs, char *optval, int optlen)
501 501 {
502 502 int ret;
503 503
504 504 ret = rdsv3_set_bool_option(&rs->rs_cong_monitor, optval, optlen);
505 505 if (ret == 0) {
506 506 if (rs->rs_cong_monitor) {
507 507 rdsv3_cong_add_socket(rs);
508 508 } else {
509 509 rdsv3_cong_remove_socket(rs);
510 510 rs->rs_cong_mask = 0;
511 511 rs->rs_cong_notify = 0;
512 512 }
513 513 }
514 514 return (ret);
515 515 }
516 516
517 517 /*ARGSUSED*/
518 518 static int
519 519 rdsv3_setsockopt(sock_lower_handle_t proto_handle, int level,
520 520 int optname, const void *optval, socklen_t optlen, cred_t *cr)
521 521 {
522 522 struct rsock *sk = (struct rsock *)proto_handle;
523 523 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
524 524 int ret = 0;
525 525
526 526 RDSV3_DPRINTF4("rdsv3_setsockopt", "enter(%p %d %d)",
527 527 rs, level, optname);
528 528
529 529 switch (optname) {
530 530 case RDS_CANCEL_SENT_TO:
531 531 ret = rdsv3_cancel_sent_to(rs, (char *)optval, optlen);
532 532 break;
533 533 case RDS_GET_MR:
534 534 ret = rdsv3_get_mr(rs, optval, optlen);
535 535 break;
536 536 case RDS_GET_MR_FOR_DEST:
537 537 ret = rdsv3_get_mr_for_dest(rs, optval, optlen);
538 538 break;
539 539 case RDS_FREE_MR:
540 540 ret = rdsv3_free_mr(rs, optval, optlen);
541 541 break;
542 542 case RDS_RECVERR:
543 543 ret = rdsv3_set_bool_option(&rs->rs_recverr,
544 544 (char *)optval, optlen);
545 545 break;
546 546 case RDS_CONG_MONITOR:
547 547 ret = rdsv3_cong_monitor(rs, (char *)optval, optlen);
548 548 break;
549 549 case SO_SNDBUF:
550 550 sk->sk_sndbuf = *(uint_t *)optval;
551 551 return (ret);
552 552 case SO_RCVBUF:
553 553 sk->sk_rcvbuf = *(uint_t *)optval;
554 554 return (ret);
555 555 default:
556 556 #if 1
557 557 break;
558 558 #else
559 559 ret = -ENOPROTOOPT;
560 560 #endif
561 561 }
562 562 out:
563 563 return (ret);
564 564 }
565 565
566 566 /* XXX */
567 567 /*ARGSUSED*/
568 568 static int
569 569 rdsv3_getsockopt(sock_lower_handle_t proto_handle, int level,
570 570 int optname, void *optval, socklen_t *optlen, cred_t *cr)
571 571 {
572 572 struct rsock *sk = (struct rsock *)proto_handle;
573 573 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
574 574 int ret = 0;
575 575
576 576 RDSV3_DPRINTF4("rdsv3_getsockopt", "enter(%p %d %d)",
577 577 rs, optname, *optlen);
578 578
579 579 switch (optname) {
580 580 case SO_SNDBUF:
581 581 RDSV3_DPRINTF4("rdsv3_getsockopt", "SO_SNDBUF(%d)",
582 582 sk->sk_sndbuf);
583 583 if (*optlen != 0) {
584 584 *((int *)optval) = sk->sk_sndbuf;
585 585 *optlen = sizeof (uint_t);
586 586 }
587 587 return (ret);
588 588 case SO_RCVBUF:
589 589 RDSV3_DPRINTF4("rdsv3_getsockopt", "SO_RCVBUF(%d)",
590 590 sk->sk_rcvbuf);
591 591 if (*optlen != 0) {
592 592 *((int *)optval) = sk->sk_rcvbuf;
593 593 *optlen = sizeof (uint_t);
594 594 }
595 595 return (ret);
596 596 case RDS_RECVERR:
597 597 RDSV3_DPRINTF4("rdsv3_getsockopt", "RDSV3_RECVERR(%d)",
598 598 rs->rs_recverr);
599 599 if (*optlen < sizeof (int))
600 600 return (-EINVAL);
601 601 else {
602 602 *(int *)optval = rs->rs_recverr;
603 603 *optlen = sizeof (int);
604 604 }
605 605 return (0);
606 606 default:
607 607 RDSV3_DPRINTF2("rdsv3_getsockopt",
608 608 "Unknown: level: %d optname: %d", level, optname);
609 609 ret = -ENOPROTOOPT;
610 610 }
611 611
612 612 RDSV3_DPRINTF4("rdsv3_getsockopt", "return(%p %d %d)",
613 613 rs, optname, ret);
614 614 return (ret);
615 615 }
616 616
617 617 /*ARGSUSED*/
618 618 static int rdsv3_connect(sock_lower_handle_t proto_handle,
619 619 const struct sockaddr *addr, socklen_t addr_len, sock_connid_t *conn,
620 620 cred_t *cr)
621 621 {
622 622 struct rsock *sk = (struct rsock *)proto_handle;
623 623 struct sockaddr_in *sin = (struct sockaddr_in *)addr;
624 624 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
625 625 int ret = 0;
626 626
627 627 RDSV3_DPRINTF4("rdsv3_connect", "Enter(rs: %p)", rs);
628 628
629 629 mutex_enter(&sk->sk_lock);
630 630
631 631 if (addr_len != sizeof (struct sockaddr_in)) {
632 632 ret = -EINVAL;
633 633 goto out;
634 634 }
635 635
636 636 if (sin->sin_family != AF_INET_OFFLOAD) {
637 637 ret = -EAFNOSUPPORT;
638 638 goto out;
639 639 }
640 640
641 641 if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
642 642 ret = -EDESTADDRREQ;
643 643 goto out;
644 644 }
645 645
646 646 rs->rs_conn_addr = sin->sin_addr.s_addr;
647 647 rs->rs_conn_port = sin->sin_port;
648 648
649 649 sk->sk_upcalls->su_connected(sk->sk_upper_handle, 0, NULL, -1);
650 650
651 651 RDSV3_DPRINTF4("rdsv3_connect", "Return(rs: %p)", rs);
652 652
653 653 out:
654 654 mutex_exit(&sk->sk_lock);
655 655 return (ret);
656 656 }
657 657
658 658 /*ARGSUSED*/
659 659 static int
660 660 rdsv3_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
661 661 {
662 662 struct rsock *sk = (struct rsock *)proto_handle;
663 663 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
664 664
665 665 RDSV3_DPRINTF4("rdsv3_shutdown", "Enter(rs: %p)", rs);
666 666
667 667 return (0);
668 668 }
669 669
670 670 /*ARGSUSED*/
671 671 void
672 672 rdsv3_activate(sock_lower_handle_t proto_handle,
673 673 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls,
674 674 int flags, cred_t *cr)
675 675 {
676 676 struct rsock *sk = (struct rsock *)proto_handle;
677 677 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
678 678
679 679 RDSV3_DPRINTF4("rdsv3_activate", "Enter(rs: %p)", rs);
680 680
681 681 sk->sk_upcalls = sock_upcalls;
682 682 sk->sk_upper_handle = sock_handle;
683 683
684 684 RDSV3_DPRINTF4("rdsv3_activate", "Return (rs: %p)", rs);
685 685 }
686 686
687 687
688 688 /* ARGSUSED */
689 689 int
690 690 rdsv3_send_uio(sock_lower_handle_t proto_handle, uio_t *uio,
691 691 struct nmsghdr *msg, cred_t *cr)
692 692 {
693 693 struct rsock *sk = (struct rsock *)proto_handle;
694 694 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
695 695 int ret;
696 696
697 697 RDSV3_DPRINTF4("rdsv3_send_uio", "Enter(rs: %p)", rs);
698 698 ret = rdsv3_sendmsg(rs, uio, msg, uio->uio_resid);
699 699
700 700 RDSV3_DPRINTF4("rdsv3_send_uio", "Return(rs: %p ret %d)", rs, ret);
701 701 if (ret < 0) {
702 702 return (-ret);
703 703 }
704 704
705 705 return (0);
706 706 }
707 707
708 708 /* ARGSUSED */
709 709 int
710 710 rdsv3_recv_uio(sock_lower_handle_t proto_handle, uio_t *uio,
711 711 struct nmsghdr *msg, cred_t *cr)
712 712 {
713 713 struct rsock *sk = (struct rsock *)proto_handle;
714 714 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
715 715 int ret;
716 716
717 717 RDSV3_DPRINTF4("rdsv3_recv_uio", "Enter (rs: %p)", rs);
718 718 ret = rdsv3_recvmsg(rs, uio, msg, uio->uio_resid, msg->msg_flags);
719 719
720 720 RDSV3_DPRINTF4("rdsv3_recv_uio", "Return(rs: %p ret %d)", rs, ret);
721 721
722 722 if (ret < 0) {
723 723 return (-ret);
724 724 }
725 725
726 726 return (0);
727 727 }
728 728
729 729 /*ARGSUSED*/
730 730 int
731 731 rdsv3_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
732 732 socklen_t *addr_len, cred_t *cr)
733 733 {
734 734 struct sockaddr_in *sin = (struct sockaddr_in *)addr;
735 735 struct rsock *sk = (struct rsock *)proto_handle;
736 736 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
737 737
738 738 RDSV3_DPRINTF2("rdsv3_getpeername", "enter(rs: %p)", rs);
739 739
740 740 (void) memset(sin->sin_zero, 0, sizeof (sin->sin_zero));
741 741
742 742 /* racey, don't care */
743 743 if (!rs->rs_conn_addr)
744 744 return (-ENOTCONN);
745 745
746 746 sin->sin_port = rs->rs_conn_port;
747 747 sin->sin_addr.s_addr = rs->rs_conn_addr;
748 748
749 749 sin->sin_family = AF_INET_OFFLOAD;
750 750
751 751 *addr_len = sizeof (*sin);
752 752 return (0);
753 753 }
↓ open down ↓ |
753 lines elided |
↑ open up ↑ |
754 754
755 755 void
756 756 rdsv3_clrflowctrl(sock_lower_handle_t proto_handle)
757 757 {
758 758 struct rsock *sk = (struct rsock *)proto_handle;
759 759 struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
760 760
761 761 RDSV3_DPRINTF2("rdsv3_clrflowctrl", "enter(rs: %p)", rs);
762 762 }
763 763
764 -#ifndef __lock_lint
765 764 static struct sock_downcalls_s rdsv3_sock_downcalls = {
766 765 .sd_close = rdsv3_release,
767 766 .sd_bind = rdsv3_bind,
768 767 .sd_connect = rdsv3_connect,
769 768 .sd_accept = NULL,
770 769 .sd_getsockname = rdsv3_getname,
771 770 .sd_poll = rdsv3_poll,
772 771 .sd_ioctl = rdsv3_ioctl,
773 772 .sd_listen = NULL,
774 773 .sd_shutdown = rdsv3_shutdown,
775 774 .sd_setsockopt = rdsv3_setsockopt,
776 775 .sd_getsockopt = rdsv3_getsockopt,
777 776 .sd_send_uio = rdsv3_send_uio,
778 777 .sd_recv_uio = rdsv3_recv_uio,
779 778 .sd_activate = rdsv3_activate,
780 779 .sd_getpeername = rdsv3_getpeername,
781 780 .sd_send = NULL,
782 781 .sd_clr_flowctrl = NULL
783 782 };
784 -#else
785 -static struct sock_downcalls_s rdsv3_sock_downcalls = {
786 - rdsv3_activate,
787 - NULL,
788 - rdsv3_bind,
789 - NULL,
790 - rdsv3_connect,
791 - rdsv3_getpeername,
792 - rdsv3_getname,
793 - rdsv3_getsockopt,
794 - rdsv3_setsockopt,
795 - NULL,
796 - rdsv3_send_uio,
797 - rdsv3_recv_uio,
798 - rdsv3_poll,
799 - rdsv3_shutdown,
800 - NULL,
801 - rdsv3_ioctl,
802 - rdsv3_release
803 -};
804 -#endif
805 783
806 784 sock_lower_handle_t
807 785 rdsv3_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
808 786 uint_t *smodep, int *errorp, int flags, cred_t *credp)
809 787 {
810 788 struct rdsv3_sock *rs;
811 789 struct rsock *sk;
812 790
813 791 RDSV3_DPRINTF4("rdsv3_create", "Enter (family: %d type: %d, proto: %d "
814 792 "flags: %d", family, type, proto, flags);
815 793
816 794 sk = rdsv3_sk_alloc();
817 795 if (sk == NULL)
818 796 return (NULL);
819 797 rdsv3_sock_init_data(sk);
820 798
821 799 rs = rdsv3_sk_to_rs(sk);
822 800 rs->rs_sk = sk;
823 801 mutex_init(&rs->rs_lock, NULL, MUTEX_DRIVER, NULL);
824 802 rw_init(&rs->rs_recv_lock, NULL, RW_DRIVER, NULL);
825 803 list_create(&rs->rs_send_queue, sizeof (struct rdsv3_message),
826 804 offsetof(struct rdsv3_message, m_sock_item));
827 805 list_create(&rs->rs_recv_queue, sizeof (struct rdsv3_incoming),
828 806 offsetof(struct rdsv3_incoming, i_item));
829 807 list_create(&rs->rs_notify_queue, sizeof (struct rdsv3_notifier),
830 808 offsetof(struct rdsv3_notifier, n_list));
831 809 mutex_init(&rs->rs_rdma_lock, NULL, MUTEX_DRIVER, NULL);
832 810 avl_create(&rs->rs_rdma_keys, rdsv3_mr_compare,
833 811 sizeof (struct rdsv3_mr), offsetof(struct rdsv3_mr, r_rb_node));
834 812 mutex_init(&rs->rs_conn_lock, NULL, MUTEX_DRIVER, NULL);
835 813 mutex_init(&rs->rs_congested_lock, NULL, MUTEX_DRIVER, NULL);
836 814 cv_init(&rs->rs_congested_cv, NULL, CV_DRIVER, NULL);
837 815 rs->rs_cred = credp;
838 816 rs->rs_zoneid = getzoneid();
839 817 crhold(credp);
840 818
841 819 mutex_enter(&rdsv3_sock_lock);
842 820 list_insert_tail(&rdsv3_sock_list, rs);
843 821 rdsv3_sock_count++;
844 822 /* Initialize RDMA/IB on the 1st socket if not done at attach */
845 823 if (rdsv3_sock_count == 1) {
846 824 rdsv3_rdma_init();
847 825 }
848 826 mutex_exit(&rdsv3_sock_lock);
849 827
850 828 *errorp = 0;
851 829 *smodep = SM_ATOMIC;
852 830 *sock_downcalls = &rdsv3_sock_downcalls;
853 831
854 832 RDSV3_DPRINTF4("rdsv3_create", "Return: %p", rs);
855 833
856 834 return ((sock_lower_handle_t)rdsv3_rs_to_sk(rs));
857 835 }
858 836
/*
 * Take a reference on the rsock that backs rs.
 */
void
rdsv3_sock_addref(struct rdsv3_sock *rs)
{
	struct rsock *sk;

	RDSV3_DPRINTF4("rdsv3_sock_addref", "Enter(rs: %p)", rs);

	sk = rdsv3_rs_to_sk(rs);
	rdsv3_sk_sock_hold(sk);
}
865 843
/*
 * Drop a reference on the rsock that backs rs.
 */
void
rdsv3_sock_put(struct rdsv3_sock *rs)
{
	struct rsock *sk;

	RDSV3_DPRINTF4("rdsv3_sock_put", "Enter(rs: %p)", rs);

	sk = rdsv3_rs_to_sk(rs);
	rdsv3_sk_sock_put(sk);
}
872 850
873 851 static void
874 852 rdsv3_sock_inc_info(struct rsock *sock, unsigned int len,
875 853 struct rdsv3_info_iterator *iter, struct rdsv3_info_lengths *lens)
876 854 {
877 855 struct rdsv3_sock *rs;
878 856 struct rdsv3_incoming *inc;
879 857 unsigned int total = 0;
880 858
881 859 RDSV3_DPRINTF4("rdsv3_sock_inc_info", "Enter(rs: %p)",
882 860 rdsv3_sk_to_rs(sock));
883 861
884 862 len /= sizeof (struct rds_info_message);
885 863
886 864 mutex_enter(&rdsv3_sock_lock);
887 865
888 866 RDSV3_FOR_EACH_LIST_NODE(rs, &rdsv3_sock_list, rs_item) {
889 867 rw_enter(&rs->rs_recv_lock, RW_READER);
890 868
891 869 /* XXX too lazy to maintain counts.. */
892 870 RDSV3_FOR_EACH_LIST_NODE(inc, &rs->rs_recv_queue, i_item) {
893 871 total++;
894 872 if (total <= len)
895 873 rdsv3_inc_info_copy(inc, iter, inc->i_saddr,
896 874 rs->rs_bound_addr, 1);
897 875 }
898 876
899 877 rw_exit(&rs->rs_recv_lock);
900 878 }
901 879
902 880 mutex_exit(&rdsv3_sock_lock);
903 881
904 882 lens->nr = total;
905 883 lens->each = sizeof (struct rds_info_message);
906 884
907 885 RDSV3_DPRINTF4("rdsv3_sock_inc_info", "return(rs: %p)",
908 886 rdsv3_sk_to_rs(sock));
909 887 }
910 888
911 889 static void
912 890 rdsv3_sock_info(struct rsock *sock, unsigned int len,
913 891 struct rdsv3_info_iterator *iter, struct rdsv3_info_lengths *lens)
914 892 {
915 893 struct rds_info_socket sinfo;
916 894 struct rdsv3_sock *rs;
917 895 unsigned long bytes;
918 896
919 897 RDSV3_DPRINTF4("rdsv3_sock_info", "Enter(rs: %p)",
920 898 rdsv3_sk_to_rs(sock));
921 899
922 900 len /= sizeof (struct rds_info_socket);
923 901
924 902 mutex_enter(&rdsv3_sock_lock);
925 903
926 904 if ((len < rdsv3_sock_count) || (iter->addr == NULL))
927 905 goto out;
928 906
929 907 bytes = sizeof (struct rds_info_socket);
930 908 RDSV3_FOR_EACH_LIST_NODE(rs, &rdsv3_sock_list, rs_item) {
931 909 sinfo.sndbuf = rdsv3_sk_sndbuf(rs);
932 910 sinfo.rcvbuf = rdsv3_sk_rcvbuf(rs);
933 911 sinfo.bound_addr = rs->rs_bound_addr;
934 912 sinfo.connected_addr = rs->rs_conn_addr;
935 913 sinfo.bound_port = rs->rs_bound_port;
936 914 sinfo.connected_port = rs->rs_conn_port;
937 915
938 916 rdsv3_info_copy(iter, &sinfo, bytes);
939 917 }
940 918
941 919 RDSV3_DPRINTF4("rdsv3_sock_info", "Return(rs: %p)",
942 920 rdsv3_sk_to_rs(sock));
943 921
944 922 out:
945 923 lens->nr = rdsv3_sock_count;
946 924 lens->each = sizeof (struct rds_info_socket);
947 925
948 926 mutex_exit(&rdsv3_sock_lock);
949 927 }
950 928
951 929 rdsv3_delayed_work_t *rdsv3_rdma_dwp = NULL;
952 930 uint_t rdsv3_rdma_init_delay = 5; /* secs */
953 931 extern void rdsv3_rdma_init_worker(struct rdsv3_work_s *work);
954 932
955 933 void
956 934 rdsv3_exit(void)
↓ open down ↓ |
142 lines elided |
↑ open up ↑ |
957 935 {
958 936 RDSV3_DPRINTF4("rdsv3_exit", "Enter");
959 937
960 938 if (rdsv3_rdma_dwp) {
961 939 rdsv3_cancel_delayed_work(rdsv3_rdma_dwp);
962 940 }
963 941
964 942 (void) ddi_taskq_dispatch(rdsv3_taskq, rdsv3_rdma_exit,
965 943 NULL, DDI_SLEEP);
966 944 while (rdsv3_rdma_listen_id != NULL) {
967 -#ifndef __lock_lint
968 945 RDSV3_DPRINTF5("rdsv3", "%s-%d Waiting for rdsv3_rdma_exit",
969 946 __func__, __LINE__);
970 -#endif
971 947 delay(drv_usectohz(1000));
972 948 }
973 949
974 950 rdsv3_conn_exit();
975 951 rdsv3_cong_exit();
976 952 rdsv3_sysctl_exit();
977 953 rdsv3_threads_exit();
978 954 rdsv3_stats_exit();
979 955 rdsv3_info_deregister_func(RDS_INFO_SOCKETS, rdsv3_sock_info);
980 956 rdsv3_info_deregister_func(RDS_INFO_RECV_MESSAGES,
981 957 rdsv3_sock_inc_info);
982 958
983 959 if (rdsv3_rdma_dwp) {
984 960 kmem_free(rdsv3_rdma_dwp, sizeof (rdsv3_delayed_work_t));
985 961 rdsv3_rdma_dwp = NULL;
986 962 }
987 963
988 964 RDSV3_DPRINTF4("rdsv3_exit", "Return");
989 965 }
990 966
991 967 /*ARGSUSED*/
992 968 int
993 969 rdsv3_init()
994 970 {
995 971 int ret;
996 972
997 973 RDSV3_DPRINTF4("rdsv3_init", "Enter");
998 974
999 975 rdsv3_cong_init();
1000 976
1001 977 ret = rdsv3_conn_init();
1002 978 if (ret)
1003 979 goto out;
1004 980 ret = rdsv3_threads_init();
1005 981 if (ret)
1006 982 goto out_conn;
1007 983 ret = rdsv3_sysctl_init();
1008 984 if (ret)
1009 985 goto out_threads;
1010 986 ret = rdsv3_stats_init();
1011 987 if (ret)
1012 988 goto out_sysctl;
1013 989
1014 990 rdsv3_info_register_func(RDS_INFO_SOCKETS, rdsv3_sock_info);
1015 991 rdsv3_info_register_func(RDS_INFO_RECV_MESSAGES, rdsv3_sock_inc_info);
1016 992
1017 993 /* rdsv3_rdma_init need to be called with a little delay */
1018 994 rdsv3_rdma_dwp = kmem_zalloc(sizeof (rdsv3_delayed_work_t), KM_SLEEP);
1019 995 RDSV3_INIT_DELAYED_WORK(rdsv3_rdma_dwp, rdsv3_rdma_init_worker);
1020 996 rdsv3_queue_delayed_work(rdsv3_wq, rdsv3_rdma_dwp,
1021 997 rdsv3_rdma_init_delay);
1022 998
1023 999 RDSV3_DPRINTF4("rdsv3_init", "Return");
1024 1000
1025 1001 goto out;
1026 1002
1027 1003 out_stats:
1028 1004 rdsv3_stats_exit();
1029 1005 out_sysctl:
1030 1006 rdsv3_sysctl_exit();
1031 1007 out_threads:
1032 1008 rdsv3_threads_exit();
1033 1009 out_conn:
1034 1010 rdsv3_conn_exit();
1035 1011 rdsv3_cong_exit();
1036 1012 out:
1037 1013 return (ret);
1038 1014 }
↓ open down ↓ |
58 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX