1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/sysmacros.h> 30 #include <sys/debug.h> 31 #include <sys/cmn_err.h> 32 #include <sys/vfs.h> 33 #include <sys/policy.h> 34 #include <sys/modctl.h> 35 36 #include <sys/sunddi.h> 37 38 #include <sys/strsun.h> 39 #include <sys/stropts.h> 40 #include <sys/strsubr.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/uio.h> 44 45 #include <inet/ipclassifier.h> 46 #include <fs/sockfs/sockcommon.h> 47 #include <fs/sockfs/sockfilter_impl.h> 48 #include <fs/sockfs/nl7c.h> 49 #include <fs/sockfs/socktpi.h> 50 #include <fs/sockfs/sodirect.h> 51 #include <inet/ip.h> 52 53 extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print; 54 55 /* 56 * Common socket access functions. 57 * 58 * Instead of accessing the sonode switch directly (i.e., SOP_xxx()), 59 * the socket_xxx() function should be used. 60 */ 61 62 /* 63 * Try to create a new sonode of the requested <family, type, protocol>. 64 */ 65 /* ARGSUSED */ 66 struct sonode * 67 socket_create(int family, int type, int protocol, char *devpath, char *mod, 68 int flags, int version, struct cred *cr, int *errorp) 69 { 70 struct sonode *so; 71 struct sockparams *sp = NULL; 72 int saved_error; 73 74 /* 75 * Look for a sockparams entry that match the given criteria. 76 * solookup() returns with the entry held. 77 */ 78 *errorp = solookup(family, type, protocol, &sp); 79 saved_error = *errorp; 80 if (sp == NULL) { 81 int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 82 /* 83 * There is no matching sockparams entry. An ephemeral entry is 84 * created if the caller specifies a device or a socket module. 85 */ 86 if (devpath != NULL) { 87 saved_error = 0; 88 sp = sockparams_hold_ephemeral_bydev(family, type, 89 protocol, devpath, kmflags, errorp); 90 } else if (mod != NULL) { 91 saved_error = 0; 92 sp = sockparams_hold_ephemeral_bymod(family, type, 93 protocol, mod, kmflags, errorp); 94 } else { 95 *errorp = solookup(family, type, 0, &sp); 96 } 97 98 if (sp == NULL) { 99 if (saved_error && (*errorp == EPROTONOSUPPORT || 100 *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT)) 101 *errorp = saved_error; 102 return (NULL); 103 } 104 } 105 106 ASSERT(sp->sp_smod_info != NULL); 107 ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP); 108 sp->sp_stats.sps_ncreate.value.ui64++; 109 so = sp->sp_smod_info->smod_sock_create_func(sp, family, type, 110 protocol, version, flags, errorp, cr); 111 if (so == NULL) { 112 SOCKPARAMS_DEC_REF(sp); 113 } else { 114 if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) { 115 /* Cannot fail, only bumps so_count */ 116 (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL); 117 } else { 118 if (saved_error && (*errorp == EPROTONOSUPPORT || 119 *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT)) 120 *errorp = saved_error; 121 socket_destroy(so); 122 so = NULL; 123 } 124 } 125 return (so); 126 } 127 128 struct sonode * 129 socket_newconn(struct sonode *parent, sock_lower_handle_t lh, 130 sock_downcalls_t *dc, int flags, int *errorp) 131 { 132 struct sonode *so; 133 struct sockparams *sp; 134 struct cred *cr; 135 136 if ((cr = CRED()) == NULL) 137 cr = kcred; 138 139 sp = parent->so_sockparams; 140 ASSERT(sp != NULL); 141 142 sp->sp_stats.sps_ncreate.value.ui64++; 143 so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family, 144 parent->so_type, parent->so_protocol, parent->so_version, flags, 145 errorp, cr); 146 if (so != NULL) { 147 SOCKPARAMS_INC_REF(sp); 148 149 so->so_proto_handle = lh; 150 so->so_downcalls = dc; 151 /* 152 * This function may be called in interrupt context, and CRED() 153 * will be NULL. In this case, pass in kcred. 154 */ 155 if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) { 156 /* Cannot fail, only bumps so_count */ 157 (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL); 158 } else { 159 socket_destroy(so); 160 so = NULL; 161 } 162 } 163 164 return (so); 165 } 166 167 /* 168 * Bind local endpoint. 169 */ 170 int 171 socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 172 int flags, cred_t *cr) 173 { 174 return (SOP_BIND(so, name, namelen, flags, cr)); 175 } 176 177 /* 178 * Turn socket into a listen socket. 179 */ 180 int 181 socket_listen(struct sonode *so, int backlog, cred_t *cr) 182 { 183 if (backlog < 0) { 184 backlog = 0; 185 } 186 187 /* 188 * Use the same qlimit as in BSD. BSD checks the qlimit 189 * before queuing the next connection implying that a 190 * listen(sock, 0) allows one connection to be queued. 191 * BSD also uses 1.5 times the requested backlog. 192 * 193 * XNS Issue 4 required a strict interpretation of the backlog. 194 * This has been waived subsequently for Issue 4 and the change 195 * incorporated in XNS Issue 5. So we aren't required to do 196 * anything special for XPG apps. 197 */ 198 if (backlog >= (INT_MAX - 1) / 3) 199 backlog = INT_MAX; 200 else 201 backlog = backlog * 3 / 2 + 1; 202 203 return (SOP_LISTEN(so, backlog, cr)); 204 } 205 206 /* 207 * Accept incoming connection. 208 */ 209 int 210 socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop) 211 { 212 return (SOP_ACCEPT(lso, fflag, cr, nsop)); 213 } 214 215 /* 216 * Active open. 217 */ 218 int 219 socket_connect(struct sonode *so, struct sockaddr *name, 220 socklen_t namelen, int fflag, int flags, cred_t *cr) 221 { 222 int error; 223 224 /* 225 * Handle a connect to a name parameter of type AF_UNSPEC like a 226 * connect to a null address. This is the portable method to 227 * unconnect a socket. 228 */ 229 if ((namelen >= sizeof (sa_family_t)) && 230 (name->sa_family == AF_UNSPEC)) { 231 name = NULL; 232 namelen = 0; 233 } 234 235 error = SOP_CONNECT(so, name, namelen, fflag, flags, cr); 236 237 if (error == EHOSTUNREACH && flags & _SOCONNECT_XPG4_2) { 238 /* 239 * X/Open specification contains a requirement that 240 * ENETUNREACH be returned but does not require 241 * EHOSTUNREACH. In order to keep the test suite 242 * happy we mess with the errno here. 243 */ 244 error = ENETUNREACH; 245 } 246 247 return (error); 248 } 249 250 /* 251 * Get address of remote node. 252 */ 253 int 254 socket_getpeername(struct sonode *so, struct sockaddr *addr, 255 socklen_t *addrlen, boolean_t accept, cred_t *cr) 256 { 257 ASSERT(*addrlen > 0); 258 return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 259 260 } 261 262 /* 263 * Get local address. 264 */ 265 int 266 socket_getsockname(struct sonode *so, struct sockaddr *addr, 267 socklen_t *addrlen, cred_t *cr) 268 { 269 return (SOP_GETSOCKNAME(so, addr, addrlen, cr)); 270 271 } 272 273 /* 274 * Called from shutdown(). 275 */ 276 int 277 socket_shutdown(struct sonode *so, int how, cred_t *cr) 278 { 279 return (SOP_SHUTDOWN(so, how, cr)); 280 } 281 282 /* 283 * Get socket options. 284 */ 285 /*ARGSUSED*/ 286 int 287 socket_getsockopt(struct sonode *so, int level, int option_name, 288 void *optval, socklen_t *optlenp, int flags, cred_t *cr) 289 { 290 return (SOP_GETSOCKOPT(so, level, option_name, optval, 291 optlenp, flags, cr)); 292 } 293 294 /* 295 * Set socket options 296 */ 297 int 298 socket_setsockopt(struct sonode *so, int level, int option_name, 299 const void *optval, t_uscalar_t optlen, cred_t *cr) 300 { 301 int val = 1; 302 /* Caller allocates aligned optval, or passes null */ 303 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 304 /* If optval is null optlen is 0, and vice-versa */ 305 ASSERT(optval != NULL || optlen == 0); 306 ASSERT(optlen != 0 || optval == NULL); 307 308 if (optval == NULL && optlen == 0) 309 optval = &val; 310 311 return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 312 } 313 314 int 315 socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 316 cred_t *cr) 317 { 318 int error = 0; 319 ssize_t orig_resid = uiop->uio_resid; 320 321 /* 322 * Do not bypass the cache if we are doing a local (AF_UNIX) write. 323 */ 324 if (so->so_family == AF_UNIX) 325 uiop->uio_extflg |= UIO_COPY_CACHED; 326 else 327 uiop->uio_extflg &= ~UIO_COPY_CACHED; 328 329 error = SOP_SENDMSG(so, msg, uiop, cr); 330 switch (error) { 331 default: 332 break; 333 case EINTR: 334 case ENOMEM: 335 /* EAGAIN is EWOULDBLOCK */ 336 case EWOULDBLOCK: 337 /* We did a partial send */ 338 if (uiop->uio_resid != orig_resid) 339 error = 0; 340 break; 341 case EPIPE: 342 if ((so->so_mode & SM_KERNEL) == 0) 343 tsignal(curthread, SIGPIPE); 344 break; 345 } 346 347 return (error); 348 } 349 350 int 351 socket_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 352 struct cred *cr, mblk_t **mpp) 353 { 354 int error = 0; 355 356 error = SOP_SENDMBLK(so, msg, fflag, cr, mpp); 357 if (error == EPIPE) { 358 tsignal(curthread, SIGPIPE); 359 } 360 return (error); 361 } 362 363 int 364 socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 365 cred_t *cr) 366 { 367 int error; 368 ssize_t orig_resid = uiop->uio_resid; 369 370 /* 371 * Do not bypass the cache when reading data, as the application 372 * is likely to access the data shortly. 373 */ 374 uiop->uio_extflg |= UIO_COPY_CACHED; 375 376 error = SOP_RECVMSG(so, msg, uiop, cr); 377 378 switch (error) { 379 case EINTR: 380 /* EAGAIN is EWOULDBLOCK */ 381 case EWOULDBLOCK: 382 /* We did a partial read */ 383 if (uiop->uio_resid != orig_resid) 384 error = 0; 385 break; 386 default: 387 break; 388 } 389 return (error); 390 } 391 392 int 393 socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 394 struct cred *cr, int32_t *rvalp) 395 { 396 return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 397 } 398 399 int 400 socket_poll(struct sonode *so, short events, int anyyet, short *reventsp, 401 struct pollhead **phpp) 402 { 403 return (SOP_POLL(so, events, anyyet, reventsp, phpp)); 404 } 405 406 int 407 socket_close(struct sonode *so, int flag, struct cred *cr) 408 { 409 return (VOP_CLOSE(SOTOV(so), flag, 1, 0, cr, NULL)); 410 } 411 412 int 413 socket_close_internal(struct sonode *so, int flag, cred_t *cr) 414 { 415 ASSERT(so->so_count == 0); 416 417 return (SOP_CLOSE(so, flag, cr)); 418 } 419 420 void 421 socket_destroy(struct sonode *so) 422 { 423 vn_invalid(SOTOV(so)); 424 VN_RELE(SOTOV(so)); 425 } 426 427 /* ARGSUSED */ 428 void 429 socket_destroy_internal(struct sonode *so, cred_t *cr) 430 { 431 struct sockparams *sp = so->so_sockparams; 432 ASSERT(so->so_count == 0 && sp != NULL); 433 434 sp->sp_smod_info->smod_sock_destroy_func(so); 435 436 SOCKPARAMS_DEC_REF(sp); 437 } 438 439 /* 440 * TODO Once the common vnode ops is available, then the vnops argument 441 * should be removed. 442 */ 443 /*ARGSUSED*/ 444 int 445 sonode_constructor(void *buf, void *cdrarg, int kmflags) 446 { 447 struct sonode *so = buf; 448 struct vnode *vp; 449 450 vp = so->so_vnode = vn_alloc(kmflags); 451 if (vp == NULL) { 452 return (-1); 453 } 454 vp->v_data = so; 455 vn_setops(vp, socket_vnodeops); 456 457 so->so_priv = NULL; 458 so->so_oobmsg = NULL; 459 460 so->so_proto_handle = NULL; 461 462 so->so_peercred = NULL; 463 464 so->so_rcv_queued = 0; 465 so->so_rcv_q_head = NULL; 466 so->so_rcv_q_last_head = NULL; 467 so->so_rcv_head = NULL; 468 so->so_rcv_last_head = NULL; 469 so->so_rcv_wanted = 0; 470 so->so_rcv_timer_interval = SOCKET_NO_RCVTIMER; 471 so->so_rcv_timer_tid = 0; 472 so->so_rcv_thresh = 0; 473 474 list_create(&so->so_acceptq_list, sizeof (struct sonode), 475 offsetof(struct sonode, so_acceptq_node)); 476 list_create(&so->so_acceptq_defer, sizeof (struct sonode), 477 offsetof(struct sonode, so_acceptq_node)); 478 list_link_init(&so->so_acceptq_node); 479 so->so_acceptq_len = 0; 480 so->so_backlog = 0; 481 so->so_listener = NULL; 482 483 so->so_snd_qfull = B_FALSE; 484 485 so->so_filter_active = 0; 486 so->so_filter_tx = 0; 487 so->so_filter_defertime = 0; 488 so->so_filter_top = NULL; 489 so->so_filter_bottom = NULL; 490 491 mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL); 492 mutex_init(&so->so_acceptq_lock, NULL, MUTEX_DEFAULT, NULL); 493 rw_init(&so->so_fallback_rwlock, NULL, RW_DEFAULT, NULL); 494 cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL); 495 cv_init(&so->so_single_cv, NULL, CV_DEFAULT, NULL); 496 cv_init(&so->so_read_cv, NULL, CV_DEFAULT, NULL); 497 498 cv_init(&so->so_acceptq_cv, NULL, CV_DEFAULT, NULL); 499 cv_init(&so->so_snd_cv, NULL, CV_DEFAULT, NULL); 500 cv_init(&so->so_rcv_cv, NULL, CV_DEFAULT, NULL); 501 cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL); 502 cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL); 503 504 return (0); 505 } 506 507 /*ARGSUSED*/ 508 void 509 sonode_destructor(void *buf, void *cdrarg) 510 { 511 struct sonode *so = buf; 512 struct vnode *vp = SOTOV(so); 513 514 ASSERT(so->so_priv == NULL); 515 ASSERT(so->so_peercred == NULL); 516 517 ASSERT(so->so_oobmsg == NULL); 518 519 ASSERT(so->so_rcv_q_head == NULL); 520 521 list_destroy(&so->so_acceptq_list); 522 list_destroy(&so->so_acceptq_defer); 523 ASSERT(!list_link_active(&so->so_acceptq_node)); 524 ASSERT(so->so_listener == NULL); 525 526 ASSERT(so->so_filter_active == 0); 527 ASSERT(so->so_filter_tx == 0); 528 ASSERT(so->so_filter_top == NULL); 529 ASSERT(so->so_filter_bottom == NULL); 530 531 ASSERT(vp->v_data == so); 532 ASSERT(vn_matchops(vp, socket_vnodeops)); 533 534 vn_free(vp); 535 536 mutex_destroy(&so->so_lock); 537 mutex_destroy(&so->so_acceptq_lock); 538 rw_destroy(&so->so_fallback_rwlock); 539 540 cv_destroy(&so->so_state_cv); 541 cv_destroy(&so->so_single_cv); 542 cv_destroy(&so->so_read_cv); 543 cv_destroy(&so->so_acceptq_cv); 544 cv_destroy(&so->so_snd_cv); 545 cv_destroy(&so->so_rcv_cv); 546 cv_destroy(&so->so_closing_cv); 547 } 548 549 void 550 sonode_init(struct sonode *so, struct sockparams *sp, int family, 551 int type, int protocol, sonodeops_t *sops) 552 { 553 vnode_t *vp; 554 555 vp = SOTOV(so); 556 557 so->so_flag = 0; 558 559 so->so_state = 0; 560 so->so_mode = 0; 561 562 so->so_count = 0; 563 564 so->so_family = family; 565 so->so_type = type; 566 so->so_protocol = protocol; 567 568 SOCK_CONNID_INIT(so->so_proto_connid); 569 570 so->so_options = 0; 571 so->so_linger.l_onoff = 0; 572 so->so_linger.l_linger = 0; 573 so->so_sndbuf = 0; 574 so->so_error = 0; 575 so->so_rcvtimeo = 0; 576 so->so_sndtimeo = 0; 577 so->so_xpg_rcvbuf = 0; 578 579 ASSERT(so->so_oobmsg == NULL); 580 so->so_oobmark = 0; 581 so->so_pgrp = 0; 582 583 ASSERT(so->so_peercred == NULL); 584 585 so->so_zoneid = getzoneid(); 586 587 so->so_sockparams = sp; 588 589 so->so_ops = sops; 590 591 so->so_not_str = (sops != &sotpi_sonodeops); 592 593 so->so_proto_handle = NULL; 594 595 so->so_downcalls = NULL; 596 597 so->so_copyflag = 0; 598 599 vn_reinit(vp); 600 vp->v_vfsp = rootvfs; 601 vp->v_type = VSOCK; 602 vp->v_rdev = sockdev; 603 604 so->so_snd_qfull = B_FALSE; 605 so->so_minpsz = 0; 606 607 so->so_rcv_wakeup = B_FALSE; 608 so->so_snd_wakeup = B_FALSE; 609 so->so_flowctrld = B_FALSE; 610 611 so->so_pollev = 0; 612 bzero(&so->so_poll_list, sizeof (so->so_poll_list)); 613 bzero(&so->so_proto_props, sizeof (struct sock_proto_props)); 614 615 bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t)); 616 so->so_ksock_cb_arg = NULL; 617 618 so->so_max_addr_len = sizeof (struct sockaddr_storage); 619 620 so->so_direct = NULL; 621 622 vn_exists(vp); 623 } 624 625 void 626 sonode_fini(struct sonode *so) 627 { 628 vnode_t *vp; 629 630 ASSERT(so->so_count == 0); 631 632 if (so->so_rcv_timer_tid) { 633 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 634 (void) untimeout(so->so_rcv_timer_tid); 635 so->so_rcv_timer_tid = 0; 636 } 637 638 if (so->so_poll_list.ph_list != NULL) { 639 pollwakeup(&so->so_poll_list, POLLERR); 640 pollhead_clean(&so->so_poll_list); 641 } 642 643 if (so->so_direct != NULL) 644 sod_sock_fini(so); 645 646 vp = SOTOV(so); 647 vn_invalid(vp); 648 649 if (so->so_peercred != NULL) { 650 crfree(so->so_peercred); 651 so->so_peercred = NULL; 652 } 653 /* Detach and destroy filters */ 654 if (so->so_filter_top != NULL) 655 sof_sonode_cleanup(so); 656 657 ASSERT(list_is_empty(&so->so_acceptq_list)); 658 ASSERT(list_is_empty(&so->so_acceptq_defer)); 659 ASSERT(!list_link_active(&so->so_acceptq_node)); 660 661 ASSERT(so->so_rcv_queued == 0); 662 ASSERT(so->so_rcv_q_head == NULL); 663 ASSERT(so->so_rcv_q_last_head == NULL); 664 ASSERT(so->so_rcv_head == NULL); 665 ASSERT(so->so_rcv_last_head == NULL); 666 }