1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/t_lock.h> 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/buf.h> 31 #include <sys/conf.h> 32 #include <sys/cred.h> 33 #include <sys/kmem.h> 34 #include <sys/sysmacros.h> 35 #include <sys/vfs.h> 36 #include <sys/vfs_opreg.h> 37 #include <sys/vnode.h> 38 #include <sys/debug.h> 39 #include <sys/errno.h> 40 #include <sys/time.h> 41 #include <sys/file.h> 42 #include <sys/open.h> 43 #include <sys/user.h> 44 #include <sys/termios.h> 45 #include <sys/stream.h> 46 #include <sys/strsubr.h> 47 #include <sys/strsun.h> 48 #include <sys/esunddi.h> 49 #include <sys/flock.h> 50 #include <sys/modctl.h> 51 #include <sys/cmn_err.h> 52 #include <sys/mkdev.h> 53 #include <sys/pathname.h> 54 #include <sys/ddi.h> 55 #include <sys/stat.h> 56 #include <sys/fs/snode.h> 57 #include <sys/fs/dv_node.h> 58 #include <sys/zone.h> 59 60 #include <sys/socket.h> 61 #include <sys/socketvar.h> 62 #include <netinet/in.h> 63 #include <sys/un.h> 64 #include <sys/ucred.h> 65 66 #include <sys/tiuser.h> 67 
#define _SUN_TPI_VERSION 2 68 #include <sys/tihdr.h> 69 70 #include <c2/audit.h> 71 72 #include <fs/sockfs/nl7c.h> 73 #include <fs/sockfs/sockcommon.h> 74 #include <fs/sockfs/sockfilter_impl.h> 75 #include <fs/sockfs/socktpi.h> 76 #include <fs/sockfs/socktpi_impl.h> 77 #include <fs/sockfs/sodirect.h> 78 79 /* 80 * Macros that operate on struct cmsghdr. 81 * The CMSG_VALID macro does not assume that the last option buffer is padded. 82 */ 83 #define CMSG_CONTENT(cmsg) (&((cmsg)[1])) 84 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) 85 #define CMSG_VALID(cmsg, start, end) \ 86 (ISALIGNED_cmsghdr(cmsg) && \ 87 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ 88 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ 89 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ 90 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) 91 #define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */ 92 93 dev_t sockdev; /* For fsid in getattr */ 94 int sockfs_defer_nl7c_init = 0; 95 96 struct socklist socklist; 97 98 struct kmem_cache *socket_cache; 99 100 /* 101 * sockconf_lock protects the socket configuration (socket types and 102 * socket filters) which is changed via the sockconfig system call. 103 */ 104 krwlock_t sockconf_lock; 105 106 static int sockfs_update(kstat_t *, int); 107 static int sockfs_snapshot(kstat_t *, void *, int); 108 extern smod_info_t *sotpi_smod_create(void); 109 110 extern void sendfile_init(); 111 112 extern void nl7c_init(void); 113 114 extern int modrootloaded; 115 116 /* 117 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode. 118 * Returns with the vnode held. 119 */ 120 int 121 sogetvp(char *devpath, vnode_t **vpp, int uioflag) 122 { 123 struct snode *csp; 124 vnode_t *vp, *dvp; 125 major_t maj; 126 int error; 127 128 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE); 129 130 /* 131 * Lookup the underlying filesystem vnode. 
132 */ 133 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp); 134 if (error) 135 return (error); 136 137 /* Check that it is the correct vnode */ 138 if (vp->v_type != VCHR) { 139 VN_RELE(vp); 140 return (ENOTSOCK); 141 } 142 143 /* 144 * If devpath went through devfs, the device should already 145 * be configured. If devpath is a mknod file, however, we 146 * need to make sure the device is properly configured. 147 * To do this, we do something similar to spec_open() 148 * except that we resolve to the minor/leaf level since 149 * we need to return a vnode. 150 */ 151 csp = VTOS(VTOS(vp)->s_commonvp); 152 if (!(csp->s_flag & SDIPSET)) { 153 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 154 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname); 155 if (error == 0) 156 error = devfs_lookupname(pathname, NULLVPP, &dvp); 157 VN_RELE(vp); 158 kmem_free(pathname, MAXPATHLEN); 159 if (error != 0) 160 return (ENXIO); 161 vp = dvp; /* use the devfs vp */ 162 } 163 164 /* device is configured at this point */ 165 maj = getmajor(vp->v_rdev); 166 if (!STREAMSTAB(maj)) { 167 VN_RELE(vp); 168 return (ENOSTR); 169 } 170 171 *vpp = vp; 172 return (0); 173 } 174 175 /* 176 * Update the accessed, updated, or changed times in an sonode 177 * with the current time. 178 * 179 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable 180 * attributes in a fstat call. (They return the current time and 0 for 181 * all timestamps, respectively.) We maintain the current timestamps 182 * here primarily so that should sockmod be popped the resulting 183 * file descriptor will behave like a stream w.r.t. the timestamps. 
184 */ 185 void 186 so_update_attrs(struct sonode *so, int flag) 187 { 188 time_t now = gethrestime_sec(); 189 190 if (SOCK_IS_NONSTR(so)) 191 return; 192 193 mutex_enter(&so->so_lock); 194 so->so_flag |= flag; 195 if (flag & SOACC) 196 SOTOTPI(so)->sti_atime = now; 197 if (flag & SOMOD) 198 SOTOTPI(so)->sti_mtime = now; 199 mutex_exit(&so->so_lock); 200 } 201 202 extern so_create_func_t sock_comm_create_function; 203 extern so_destroy_func_t sock_comm_destroy_function; 204 /* 205 * Init function called when sockfs is loaded. 206 */ 207 int 208 sockinit(int fstype, char *name) 209 { 210 static const fs_operation_def_t sock_vfsops_template[] = { 211 NULL, NULL 212 }; 213 int error; 214 major_t dev; 215 char *err_str; 216 217 error = vfs_setfsops(fstype, sock_vfsops_template, NULL); 218 if (error != 0) { 219 zcmn_err(GLOBAL_ZONEID, CE_WARN, 220 "sockinit: bad vfs ops template"); 221 return (error); 222 } 223 224 error = vn_make_ops(name, socket_vnodeops_template, 225 &socket_vnodeops); 226 if (error != 0) { 227 err_str = "sockinit: bad socket vnode ops template"; 228 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */ 229 socket_vnodeops = NULL; 230 goto failure; 231 } 232 233 socket_cache = kmem_cache_create("socket_cache", 234 sizeof (struct sonode), 0, sonode_constructor, 235 sonode_destructor, NULL, NULL, NULL, 0); 236 237 rw_init(&sockconf_lock, NULL, RW_DEFAULT, NULL); 238 239 error = socktpi_init(); 240 if (error != 0) { 241 err_str = NULL; 242 goto failure; 243 } 244 245 error = sod_init(); 246 if (error != 0) { 247 err_str = NULL; 248 goto failure; 249 } 250 251 /* 252 * Set up the default create and destroy functions 253 */ 254 sock_comm_create_function = socket_sonode_create; 255 sock_comm_destroy_function = socket_sonode_destroy; 256 257 /* 258 * Build initial list mapping socket parameters to vnode. 
259 */ 260 smod_init(); 261 smod_add(sotpi_smod_create()); 262 263 sockparams_init(); 264 265 /* 266 * If sockets are needed before init runs /sbin/soconfig 267 * it is possible to preload the sockparams list here using 268 * calls like: 269 * sockconfig(1,2,3, "/dev/tcp", 0); 270 */ 271 272 /* 273 * Create a unique dev_t for use in so_fsid. 274 */ 275 276 if ((dev = getudev()) == (major_t)-1) 277 dev = 0; 278 sockdev = makedevice(dev, 0); 279 280 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL); 281 sendfile_init(); 282 if (!modrootloaded) { 283 sockfs_defer_nl7c_init = 1; 284 } else { 285 nl7c_init(); 286 } 287 288 /* Initialize socket filters */ 289 sof_init(); 290 291 return (0); 292 293 failure: 294 (void) vfs_freevfsops_by_type(fstype); 295 if (socket_vnodeops != NULL) 296 vn_freevnodeops(socket_vnodeops); 297 if (err_str != NULL) 298 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str); 299 return (error); 300 } 301 302 /* 303 * Caller must hold the mutex. Used to set SOLOCKED. 304 */ 305 void 306 so_lock_single(struct sonode *so) 307 { 308 ASSERT(MUTEX_HELD(&so->so_lock)); 309 310 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) { 311 cv_wait_stop(&so->so_single_cv, &so->so_lock, 312 SO_LOCK_WAKEUP_TIME); 313 } 314 so->so_flag |= SOLOCKED; 315 } 316 317 /* 318 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND. 319 * Used to clear SOLOCKED or SOASYNC_UNBIND. 320 */ 321 void 322 so_unlock_single(struct sonode *so, int flag) 323 { 324 ASSERT(MUTEX_HELD(&so->so_lock)); 325 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND)); 326 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0); 327 ASSERT(so->so_flag & flag); 328 /* 329 * Process the T_DISCON_IND on sti_discon_ind_mp. 330 * 331 * Call to so_drain_discon_ind will result in so_lock 332 * being dropped and re-acquired later. 
333 */ 334 if (!SOCK_IS_NONSTR(so)) { 335 sotpi_info_t *sti = SOTOTPI(so); 336 337 if (sti->sti_discon_ind_mp != NULL) 338 so_drain_discon_ind(so); 339 } 340 341 cv_signal(&so->so_single_cv); 342 so->so_flag &= ~flag; 343 } 344 345 /* 346 * Caller must hold the mutex. Used to set SOREADLOCKED. 347 * If the caller wants nonblocking behavior it should set fmode. 348 */ 349 int 350 so_lock_read(struct sonode *so, int fmode) 351 { 352 ASSERT(MUTEX_HELD(&so->so_lock)); 353 354 while (so->so_flag & SOREADLOCKED) { 355 if (fmode & (FNDELAY|FNONBLOCK)) 356 return (EWOULDBLOCK); 357 cv_wait_stop(&so->so_read_cv, &so->so_lock, 358 SO_LOCK_WAKEUP_TIME); 359 } 360 so->so_flag |= SOREADLOCKED; 361 return (0); 362 } 363 364 /* 365 * Like so_lock_read above but allows signals. 366 */ 367 int 368 so_lock_read_intr(struct sonode *so, int fmode) 369 { 370 ASSERT(MUTEX_HELD(&so->so_lock)); 371 372 while (so->so_flag & SOREADLOCKED) { 373 if (fmode & (FNDELAY|FNONBLOCK)) 374 return (EWOULDBLOCK); 375 if (!cv_wait_sig(&so->so_read_cv, &so->so_lock)) 376 return (EINTR); 377 } 378 so->so_flag |= SOREADLOCKED; 379 return (0); 380 } 381 382 /* 383 * Caller must hold the mutex. Used to clear SOREADLOCKED, 384 * set in so_lock_read() or so_lock_read_intr(). 385 */ 386 void 387 so_unlock_read(struct sonode *so) 388 { 389 ASSERT(MUTEX_HELD(&so->so_lock)); 390 ASSERT(so->so_flag & SOREADLOCKED); 391 392 cv_signal(&so->so_read_cv); 393 so->so_flag &= ~SOREADLOCKED; 394 } 395 396 /* 397 * Verify that the specified offset falls within the mblk and 398 * that the resulting pointer is aligned. 399 * Returns NULL if not. 
400 */ 401 void * 402 sogetoff(mblk_t *mp, t_uscalar_t offset, 403 t_uscalar_t length, uint_t align_size) 404 { 405 uintptr_t ptr1, ptr2; 406 407 ASSERT(mp && mp->b_wptr >= mp->b_rptr); 408 ptr1 = (uintptr_t)mp->b_rptr + offset; 409 ptr2 = (uintptr_t)ptr1 + length; 410 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) { 411 eprintline(0); 412 return (NULL); 413 } 414 if ((ptr1 & (align_size - 1)) != 0) { 415 eprintline(0); 416 return (NULL); 417 } 418 return ((void *)ptr1); 419 } 420 421 /* 422 * Return the AF_UNIX underlying filesystem vnode matching a given name. 423 * Makes sure the sending and the destination sonodes are compatible. 424 * The vnode is returned held. 425 * 426 * The underlying filesystem VSOCK vnode has a v_stream pointer that 427 * references the actual stream head (hence indirectly the actual sonode). 428 */ 429 static int 430 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess, 431 vnode_t **vpp) 432 { 433 vnode_t *vp; /* Underlying filesystem vnode */ 434 vnode_t *rvp; /* real vnode */ 435 vnode_t *svp; /* sockfs vnode */ 436 struct sonode *so2; 437 int error; 438 439 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so, 440 soun->sun_path)); 441 442 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 443 if (error) { 444 eprintsoline(so, error); 445 return (error); 446 } 447 448 /* 449 * Traverse lofs mounts get the real vnode 450 */ 451 if (VOP_REALVP(vp, &rvp, NULL) == 0) { 452 VN_HOLD(rvp); /* hold the real vnode */ 453 VN_RELE(vp); /* release hold from lookup */ 454 vp = rvp; 455 } 456 457 if (vp->v_type != VSOCK) { 458 error = ENOTSOCK; 459 eprintsoline(so, error); 460 goto done2; 461 } 462 463 if (checkaccess) { 464 /* 465 * Check that we have permissions to access the destination 466 * vnode. This check is not done in BSD but it is required 467 * by X/Open. 
468 */ 469 if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL)) { 470 eprintsoline(so, error); 471 goto done2; 472 } 473 } 474 475 /* 476 * Check if the remote socket has been closed. 477 * 478 * Synchronize with vn_rele_stream by holding v_lock while traversing 479 * v_stream->sd_vnode. 480 */ 481 mutex_enter(&vp->v_lock); 482 if (vp->v_stream == NULL) { 483 mutex_exit(&vp->v_lock); 484 if (so->so_type == SOCK_DGRAM) 485 error = EDESTADDRREQ; 486 else 487 error = ECONNREFUSED; 488 489 eprintsoline(so, error); 490 goto done2; 491 } 492 ASSERT(vp->v_stream->sd_vnode); 493 svp = vp->v_stream->sd_vnode; 494 /* 495 * holding v_lock on underlying filesystem vnode and acquiring 496 * it on sockfs vnode. Assumes that no code ever attempts to 497 * acquire these locks in the reverse order. 498 */ 499 VN_HOLD(svp); 500 mutex_exit(&vp->v_lock); 501 502 if (svp->v_type != VSOCK) { 503 error = ENOTSOCK; 504 eprintsoline(so, error); 505 goto done; 506 } 507 508 so2 = VTOSO(svp); 509 510 if (so->so_type != so2->so_type) { 511 error = EPROTOTYPE; 512 eprintsoline(so, error); 513 goto done; 514 } 515 516 VN_RELE(svp); 517 *vpp = vp; 518 return (0); 519 520 done: 521 VN_RELE(svp); 522 done2: 523 VN_RELE(vp); 524 return (error); 525 } 526 527 /* 528 * Verify peer address for connect and sendto/sendmsg. 529 * Since sendto/sendmsg would not get synchronous errors from the transport 530 * provider we have to do these ugly checks in the socket layer to 531 * preserve compatibility with SunOS 4.X. 
532 */ 533 int 534 so_addr_verify(struct sonode *so, const struct sockaddr *name, 535 socklen_t namelen) 536 { 537 int family; 538 539 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n", 540 (void *)so, (void *)name, namelen)); 541 542 ASSERT(name != NULL); 543 544 family = so->so_family; 545 switch (family) { 546 case AF_INET: 547 if (name->sa_family != family) { 548 eprintsoline(so, EAFNOSUPPORT); 549 return (EAFNOSUPPORT); 550 } 551 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) { 552 eprintsoline(so, EINVAL); 553 return (EINVAL); 554 } 555 break; 556 case AF_INET6: { 557 #ifdef DEBUG 558 struct sockaddr_in6 *sin6; 559 #endif /* DEBUG */ 560 561 if (name->sa_family != family) { 562 eprintsoline(so, EAFNOSUPPORT); 563 return (EAFNOSUPPORT); 564 } 565 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) { 566 eprintsoline(so, EINVAL); 567 return (EINVAL); 568 } 569 #ifdef DEBUG 570 /* Verify that apps don't forget to clear sin6_scope_id etc */ 571 sin6 = (struct sockaddr_in6 *)name; 572 if (sin6->sin6_scope_id != 0 && 573 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 574 zcmn_err(getzoneid(), CE_WARN, 575 "connect/send* with uninitialized sin6_scope_id " 576 "(%d) on socket. Pid = %d\n", 577 (int)sin6->sin6_scope_id, (int)curproc->p_pid); 578 } 579 #endif /* DEBUG */ 580 break; 581 } 582 case AF_UNIX: 583 if (SOTOTPI(so)->sti_faddr_noxlate) { 584 return (0); 585 } 586 if (namelen < (socklen_t)sizeof (short)) { 587 eprintsoline(so, ENOENT); 588 return (ENOENT); 589 } 590 if (name->sa_family != family) { 591 eprintsoline(so, EAFNOSUPPORT); 592 return (EAFNOSUPPORT); 593 } 594 /* MAXPATHLEN + soun_family + nul termination */ 595 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 596 eprintsoline(so, ENAMETOOLONG); 597 return (ENAMETOOLONG); 598 } 599 600 break; 601 602 default: 603 /* 604 * Default is don't do any length or sa_family check 605 * to allow non-sockaddr style addresses. 
606 */ 607 break; 608 } 609 610 return (0); 611 } 612 613 614 /* 615 * Translate an AF_UNIX sockaddr_un to the transport internal name. 616 * Assumes caller has called so_addr_verify first. 617 */ 618 /*ARGSUSED*/ 619 int 620 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name, 621 socklen_t namelen, int checkaccess, 622 void **addrp, socklen_t *addrlenp) 623 { 624 int error; 625 struct sockaddr_un *soun; 626 vnode_t *vp; 627 void *addr; 628 socklen_t addrlen; 629 sotpi_info_t *sti = SOTOTPI(so); 630 631 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n", 632 (void *)so, (void *)name, namelen, checkaccess)); 633 634 ASSERT(name != NULL); 635 ASSERT(so->so_family == AF_UNIX); 636 ASSERT(!sti->sti_faddr_noxlate); 637 ASSERT(namelen >= (socklen_t)sizeof (short)); 638 ASSERT(name->sa_family == AF_UNIX); 639 soun = (struct sockaddr_un *)name; 640 /* 641 * Lookup vnode for the specified path name and verify that 642 * it is a socket. 643 */ 644 error = so_ux_lookup(so, soun, checkaccess, &vp); 645 if (error) { 646 eprintsoline(so, error); 647 return (error); 648 } 649 /* 650 * Use the address of the peer vnode as the address to send 651 * to. We release the peer vnode here. In case it has been 652 * closed by the time the T_CONN_REQ or T_UNIDATA_REQ reaches the 653 * transport the message will get an error or be dropped. 654 */ 655 sti->sti_ux_faddr.soua_vp = vp; 656 sti->sti_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT; 657 addr = &sti->sti_ux_faddr; 658 addrlen = (socklen_t)sizeof (sti->sti_ux_faddr); 659 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n", 660 addrlen, (void *)vp)); 661 VN_RELE(vp); 662 *addrp = addr; 663 *addrlenp = (socklen_t)addrlen; 664 return (0); 665 } 666 667 /* 668 * Esballoc free function for messages that contain SO_FILEP option. 669 * Decrement the reference count on the file pointers using closef. 
670 */ 671 void 672 fdbuf_free(struct fdbuf *fdbuf) 673 { 674 int i; 675 struct file *fp; 676 677 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd)); 678 for (i = 0; i < fdbuf->fd_numfd; i++) { 679 /* 680 * We need pointer size alignment for fd_fds. On a LP64 681 * kernel, the required alignment is 8 bytes while 682 * the option headers and values are only 4 bytes 683 * aligned. So its safer to do a bcopy compared to 684 * assigning fdbuf->fd_fds[i] to fp. 685 */ 686 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 687 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp)); 688 (void) closef(fp); 689 } 690 if (fdbuf->fd_ebuf != NULL) 691 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen); 692 kmem_free(fdbuf, fdbuf->fd_size); 693 } 694 695 /* 696 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing. 697 * Waits if memory is not available. 698 */ 699 mblk_t * 700 fdbuf_allocmsg(int size, struct fdbuf *fdbuf) 701 { 702 uchar_t *buf; 703 mblk_t *mp; 704 705 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd)); 706 buf = kmem_alloc(size, KM_SLEEP); 707 fdbuf->fd_ebuf = (caddr_t)buf; 708 fdbuf->fd_ebuflen = size; 709 fdbuf->fd_frtn.free_func = fdbuf_free; 710 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf; 711 712 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn); 713 mp->b_datap->db_type = M_PROTO; 714 return (mp); 715 } 716 717 /* 718 * Extract file descriptors from a fdbuf. 719 * Return list in rights/rightslen. 720 */ 721 /*ARGSUSED*/ 722 static int 723 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen) 724 { 725 int i, fd; 726 int *rp; 727 struct file *fp; 728 int numfd; 729 730 dprint(1, ("fdbuf_extract: %d fds, len %d\n", 731 fdbuf->fd_numfd, rightslen)); 732 733 numfd = fdbuf->fd_numfd; 734 ASSERT(rightslen == numfd * (int)sizeof (int)); 735 736 /* 737 * Allocate a file descriptor and increment the f_count. 738 * The latter is needed since we always call fdbuf_free 739 * which performs a closef. 
740 */ 741 rp = (int *)rights; 742 for (i = 0; i < numfd; i++) { 743 if ((fd = ufalloc(0)) == -1) 744 goto cleanup; 745 /* 746 * We need pointer size alignment for fd_fds. On a LP64 747 * kernel, the required alignment is 8 bytes while 748 * the option headers and values are only 4 bytes 749 * aligned. So its safer to do a bcopy compared to 750 * assigning fdbuf->fd_fds[i] to fp. 751 */ 752 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 753 mutex_enter(&fp->f_tlock); 754 fp->f_count++; 755 mutex_exit(&fp->f_tlock); 756 setf(fd, fp); 757 *rp++ = fd; 758 759 /* 760 * Add the current pid to the list associated with this 761 * descriptor. 762 */ 763 if (fp->f_vnode != NULL) 764 (void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID, 765 (intptr_t)curproc->p_pidp->pid_id, FKIOCTL, kcred, 766 NULL, NULL); 767 768 if (AU_AUDITING()) 769 audit_fdrecv(fd, fp); 770 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n", 771 i, fd, (void *)fp, fp->f_count)); 772 } 773 return (0); 774 775 cleanup: 776 /* 777 * Undo whatever partial work the loop above has done. 778 */ 779 { 780 int j; 781 782 rp = (int *)rights; 783 for (j = 0; j < i; j++) { 784 dprint(0, 785 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp)); 786 (void) closeandsetf(*rp++, NULL); 787 } 788 } 789 790 return (EMFILE); 791 } 792 793 /* 794 * Insert file descriptors into an fdbuf. 795 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed 796 * by calling fdbuf_free(). 
797 */ 798 int 799 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp) 800 { 801 int numfd, i; 802 int *fds; 803 struct file *fp; 804 struct fdbuf *fdbuf; 805 int fdbufsize; 806 807 dprint(1, ("fdbuf_create: len %d\n", rightslen)); 808 809 numfd = rightslen / (int)sizeof (int); 810 811 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)); 812 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP); 813 fdbuf->fd_size = fdbufsize; 814 fdbuf->fd_numfd = 0; 815 fdbuf->fd_ebuf = NULL; 816 fdbuf->fd_ebuflen = 0; 817 fds = (int *)rights; 818 for (i = 0; i < numfd; i++) { 819 if ((fp = getf(fds[i])) == NULL) { 820 fdbuf_free(fdbuf); 821 return (EBADF); 822 } 823 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n", 824 i, fds[i], (void *)fp, fp->f_count)); 825 mutex_enter(&fp->f_tlock); 826 fp->f_count++; 827 mutex_exit(&fp->f_tlock); 828 /* 829 * The maximum alignment for fdbuf (or any option header 830 * and its value) it 4 bytes. On a LP64 kernel, the alignment 831 * is not sufficient for pointers (fd_fds in this case). Since 832 * we just did a kmem_alloc (we get a double word alignment), 833 * we don't need to do anything on the send side (we loose 834 * the double word alignment because fdbuf goes after an 835 * option header (eg T_unitdata_req) which is only 4 byte 836 * aligned). We take care of this when we extract the file 837 * descriptor in fdbuf_extract or fdbuf_free. 
838 */ 839 fdbuf->fd_fds[i] = fp; 840 fdbuf->fd_numfd++; 841 releasef(fds[i]); 842 if (AU_AUDITING()) 843 audit_fdsend(fds[i], fp, 0); 844 } 845 *fdbufp = fdbuf; 846 return (0); 847 } 848 849 static int 850 fdbuf_optlen(int rightslen) 851 { 852 int numfd; 853 854 numfd = rightslen / (int)sizeof (int); 855 856 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *))); 857 } 858 859 static t_uscalar_t 860 fdbuf_cmsglen(int fdbuflen) 861 { 862 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) / 863 (int)sizeof (struct file *) * (int)sizeof (int)); 864 } 865 866 867 /* 868 * Return non-zero if the mblk and fdbuf are consistent. 869 */ 870 static int 871 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen) 872 { 873 if (fdbuflen >= FDBUF_HDRSIZE && 874 fdbuflen == fdbuf->fd_size) { 875 frtn_t *frp = mp->b_datap->db_frtnp; 876 /* 877 * Check that the SO_FILEP portion of the 878 * message has not been modified by 879 * the loopback transport. The sending sockfs generates 880 * a message that is esballoc'ed with the free function 881 * being fdbuf_free() and where free_arg contains the 882 * identical information as the SO_FILEP content. 883 * 884 * If any of these constraints are not satisfied we 885 * silently ignore the option. 886 */ 887 ASSERT(mp); 888 if (frp != NULL && 889 frp->free_func == fdbuf_free && 890 frp->free_arg != NULL && 891 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) { 892 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n", 893 (void *)fdbuf, fdbuflen)); 894 return (1); 895 } else { 896 zcmn_err(getzoneid(), CE_WARN, 897 "sockfs: mismatched fdbuf content (%p)", 898 (void *)mp); 899 return (0); 900 } 901 } else { 902 zcmn_err(getzoneid(), CE_WARN, 903 "sockfs: mismatched fdbuf len %d, %d\n", 904 fdbuflen, fdbuf->fd_size); 905 return (0); 906 } 907 } 908 909 /* 910 * When the file descriptors returned by sorecvmsg can not be passed 911 * to the application this routine will cleanup the references on 912 * the files. 
Start at startoff bytes into the buffer. 913 */ 914 static void 915 close_fds(void *fdbuf, int fdbuflen, int startoff) 916 { 917 int *fds = (int *)fdbuf; 918 int numfd = fdbuflen / (int)sizeof (int); 919 int i; 920 921 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff)); 922 923 for (i = 0; i < numfd; i++) { 924 if (startoff < 0) 925 startoff = 0; 926 if (startoff < (int)sizeof (int)) { 927 /* 928 * This file descriptor is partially or fully after 929 * the offset 930 */ 931 dprint(0, 932 ("close_fds: cleanup[%d] = %d\n", i, fds[i])); 933 (void) closeandsetf(fds[i], NULL); 934 } 935 startoff -= (int)sizeof (int); 936 } 937 } 938 939 /* 940 * Close all file descriptors contained in the control part starting at 941 * the startoffset. 942 */ 943 void 944 so_closefds(void *control, t_uscalar_t controllen, int oldflg, 945 int startoff) 946 { 947 struct cmsghdr *cmsg; 948 949 if (control == NULL) 950 return; 951 952 if (oldflg) { 953 close_fds(control, controllen, startoff); 954 return; 955 } 956 /* Scan control part for file descriptors. */ 957 for (cmsg = (struct cmsghdr *)control; 958 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 959 cmsg = CMSG_NEXT(cmsg)) { 960 if (cmsg->cmsg_level == SOL_SOCKET && 961 cmsg->cmsg_type == SCM_RIGHTS) { 962 close_fds(CMSG_CONTENT(cmsg), 963 (int)CMSG_CONTENTLEN(cmsg), 964 startoff - (int)sizeof (struct cmsghdr)); 965 } 966 startoff -= cmsg->cmsg_len; 967 } 968 } 969 970 /* 971 * Returns a pointer/length for the file descriptors contained 972 * in the control buffer. Returns with *fdlenp == -1 if there are no 973 * file descriptor options present. This is different than there being 974 * a zero-length file descriptor option. 975 * Fail if there are multiple SCM_RIGHT cmsgs. 
976 */ 977 int 978 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg, 979 void **fdsp, int *fdlenp) 980 { 981 struct cmsghdr *cmsg; 982 void *fds; 983 int fdlen; 984 985 if (control == NULL) { 986 *fdsp = NULL; 987 *fdlenp = -1; 988 return (0); 989 } 990 991 if (oldflg) { 992 *fdsp = control; 993 if (controllen == 0) 994 *fdlenp = -1; 995 else 996 *fdlenp = controllen; 997 dprint(1, ("so_getfdopt: old %d\n", *fdlenp)); 998 return (0); 999 } 1000 1001 fds = NULL; 1002 fdlen = 0; 1003 1004 for (cmsg = (struct cmsghdr *)control; 1005 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1006 cmsg = CMSG_NEXT(cmsg)) { 1007 if (cmsg->cmsg_level == SOL_SOCKET && 1008 cmsg->cmsg_type == SCM_RIGHTS) { 1009 if (fds != NULL) 1010 return (EINVAL); 1011 fds = CMSG_CONTENT(cmsg); 1012 fdlen = (int)CMSG_CONTENTLEN(cmsg); 1013 dprint(1, ("so_getfdopt: new %lu\n", 1014 (size_t)CMSG_CONTENTLEN(cmsg))); 1015 } 1016 } 1017 if (fds == NULL) { 1018 dprint(1, ("so_getfdopt: NONE\n")); 1019 *fdlenp = -1; 1020 } else 1021 *fdlenp = fdlen; 1022 *fdsp = fds; 1023 return (0); 1024 } 1025 1026 /* 1027 * Return the length of the options including any file descriptor options. 
1028 */ 1029 t_uscalar_t 1030 so_optlen(void *control, t_uscalar_t controllen, int oldflg) 1031 { 1032 struct cmsghdr *cmsg; 1033 t_uscalar_t optlen = 0; 1034 t_uscalar_t len; 1035 1036 if (control == NULL) 1037 return (0); 1038 1039 if (oldflg) 1040 return ((t_uscalar_t)(sizeof (struct T_opthdr) + 1041 fdbuf_optlen(controllen))); 1042 1043 for (cmsg = (struct cmsghdr *)control; 1044 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1045 cmsg = CMSG_NEXT(cmsg)) { 1046 if (cmsg->cmsg_level == SOL_SOCKET && 1047 cmsg->cmsg_type == SCM_RIGHTS) { 1048 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg)); 1049 } else { 1050 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1051 } 1052 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) + 1053 sizeof (struct T_opthdr)); 1054 } 1055 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n", 1056 controllen, oldflg, optlen)); 1057 return (optlen); 1058 } 1059 1060 /* 1061 * Copy options from control to the mblk. Skip any file descriptor options. 1062 */ 1063 void 1064 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp) 1065 { 1066 struct T_opthdr toh; 1067 struct cmsghdr *cmsg; 1068 1069 if (control == NULL) 1070 return; 1071 1072 if (oldflg) { 1073 /* No real options - caller has handled file descriptors */ 1074 return; 1075 } 1076 for (cmsg = (struct cmsghdr *)control; 1077 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1078 cmsg = CMSG_NEXT(cmsg)) { 1079 /* 1080 * Note: The caller handles file descriptors prior 1081 * to calling this function. 
1082 */ 1083 t_uscalar_t len; 1084 1085 if (cmsg->cmsg_level == SOL_SOCKET && 1086 cmsg->cmsg_type == SCM_RIGHTS) 1087 continue; 1088 1089 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1090 toh.level = cmsg->cmsg_level; 1091 toh.name = cmsg->cmsg_type; 1092 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr); 1093 toh.status = 0; 1094 1095 soappendmsg(mp, &toh, sizeof (toh)); 1096 soappendmsg(mp, CMSG_CONTENT(cmsg), len); 1097 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len; 1098 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 1099 } 1100 } 1101 1102 /* 1103 * Return the length of the control message derived from the options. 1104 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP. 1105 * When oldflg is set only include SO_FILEP. 1106 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1107 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1108 * also be checked for any possible impacts. 1109 */ 1110 t_uscalar_t 1111 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg) 1112 { 1113 t_uscalar_t cmsglen = 0; 1114 struct T_opthdr *tohp; 1115 t_uscalar_t len; 1116 t_uscalar_t last_roundup = 0; 1117 1118 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1119 1120 for (tohp = (struct T_opthdr *)opt; 1121 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1122 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1123 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n", 1124 tohp->level, tohp->name, tohp->len)); 1125 if (tohp->level == SOL_SOCKET && 1126 (tohp->name == SO_SRCADDR || 1127 tohp->name == SO_UNIX_CLOSE)) { 1128 continue; 1129 } 1130 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) { 1131 struct fdbuf *fdbuf; 1132 int fdbuflen; 1133 1134 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp); 1135 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp); 1136 1137 if (!fdbuf_verify(mp, fdbuf, fdbuflen)) 1138 continue; 1139 if (oldflg) { 1140 cmsglen += fdbuf_cmsglen(fdbuflen); 1141 continue; 1142 } 1143 len = fdbuf_cmsglen(fdbuflen); 
1144 } else if (tohp->level == SOL_SOCKET && 1145 tohp->name == SCM_TIMESTAMP) { 1146 if (oldflg) 1147 continue; 1148 1149 if (get_udatamodel() == DATAMODEL_NATIVE) { 1150 len = sizeof (struct timeval); 1151 } else { 1152 len = sizeof (struct timeval32); 1153 } 1154 } else { 1155 if (oldflg) 1156 continue; 1157 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1158 } 1159 /* 1160 * Exclude roundup for last option to not set 1161 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit. 1162 */ 1163 last_roundup = (t_uscalar_t) 1164 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) - 1165 (len + (int)sizeof (struct cmsghdr))); 1166 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) + 1167 last_roundup; 1168 } 1169 cmsglen -= last_roundup; 1170 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n", 1171 optlen, oldflg, cmsglen)); 1172 return (cmsglen); 1173 } 1174 1175 /* 1176 * Copy options from options to the control. Convert SO_FILEP to 1177 * file descriptors. 1178 * Returns errno or zero. 1179 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1180 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1181 * also be checked for any possible impacts. 
*/
/*
 * Arguments:
 *	mp		message block; only handed to fdbuf_verify() here
 *	opt, optlen	T_opthdr option buffer to convert (opt must be aligned)
 *	oldflg		non-zero: only SO_FILEP is processed and its
 *			descriptors are extracted straight into "control"
 *			without a cmsghdr
 *	control		destination buffer for the control message
 *	controllen	size of "control" in bytes
 *
 * Returns EPROTO when an SO_FILEP option fails fdbuf_verify(), the error
 * from fdbuf_extract() when that fails, and 0 otherwise.
 */
int
so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg,
    void *control, t_uscalar_t controllen)
{
	struct T_opthdr *tohp;
	struct cmsghdr *cmsg;
	struct fdbuf *fdbuf;
	int fdbuflen;
	int error;
#if defined(DEBUG) || defined(__lint)
	/* End of the (rounded up) control buffer; used by the checks below */
	struct cmsghdr *cend = (struct cmsghdr *)
	    (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
#endif
	/* cmsg always points at the next cmsghdr slot to be filled */
	cmsg = (struct cmsghdr *)control;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));

		/* Same exclusions as so_cmsglen() */
		if (tohp->level == SOL_SOCKET &&
		    (tohp->name == SO_SRCADDR ||
		    tohp->name == SO_UNIX_CLOSE)) {
			continue;
		}
		ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);

			/*
			 * Unlike so_cmsglen(), which skips such an option,
			 * a bad fdbuf is a hard error here.
			 */
			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
				return (EPROTO);
			if (oldflg) {
				/*
				 * Old-style: extract at the start of the
				 * control buffer; cmsg is not advanced.
				 */
				error = fdbuf_extract(fdbuf, control,
				    (int)controllen);
				if (error != 0)
					return (error);
				continue;
			} else {
				int fdlen;

				fdlen = (int)fdbuf_cmsglen(
				    (int)_TPI_TOPT_DATALEN(tohp));

				/* SO_FILEP is presented as SCM_RIGHTS */
				cmsg->cmsg_level = tohp->level;
				cmsg->cmsg_type = SCM_RIGHTS;
				cmsg->cmsg_len = (socklen_t)(fdlen +
				    sizeof (struct cmsghdr));

				error = fdbuf_extract(fdbuf,
				    CMSG_CONTENT(cmsg), fdlen);
				if (error != 0)
					return (error);
			}
		} else if (tohp->level == SOL_SOCKET &&
		    tohp->name == SCM_TIMESTAMP) {
			timestruc_t *timestamp;

			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;

			/*
			 * The timestamp is read from the first
			 * intptr_t-aligned address following the option
			 * header.
			 */
			timestamp =
			    (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
			    sizeof (intptr_t));

			/* Convert nanoseconds to a timeval of the right width */
			if (get_udatamodel() == DATAMODEL_NATIVE) {
				struct timeval tv;

				cmsg->cmsg_len = sizeof (struct timeval) +
				    sizeof (struct cmsghdr);
				tv.tv_sec = timestamp->tv_sec;
				tv.tv_usec = timestamp->tv_nsec /
				    (NANOSEC / MICROSEC);
				/*
				 * on LP64 systems, the struct timeval in
				 * the destination will not be 8-byte aligned,
				 * so use bcopy to avoid alignment trouble
				 */
				bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
			} else {
				struct timeval32 *time32;

				cmsg->cmsg_len = sizeof (struct timeval32) +
				    sizeof (struct cmsghdr);
				time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
				time32->tv_sec = (time32_t)timestamp->tv_sec;
				time32->tv_usec =
				    (int32_t)(timestamp->tv_nsec /
				    (NANOSEC / MICROSEC));
			}

		} else {
			if (oldflg)
				continue;

			/* Generic option: level/type/data copied verbatim */
			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;
			cmsg->cmsg_len = (socklen_t)(_TPI_TOPT_DATALEN(tohp) +
			    sizeof (struct cmsghdr));

			/* copy content to control data part */
			bcopy(&tohp[1], CMSG_CONTENT(cmsg),
			    CMSG_CONTENTLEN(cmsg));
		}
		/* move to next CMSG structure! */
		cmsg = CMSG_NEXT(cmsg);
	}
	dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
	    control, controllen, (void *)cend, (void *)cmsg));
	ASSERT(cmsg <= cend);
	return (0);
}

/*
 * Extract the SO_SRCADDR option value if present.
1304 */ 1305 void 1306 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp, 1307 t_uscalar_t *srclenp) 1308 { 1309 struct T_opthdr *tohp; 1310 1311 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1312 1313 ASSERT(srcp != NULL && srclenp != NULL); 1314 *srcp = NULL; 1315 *srclenp = 0; 1316 1317 for (tohp = (struct T_opthdr *)opt; 1318 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1319 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1320 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n", 1321 tohp->level, tohp->name, tohp->len)); 1322 if (tohp->level == SOL_SOCKET && 1323 tohp->name == SO_SRCADDR) { 1324 *srcp = _TPI_TOPT_DATA(tohp); 1325 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1326 } 1327 } 1328 } 1329 1330 /* 1331 * Verify if the SO_UNIX_CLOSE option is present. 1332 */ 1333 int 1334 so_getopt_unix_close(void *opt, t_uscalar_t optlen) 1335 { 1336 struct T_opthdr *tohp; 1337 1338 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1339 1340 for (tohp = (struct T_opthdr *)opt; 1341 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1342 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1343 dprint(1, 1344 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n", 1345 tohp->level, tohp->name, tohp->len)); 1346 if (tohp->level == SOL_SOCKET && 1347 tohp->name == SO_UNIX_CLOSE) 1348 return (1); 1349 } 1350 return (0); 1351 } 1352 1353 /* 1354 * Allocate an M_PROTO message. 1355 * 1356 * If allocation fails the behavior depends on sleepflg: 1357 * _ALLOC_NOSLEEP fail immediately 1358 * _ALLOC_INTR sleep for memory until a signal is caught 1359 * _ALLOC_SLEEP sleep forever. Don't return NULL. 
1360 */ 1361 mblk_t * 1362 soallocproto(size_t size, int sleepflg, cred_t *cr) 1363 { 1364 mblk_t *mp; 1365 1366 /* Round up size for reuse */ 1367 size = MAX(size, 64); 1368 if (cr != NULL) 1369 mp = allocb_cred(size, cr, curproc->p_pid); 1370 else 1371 mp = allocb(size, BPRI_MED); 1372 1373 if (mp == NULL) { 1374 int error; /* Dummy - error not returned to caller */ 1375 1376 switch (sleepflg) { 1377 case _ALLOC_SLEEP: 1378 if (cr != NULL) { 1379 mp = allocb_cred_wait(size, STR_NOSIG, &error, 1380 cr, curproc->p_pid); 1381 } else { 1382 mp = allocb_wait(size, BPRI_MED, STR_NOSIG, 1383 &error); 1384 } 1385 ASSERT(mp); 1386 break; 1387 case _ALLOC_INTR: 1388 if (cr != NULL) { 1389 mp = allocb_cred_wait(size, 0, &error, cr, 1390 curproc->p_pid); 1391 } else { 1392 mp = allocb_wait(size, BPRI_MED, 0, &error); 1393 } 1394 if (mp == NULL) { 1395 /* Caught signal while sleeping for memory */ 1396 eprintline(ENOBUFS); 1397 return (NULL); 1398 } 1399 break; 1400 case _ALLOC_NOSLEEP: 1401 default: 1402 eprintline(ENOBUFS); 1403 return (NULL); 1404 } 1405 } 1406 DB_TYPE(mp) = M_PROTO; 1407 return (mp); 1408 } 1409 1410 /* 1411 * Allocate an M_PROTO message with a single component. 1412 * len is the length of buf. size is the amount to allocate. 1413 * 1414 * buf can be NULL with a non-zero len. 1415 * This results in a bzero'ed chunk being placed the message. 1416 */ 1417 mblk_t * 1418 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg, 1419 cred_t *cr) 1420 { 1421 mblk_t *mp; 1422 1423 if (size == 0) 1424 size = len; 1425 1426 ASSERT(size >= len); 1427 /* Round up size for reuse */ 1428 size = MAX(size, 64); 1429 mp = soallocproto(size, sleepflg, cr); 1430 if (mp == NULL) 1431 return (NULL); 1432 mp->b_datap->db_type = M_PROTO; 1433 if (len != 0) { 1434 if (buf != NULL) 1435 bcopy(buf, mp->b_wptr, len); 1436 else 1437 bzero(mp->b_wptr, len); 1438 mp->b_wptr += len; 1439 } 1440 return (mp); 1441 } 1442 1443 /* 1444 * Append buf/len to mp. 
1445 * The caller has to ensure that there is enough room in the mblk. 1446 * 1447 * buf can be NULL with a non-zero len. 1448 * This results in a bzero'ed chunk being placed the message. 1449 */ 1450 void 1451 soappendmsg(mblk_t *mp, const void *buf, ssize_t len) 1452 { 1453 ASSERT(mp); 1454 1455 if (len != 0) { 1456 /* Assert for room left */ 1457 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len); 1458 if (buf != NULL) 1459 bcopy(buf, mp->b_wptr, len); 1460 else 1461 bzero(mp->b_wptr, len); 1462 } 1463 mp->b_wptr += len; 1464 } 1465 1466 /* 1467 * Create a message using two kernel buffers. 1468 * If size is set that will determine the allocation size (e.g. for future 1469 * soappendmsg calls). If size is zero it is derived from the buffer 1470 * lengths. 1471 */ 1472 mblk_t * 1473 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1474 ssize_t size, int sleepflg, cred_t *cr) 1475 { 1476 mblk_t *mp; 1477 1478 if (size == 0) 1479 size = len1 + len2; 1480 ASSERT(size >= len1 + len2); 1481 1482 mp = soallocproto1(buf1, len1, size, sleepflg, cr); 1483 if (mp) 1484 soappendmsg(mp, buf2, len2); 1485 return (mp); 1486 } 1487 1488 /* 1489 * Create a message using three kernel buffers. 1490 * If size is set that will determine the allocation size (for future 1491 * soappendmsg calls). If size is zero it is derived from the buffer 1492 * lengths. 
1493 */ 1494 mblk_t * 1495 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1496 const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr) 1497 { 1498 mblk_t *mp; 1499 1500 if (size == 0) 1501 size = len1 + len2 +len3; 1502 ASSERT(size >= len1 + len2 + len3); 1503 1504 mp = soallocproto1(buf1, len1, size, sleepflg, cr); 1505 if (mp != NULL) { 1506 soappendmsg(mp, buf2, len2); 1507 soappendmsg(mp, buf3, len3); 1508 } 1509 return (mp); 1510 } 1511 1512 #ifdef DEBUG 1513 char * 1514 pr_state(uint_t state, uint_t mode) 1515 { 1516 static char buf[1024]; 1517 1518 buf[0] = 0; 1519 if (state & SS_ISCONNECTED) 1520 (void) strcat(buf, "ISCONNECTED "); 1521 if (state & SS_ISCONNECTING) 1522 (void) strcat(buf, "ISCONNECTING "); 1523 if (state & SS_ISDISCONNECTING) 1524 (void) strcat(buf, "ISDISCONNECTING "); 1525 if (state & SS_CANTSENDMORE) 1526 (void) strcat(buf, "CANTSENDMORE "); 1527 1528 if (state & SS_CANTRCVMORE) 1529 (void) strcat(buf, "CANTRCVMORE "); 1530 if (state & SS_ISBOUND) 1531 (void) strcat(buf, "ISBOUND "); 1532 if (state & SS_NDELAY) 1533 (void) strcat(buf, "NDELAY "); 1534 if (state & SS_NONBLOCK) 1535 (void) strcat(buf, "NONBLOCK "); 1536 1537 if (state & SS_ASYNC) 1538 (void) strcat(buf, "ASYNC "); 1539 if (state & SS_ACCEPTCONN) 1540 (void) strcat(buf, "ACCEPTCONN "); 1541 if (state & SS_SAVEDEOR) 1542 (void) strcat(buf, "SAVEDEOR "); 1543 1544 if (state & SS_RCVATMARK) 1545 (void) strcat(buf, "RCVATMARK "); 1546 if (state & SS_OOBPEND) 1547 (void) strcat(buf, "OOBPEND "); 1548 if (state & SS_HAVEOOBDATA) 1549 (void) strcat(buf, "HAVEOOBDATA "); 1550 if (state & SS_HADOOBDATA) 1551 (void) strcat(buf, "HADOOBDATA "); 1552 1553 if (mode & SM_PRIV) 1554 (void) strcat(buf, "PRIV "); 1555 if (mode & SM_ATOMIC) 1556 (void) strcat(buf, "ATOMIC "); 1557 if (mode & SM_ADDR) 1558 (void) strcat(buf, "ADDR "); 1559 if (mode & SM_CONNREQUIRED) 1560 (void) strcat(buf, "CONNREQUIRED "); 1561 1562 if (mode & SM_FDPASSING) 
1563 (void) strcat(buf, "FDPASSING "); 1564 if (mode & SM_EXDATA) 1565 (void) strcat(buf, "EXDATA "); 1566 if (mode & SM_OPTDATA) 1567 (void) strcat(buf, "OPTDATA "); 1568 if (mode & SM_BYTESTREAM) 1569 (void) strcat(buf, "BYTESTREAM "); 1570 return (buf); 1571 } 1572 1573 char * 1574 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen) 1575 { 1576 static char buf[1024]; 1577 1578 if (addr == NULL || addrlen == 0) { 1579 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr); 1580 return (buf); 1581 } 1582 switch (family) { 1583 case AF_INET: { 1584 struct sockaddr_in sin; 1585 1586 bcopy(addr, &sin, sizeof (sin)); 1587 1588 (void) sprintf(buf, "(len %d) %x/%d", 1589 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port)); 1590 break; 1591 } 1592 case AF_INET6: { 1593 struct sockaddr_in6 sin6; 1594 uint16_t *piece = (uint16_t *)&sin6.sin6_addr; 1595 1596 bcopy((char *)addr, (char *)&sin6, sizeof (sin6)); 1597 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d", 1598 addrlen, 1599 ntohs(piece[0]), ntohs(piece[1]), 1600 ntohs(piece[2]), ntohs(piece[3]), 1601 ntohs(piece[4]), ntohs(piece[5]), 1602 ntohs(piece[6]), ntohs(piece[7]), 1603 ntohs(sin6.sin6_port)); 1604 break; 1605 } 1606 case AF_UNIX: { 1607 struct sockaddr_un *soun = (struct sockaddr_un *)addr; 1608 1609 (void) sprintf(buf, "(len %d) %s", addrlen, 1610 (soun == NULL) ? "(none)" : soun->sun_path); 1611 break; 1612 } 1613 default: 1614 (void) sprintf(buf, "(unknown af %d)", family); 1615 break; 1616 } 1617 return (buf); 1618 } 1619 1620 /* The logical equivalence operator (a if-and-only-if b) */ 1621 #define EQUIVALENT(a, b) (((a) && (b)) || (!(a) && (!(b)))) 1622 1623 /* 1624 * Verify limitations and invariants on oob state. 
* Return 1 if OK, otherwise 0 so that it can be used as
 *	ASSERT(so_verify_oobstate(so));
 * Each failed check prints a "Bad oob state N" diagnostic before returning.
 * The caller must hold so->so_lock.
 */
int
so_verify_oobstate(struct sonode *so)
{
	boolean_t havemark;

	ASSERT(MUTEX_HELD(&so->so_lock));

	/*
	 * The possible state combinations are:
	 *	0
	 *	SS_OOBPEND
	 *	SS_OOBPEND|SS_HAVEOOBDATA
	 *	SS_OOBPEND|SS_HADOOBDATA
	 *	SS_HADOOBDATA
	 */
	switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) {
	case 0:
	case SS_OOBPEND:
	case SS_OOBPEND|SS_HAVEOOBDATA:
	case SS_OOBPEND|SS_HADOOBDATA:
	case SS_HADOOBDATA:
		break;
	default:
		printf("Bad oob state 1 (%p): state %s\n",
		    (void *)so, pr_state(so->so_state, so->so_mode));
		return (0);
	}

	/* SS_RCVATMARK should only be set when SS_OOBPEND is set */
	if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) {
		printf("Bad oob state 2 (%p): state %s\n",
		    (void *)so, pr_state(so->so_state, so->so_mode));
		return (0);
	}

	/*
	 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
	 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
	 * For non-STREAMS sockets so_oobmark is used instead.
	 */
	havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 :
	    SOTOTPI(so)->sti_oobsigcnt > 0;

	if (!EQUIVALENT(havemark || (so->so_state & SS_RCVATMARK),
	    so->so_state & SS_OOBPEND)) {
		printf("Bad oob state 3 (%p): state %s\n",
		    (void *)so, pr_state(so->so_state, so->so_mode));
		return (0);
	}

	/*
	 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
	 */
	if (!(so->so_options & SO_OOBINLINE) &&
	    !EQUIVALENT(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) {
		printf("Bad oob state 4 (%p): state %s\n",
		    (void *)so, pr_state(so->so_state, so->so_mode));
		return (0);
	}

	/* TPI sockets only: sti_oobsigcnt must not be below sti_oobcnt */
	if (!SOCK_IS_NONSTR(so) &&
	    SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) {
		printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
		    (void *)so, SOTOTPI(so)->sti_oobsigcnt,
		    SOTOTPI(so)->sti_oobcnt,
		    pr_state(so->so_state, so->so_mode));
		return (0);
	}

	return (1);
}
#undef	EQUIVALENT
#endif /* DEBUG */

/*
 * initialize sockfs zone specific kstat related items
 *
 * Creates the raw, variable-size, virtual "sock_unix_list" kstat (module
 * "sockfs", class "misc") for the given zone and installs it with
 * sockfs_update/sockfs_snapshot as callbacks and socklist.sl_lock as its
 * lock. The zone id is stashed in ks_private for the callbacks.
 *
 * Returns the kstat, or NULL if kstat_create_zone() failed; the value is
 * what sock_kstat_fini() expects as its arg.
 */
void *
sock_kstat_init(zoneid_t zoneid)
{
	kstat_t	*ksp;

	ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid);

	if (ksp != NULL) {
		ksp->ks_update = sockfs_update;
		ksp->ks_snapshot = sockfs_snapshot;
		ksp->ks_lock = &socklist.sl_lock;
		ksp->ks_private = (void *)(uintptr_t)zoneid;
		kstat_install(ksp);
	}

	return (ksp);
}

/*
 * tear down sockfs zone specific kstat related items
 *
 * arg is the kstat pointer returned by sock_kstat_init(); a NULL arg
 * (creation had failed) is a no-op.
 */
/*ARGSUSED*/
void
sock_kstat_fini(zoneid_t zoneid, void *arg)
{
	kstat_t *ksp = (kstat_t *)arg;

	if (ksp != NULL) {
		/* The kstat must belong to the zone being torn down */
		ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private);
		kstat_delete(ksp);
	}
}

/*
 * Zones:
 * Note that nactive is going to be different for each zone.
1737 * This means we require kstat to call sockfs_update and then sockfs_snapshot 1738 * for the same zone, or sockfs_snapshot will be taken into the wrong size 1739 * buffer. This is safe, but if the buffer is too small, user will not be 1740 * given details of all sockets. However, as this kstat has a ks_lock, kstat 1741 * driver will keep it locked between the update and the snapshot, so no 1742 * other process (zone) can currently get inbetween resulting in a wrong size 1743 * buffer allocation. 1744 */ 1745 static int 1746 sockfs_update(kstat_t *ksp, int rw) 1747 { 1748 uint_t n, nactive = 0; /* # of active AF_UNIX sockets */ 1749 uint_t tsze; 1750 struct sonode *so; /* current sonode on socklist */ 1751 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 1752 1753 tsze = 0; 1754 1755 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 1756 1757 if (rw == KSTAT_WRITE) { /* bounce all writes */ 1758 return (EACCES); 1759 } 1760 1761 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) { 1762 if (so->so_count != 0 && so->so_zoneid == myzoneid) { 1763 1764 nactive++; 1765 1766 mutex_enter(&so->so_pid_tree_lock); 1767 n = avl_numnodes(&so->so_pid_tree); 1768 mutex_exit(&so->so_pid_tree_lock); 1769 1770 tsze += sizeof (struct sockinfo); 1771 tsze += (n > 1) ? 
((n - 1) * sizeof (pid_t)) : 0; 1772 } 1773 } 1774 ksp->ks_ndata = nactive; 1775 ksp->ks_data_size = tsze; 1776 1777 return (0); 1778 } 1779 1780 static int 1781 sockfs_snapshot(kstat_t *ksp, void *buf, int rw) 1782 { 1783 int ns; /* # of sonodes we've copied */ 1784 struct sonode *so; /* current sonode on socklist */ 1785 struct sockinfo *psi; /* where we put sockinfo data */ 1786 t_uscalar_t sn_len; /* soa_len */ 1787 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 1788 sotpi_info_t *sti; 1789 1790 uint_t sze; 1791 mblk_t *mblk; 1792 conn_pid_info_t *cpi; 1793 1794 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 1795 1796 ksp->ks_snaptime = gethrtime(); 1797 1798 if (rw == KSTAT_WRITE) { /* bounce all writes */ 1799 return (EACCES); 1800 } 1801 1802 /* 1803 * for each sonode on the socklist, we massage the important 1804 * info into buf, in k_sockinfo format. 1805 */ 1806 psi = (struct sockinfo *)buf; 1807 ns = 0; 1808 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) { 1809 /* only stuff active sonodes and the same zone: */ 1810 if (so->so_count == 0 || so->so_zoneid != myzoneid) { 1811 continue; 1812 } 1813 1814 mblk = so_get_sock_pid_mblk((sock_upper_handle_t)so); 1815 if (mblk == NULL) { 1816 continue; 1817 } 1818 cpi = (conn_pid_info_t *)mblk->b_datap->db_base; 1819 sze = sizeof (struct sockinfo); 1820 sze += (cpi->cpi_pids_cnt > 1) ? 1821 ((cpi->cpi_pids_cnt - 1) * sizeof (pid_t)) : 0; 1822 1823 /* 1824 * If the sonode was activated between the update and the 1825 * snapshot, we're done - as this is only a snapshot. We need 1826 * to make sure that we have space for this sockinfo. In the 1827 * time window between the update and the snapshot, the size of 1828 * sockinfo may change, as new pids are added/removed to/from 1829 * the list. We have to take that into consideration and only 1830 * include the sockinfo if we have enough space. 
That means the 1831 * number of entries we return by snapshot might not equal the 1832 * the number of entries calculated by update. 1833 */ 1834 if (((caddr_t)(psi) + sze) > 1835 ((caddr_t)buf + ksp->ks_data_size)) { 1836 break; 1837 } 1838 1839 sti = SOTOTPI(so); 1840 /* copy important info into buf: */ 1841 psi->si_size = sze; 1842 psi->si_family = so->so_family; 1843 psi->si_type = so->so_type; 1844 psi->si_flag = so->so_flag; 1845 psi->si_state = so->so_state; 1846 psi->si_serv_type = sti->sti_serv_type; 1847 psi->si_ux_laddr_sou_magic = 1848 sti->sti_ux_laddr.soua_magic; 1849 psi->si_ux_faddr_sou_magic = 1850 sti->sti_ux_faddr.soua_magic; 1851 psi->si_laddr_soa_len = sti->sti_laddr.soa_len; 1852 psi->si_faddr_soa_len = sti->sti_faddr.soa_len; 1853 psi->si_szoneid = so->so_zoneid; 1854 psi->si_faddr_noxlate = sti->sti_faddr_noxlate; 1855 1856 1857 mutex_enter(&so->so_lock); 1858 1859 if (sti->sti_laddr_sa != NULL) { 1860 ASSERT(sti->sti_laddr_sa->sa_data != NULL); 1861 sn_len = sti->sti_laddr_len; 1862 ASSERT(sn_len <= sizeof (short) + 1863 sizeof (psi->si_laddr_sun_path)); 1864 1865 psi->si_laddr_family = 1866 sti->sti_laddr_sa->sa_family; 1867 if (sn_len != 0) { 1868 /* AF_UNIX socket names are NULL terminated */ 1869 (void) strncpy(psi->si_laddr_sun_path, 1870 sti->sti_laddr_sa->sa_data, 1871 sizeof (psi->si_laddr_sun_path)); 1872 sn_len = strlen(psi->si_laddr_sun_path); 1873 } 1874 psi->si_laddr_sun_path[sn_len] = 0; 1875 } 1876 1877 if (sti->sti_faddr_sa != NULL) { 1878 ASSERT(sti->sti_faddr_sa->sa_data != NULL); 1879 sn_len = sti->sti_faddr_len; 1880 ASSERT(sn_len <= sizeof (short) + 1881 sizeof (psi->si_faddr_sun_path)); 1882 1883 psi->si_faddr_family = 1884 sti->sti_faddr_sa->sa_family; 1885 if (sn_len != 0) { 1886 (void) strncpy(psi->si_faddr_sun_path, 1887 sti->sti_faddr_sa->sa_data, 1888 sizeof (psi->si_faddr_sun_path)); 1889 sn_len = strlen(psi->si_faddr_sun_path); 1890 } 1891 psi->si_faddr_sun_path[sn_len] = 0; 1892 } 1893 1894 
mutex_exit(&so->so_lock); 1895 1896 (void) sprintf(psi->si_son_straddr, "%p", (void *)so); 1897 (void) sprintf(psi->si_lvn_straddr, "%p", 1898 (void *)sti->sti_ux_laddr.soua_vp); 1899 (void) sprintf(psi->si_fvn_straddr, "%p", 1900 (void *)sti->sti_ux_faddr.soua_vp); 1901 1902 psi->si_pids[0] = 0; 1903 if ((psi->si_pn_cnt = cpi->cpi_pids_cnt) > 0) { 1904 (void) memcpy(psi->si_pids, cpi->cpi_pids, 1905 psi->si_pn_cnt * sizeof (pid_t)); 1906 } 1907 1908 freemsg(mblk); 1909 1910 psi = (struct sockinfo *)((caddr_t)psi + psi->si_size); 1911 ns++; 1912 } 1913 1914 ksp->ks_ndata = ns; 1915 return (0); 1916 } 1917 1918 ssize_t 1919 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size) 1920 { 1921 struct uio auio; 1922 struct iovec aiov[MSG_MAXIOVLEN]; 1923 register vnode_t *vp; 1924 int ioflag, rwflag; 1925 ssize_t cnt; 1926 int error = 0; 1927 int iovcnt = 0; 1928 short fflag; 1929 1930 vp = fp->f_vnode; 1931 fflag = fp->f_flag; 1932 1933 rwflag = 0; 1934 aiov[0].iov_base = (caddr_t)buf; 1935 aiov[0].iov_len = size; 1936 iovcnt = 1; 1937 cnt = (ssize_t)size; 1938 (void) VOP_RWLOCK(vp, rwflag, NULL); 1939 1940 auio.uio_loffset = fileoff; 1941 auio.uio_iov = aiov; 1942 auio.uio_iovcnt = iovcnt; 1943 auio.uio_resid = cnt; 1944 auio.uio_segflg = UIO_SYSSPACE; 1945 auio.uio_llimit = MAXOFFSET_T; 1946 auio.uio_fmode = fflag; 1947 auio.uio_extflg = UIO_COPY_CACHED; 1948 1949 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1950 1951 /* If read sync is not asked for, filter sync flags */ 1952 if ((ioflag & FRSYNC) == 0) 1953 ioflag &= ~(FSYNC|FDSYNC); 1954 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1955 cnt -= auio.uio_resid; 1956 1957 VOP_RWUNLOCK(vp, rwflag, NULL); 1958 1959 if (error == EINTR && cnt != 0) 1960 error = 0; 1961 out: 1962 if (error != 0) { 1963 *err = error; 1964 return (0); 1965 } else { 1966 *err = 0; 1967 return (cnt); 1968 } 1969 } 1970 1971 int 1972 so_copyin(const void *from, void *to, size_t size, int 
fromkernel) 1973 { 1974 if (fromkernel) { 1975 bcopy(from, to, size); 1976 return (0); 1977 } 1978 return (xcopyin(from, to, size)); 1979 } 1980 1981 int 1982 so_copyout(const void *from, void *to, size_t size, int tokernel) 1983 { 1984 if (tokernel) { 1985 bcopy(from, to, size); 1986 return (0); 1987 } 1988 return (xcopyout(from, to, size)); 1989 }