1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/t_lock.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/buf.h>
31 #include <sys/conf.h>
32 #include <sys/cred.h>
33 #include <sys/kmem.h>
34 #include <sys/sysmacros.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/vnode.h>
38 #include <sys/debug.h>
39 #include <sys/errno.h>
40 #include <sys/time.h>
41 #include <sys/file.h>
42 #include <sys/open.h>
43 #include <sys/user.h>
44 #include <sys/termios.h>
45 #include <sys/stream.h>
46 #include <sys/strsubr.h>
47 #include <sys/strsun.h>
48 #include <sys/esunddi.h>
49 #include <sys/flock.h>
50 #include <sys/modctl.h>
51 #include <sys/cmn_err.h>
52 #include <sys/mkdev.h>
53 #include <sys/pathname.h>
54 #include <sys/ddi.h>
55 #include <sys/stat.h>
56 #include <sys/fs/snode.h>
57 #include <sys/fs/dv_node.h>
58 #include <sys/zone.h>
59
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <netinet/in.h>
63 #include <sys/un.h>
64 #include <sys/ucred.h>
65
66 #include <sys/tiuser.h>
67 #define _SUN_TPI_VERSION 2
68 #include <sys/tihdr.h>
69
70 #include <c2/audit.h>
71
72 #include <fs/sockfs/nl7c.h>
73 #include <fs/sockfs/sockcommon.h>
74 #include <fs/sockfs/sockfilter_impl.h>
75 #include <fs/sockfs/socktpi.h>
76 #include <fs/sockfs/socktpi_impl.h>
77 #include <fs/sockfs/sodirect.h>
78
/*
 * Macros that operate on struct cmsghdr.
 *
 * CMSG_CONTENT yields a pointer to the first byte following the header and
 * CMSG_CONTENTLEN the number of bytes that cmsg_len claims beyond the header.
 *
 * CMSG_VALID checks that cmsg is aligned, lies inside [start, end), and that
 * both the header and the cmsg_len bytes it announces fit before end.
 * It does not assume that the last option buffer is padded.
 *
 * The header is verified to fit before cmsg_len is read, and cmsg_len is
 * compared against the number of bytes remaining rather than being added to
 * the pointer, so neither a truncated trailing header nor an oversized
 * cmsg_len can cause an out-of-bounds read or an address wrap-around.
 */
#define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
#define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
#define	CMSG_VALID(cmsg, start, end)					\
	(ISALIGNED_cmsghdr(cmsg) &&					\
	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
	((uintptr_t)(end) - (uintptr_t)(cmsg) >=			\
	    sizeof (struct cmsghdr)) &&					\
	((cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&		\
	((cmsg)->cmsg_len <= (uintptr_t)(end) - (uintptr_t)(cmsg)))
#define	SO_LOCK_WAKEUP_TIME	3000	/* Wakeup time in milliseconds */
92
93 dev_t sockdev; /* For fsid in getattr */
94 int sockfs_defer_nl7c_init = 0;
95
96 struct socklist socklist;
97
98 struct kmem_cache *socket_cache;
99
100 /*
101 * sockconf_lock protects the socket configuration (socket types and
102 * socket filters) which is changed via the sockconfig system call.
103 */
104 krwlock_t sockconf_lock;
105
106 static int sockfs_update(kstat_t *, int);
107 static int sockfs_snapshot(kstat_t *, void *, int);
108 extern smod_info_t *sotpi_smod_create(void);
109
110 extern void sendfile_init();
111
112 extern void nl7c_init(void);
113
114 extern int modrootloaded;
115
116 /*
117 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
118 * Returns with the vnode held.
119 */
120 int
121 sogetvp(char *devpath, vnode_t **vpp, int uioflag)
122 {
123 struct snode *csp;
124 vnode_t *vp, *dvp;
125 major_t maj;
126 int error;
127
128 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE);
129
130 /*
131 * Lookup the underlying filesystem vnode.
132 */
133 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp);
134 if (error)
135 return (error);
136
137 /* Check that it is the correct vnode */
138 if (vp->v_type != VCHR) {
139 VN_RELE(vp);
140 return (ENOTSOCK);
141 }
142
143 /*
144 * If devpath went through devfs, the device should already
145 * be configured. If devpath is a mknod file, however, we
146 * need to make sure the device is properly configured.
147 * To do this, we do something similar to spec_open()
148 * except that we resolve to the minor/leaf level since
149 * we need to return a vnode.
150 */
151 csp = VTOS(VTOS(vp)->s_commonvp);
152 if (!(csp->s_flag & SDIPSET)) {
153 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
154 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname);
155 if (error == 0)
156 error = devfs_lookupname(pathname, NULLVPP, &dvp);
157 VN_RELE(vp);
158 kmem_free(pathname, MAXPATHLEN);
159 if (error != 0)
160 return (ENXIO);
161 vp = dvp; /* use the devfs vp */
162 }
163
164 /* device is configured at this point */
165 maj = getmajor(vp->v_rdev);
166 if (!STREAMSTAB(maj)) {
167 VN_RELE(vp);
168 return (ENOSTR);
169 }
170
171 *vpp = vp;
172 return (0);
173 }
174
175 /*
176 * Update the accessed, updated, or changed times in an sonode
177 * with the current time.
178 *
179 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
180 * attributes in a fstat call. (They return the current time and 0 for
181 * all timestamps, respectively.) We maintain the current timestamps
182 * here primarily so that should sockmod be popped the resulting
183 * file descriptor will behave like a stream w.r.t. the timestamps.
184 */
185 void
186 so_update_attrs(struct sonode *so, int flag)
187 {
188 time_t now = gethrestime_sec();
189
190 if (SOCK_IS_NONSTR(so))
191 return;
192
193 mutex_enter(&so->so_lock);
194 so->so_flag |= flag;
195 if (flag & SOACC)
196 SOTOTPI(so)->sti_atime = now;
197 if (flag & SOMOD)
198 SOTOTPI(so)->sti_mtime = now;
199 mutex_exit(&so->so_lock);
200 }
201
202 extern so_create_func_t sock_comm_create_function;
203 extern so_destroy_func_t sock_comm_destroy_function;
204 /*
205 * Init function called when sockfs is loaded.
206 */
207 int
208 sockinit(int fstype, char *name)
209 {
210 static const fs_operation_def_t sock_vfsops_template[] = {
211 NULL, NULL
212 };
213 int error;
214 major_t dev;
215 char *err_str;
216
217 error = vfs_setfsops(fstype, sock_vfsops_template, NULL);
218 if (error != 0) {
219 zcmn_err(GLOBAL_ZONEID, CE_WARN,
220 "sockinit: bad vfs ops template");
221 return (error);
222 }
223
224 error = vn_make_ops(name, socket_vnodeops_template,
225 &socket_vnodeops);
226 if (error != 0) {
227 err_str = "sockinit: bad socket vnode ops template";
228 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */
229 socket_vnodeops = NULL;
230 goto failure;
231 }
232
233 socket_cache = kmem_cache_create("socket_cache",
234 sizeof (struct sonode), 0, sonode_constructor,
235 sonode_destructor, NULL, NULL, NULL, 0);
236
237 rw_init(&sockconf_lock, NULL, RW_DEFAULT, NULL);
238
239 error = socktpi_init();
240 if (error != 0) {
241 err_str = NULL;
242 goto failure;
243 }
244
245 error = sod_init();
246 if (error != 0) {
247 err_str = NULL;
248 goto failure;
249 }
250
251 /*
252 * Set up the default create and destroy functions
253 */
254 sock_comm_create_function = socket_sonode_create;
255 sock_comm_destroy_function = socket_sonode_destroy;
256
257 /*
258 * Build initial list mapping socket parameters to vnode.
259 */
260 smod_init();
261 smod_add(sotpi_smod_create());
262
263 sockparams_init();
264
265 /*
266 * If sockets are needed before init runs /sbin/soconfig
267 * it is possible to preload the sockparams list here using
268 * calls like:
269 * sockconfig(1,2,3, "/dev/tcp", 0);
270 */
271
272 /*
273 * Create a unique dev_t for use in so_fsid.
274 */
275
276 if ((dev = getudev()) == (major_t)-1)
277 dev = 0;
278 sockdev = makedevice(dev, 0);
279
280 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL);
281 sendfile_init();
282 if (!modrootloaded) {
283 sockfs_defer_nl7c_init = 1;
284 } else {
285 nl7c_init();
286 }
287
288 /* Initialize socket filters */
289 sof_init();
290
291 return (0);
292
293 failure:
294 (void) vfs_freevfsops_by_type(fstype);
295 if (socket_vnodeops != NULL)
296 vn_freevnodeops(socket_vnodeops);
297 if (err_str != NULL)
298 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str);
299 return (error);
300 }
301
302 /*
303 * Caller must hold the mutex. Used to set SOLOCKED.
304 */
305 void
306 so_lock_single(struct sonode *so)
307 {
308 ASSERT(MUTEX_HELD(&so->so_lock));
309
310 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) {
311 cv_wait_stop(&so->so_single_cv, &so->so_lock,
312 SO_LOCK_WAKEUP_TIME);
313 }
314 so->so_flag |= SOLOCKED;
315 }
316
317 /*
318 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
319 * Used to clear SOLOCKED or SOASYNC_UNBIND.
320 */
321 void
322 so_unlock_single(struct sonode *so, int flag)
323 {
324 ASSERT(MUTEX_HELD(&so->so_lock));
325 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND));
326 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0);
327 ASSERT(so->so_flag & flag);
328 /*
329 * Process the T_DISCON_IND on sti_discon_ind_mp.
330 *
331 * Call to so_drain_discon_ind will result in so_lock
332 * being dropped and re-acquired later.
333 */
334 if (!SOCK_IS_NONSTR(so)) {
335 sotpi_info_t *sti = SOTOTPI(so);
336
337 if (sti->sti_discon_ind_mp != NULL)
338 so_drain_discon_ind(so);
339 }
340
341 cv_signal(&so->so_single_cv);
342 so->so_flag &= ~flag;
343 }
344
345 /*
346 * Caller must hold the mutex. Used to set SOREADLOCKED.
347 * If the caller wants nonblocking behavior it should set fmode.
348 */
349 int
350 so_lock_read(struct sonode *so, int fmode)
351 {
352 ASSERT(MUTEX_HELD(&so->so_lock));
353
354 while (so->so_flag & SOREADLOCKED) {
355 if (fmode & (FNDELAY|FNONBLOCK))
356 return (EWOULDBLOCK);
357 cv_wait_stop(&so->so_read_cv, &so->so_lock,
358 SO_LOCK_WAKEUP_TIME);
359 }
360 so->so_flag |= SOREADLOCKED;
361 return (0);
362 }
363
364 /*
365 * Like so_lock_read above but allows signals.
366 */
367 int
368 so_lock_read_intr(struct sonode *so, int fmode)
369 {
370 ASSERT(MUTEX_HELD(&so->so_lock));
371
372 while (so->so_flag & SOREADLOCKED) {
373 if (fmode & (FNDELAY|FNONBLOCK))
374 return (EWOULDBLOCK);
375 if (!cv_wait_sig(&so->so_read_cv, &so->so_lock))
376 return (EINTR);
377 }
378 so->so_flag |= SOREADLOCKED;
379 return (0);
380 }
381
382 /*
383 * Caller must hold the mutex. Used to clear SOREADLOCKED,
384 * set in so_lock_read() or so_lock_read_intr().
385 */
386 void
387 so_unlock_read(struct sonode *so)
388 {
389 ASSERT(MUTEX_HELD(&so->so_lock));
390 ASSERT(so->so_flag & SOREADLOCKED);
391
392 cv_signal(&so->so_read_cv);
393 so->so_flag &= ~SOREADLOCKED;
394 }
395
396 /*
397 * Verify that the specified offset falls within the mblk and
398 * that the resulting pointer is aligned.
399 * Returns NULL if not.
400 */
401 void *
402 sogetoff(mblk_t *mp, t_uscalar_t offset,
403 t_uscalar_t length, uint_t align_size)
404 {
405 uintptr_t ptr1, ptr2;
406
407 ASSERT(mp && mp->b_wptr >= mp->b_rptr);
408 ptr1 = (uintptr_t)mp->b_rptr + offset;
409 ptr2 = (uintptr_t)ptr1 + length;
410 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) {
411 eprintline(0);
412 return (NULL);
413 }
414 if ((ptr1 & (align_size - 1)) != 0) {
415 eprintline(0);
416 return (NULL);
417 }
418 return ((void *)ptr1);
419 }
420
421 /*
422 * Return the AF_UNIX underlying filesystem vnode matching a given name.
423 * Makes sure the sending and the destination sonodes are compatible.
424 * The vnode is returned held.
425 *
426 * The underlying filesystem VSOCK vnode has a v_stream pointer that
427 * references the actual stream head (hence indirectly the actual sonode).
428 */
429 static int
430 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess,
431 vnode_t **vpp)
432 {
433 vnode_t *vp; /* Underlying filesystem vnode */
434 vnode_t *rvp; /* real vnode */
435 vnode_t *svp; /* sockfs vnode */
436 struct sonode *so2;
437 int error;
438
439 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so,
440 soun->sun_path));
441
442 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
443 if (error) {
444 eprintsoline(so, error);
445 return (error);
446 }
447
448 /*
449 * Traverse lofs mounts get the real vnode
450 */
451 if (VOP_REALVP(vp, &rvp, NULL) == 0) {
452 VN_HOLD(rvp); /* hold the real vnode */
453 VN_RELE(vp); /* release hold from lookup */
454 vp = rvp;
455 }
456
457 if (vp->v_type != VSOCK) {
458 error = ENOTSOCK;
459 eprintsoline(so, error);
460 goto done2;
461 }
462
463 if (checkaccess) {
464 /*
465 * Check that we have permissions to access the destination
466 * vnode. This check is not done in BSD but it is required
467 * by X/Open.
468 */
469 if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL)) {
470 eprintsoline(so, error);
471 goto done2;
472 }
473 }
474
475 /*
476 * Check if the remote socket has been closed.
477 *
478 * Synchronize with vn_rele_stream by holding v_lock while traversing
479 * v_stream->sd_vnode.
480 */
481 mutex_enter(&vp->v_lock);
482 if (vp->v_stream == NULL) {
483 mutex_exit(&vp->v_lock);
484 if (so->so_type == SOCK_DGRAM)
485 error = EDESTADDRREQ;
486 else
487 error = ECONNREFUSED;
488
489 eprintsoline(so, error);
490 goto done2;
491 }
492 ASSERT(vp->v_stream->sd_vnode);
493 svp = vp->v_stream->sd_vnode;
494 /*
495 * holding v_lock on underlying filesystem vnode and acquiring
496 * it on sockfs vnode. Assumes that no code ever attempts to
497 * acquire these locks in the reverse order.
498 */
499 VN_HOLD(svp);
500 mutex_exit(&vp->v_lock);
501
502 if (svp->v_type != VSOCK) {
503 error = ENOTSOCK;
504 eprintsoline(so, error);
505 goto done;
506 }
507
508 so2 = VTOSO(svp);
509
510 if (so->so_type != so2->so_type) {
511 error = EPROTOTYPE;
512 eprintsoline(so, error);
513 goto done;
514 }
515
516 VN_RELE(svp);
517 *vpp = vp;
518 return (0);
519
520 done:
521 VN_RELE(svp);
522 done2:
523 VN_RELE(vp);
524 return (error);
525 }
526
527 /*
528 * Verify peer address for connect and sendto/sendmsg.
529 * Since sendto/sendmsg would not get synchronous errors from the transport
530 * provider we have to do these ugly checks in the socket layer to
531 * preserve compatibility with SunOS 4.X.
532 */
533 int
534 so_addr_verify(struct sonode *so, const struct sockaddr *name,
535 socklen_t namelen)
536 {
537 int family;
538
539 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n",
540 (void *)so, (void *)name, namelen));
541
542 ASSERT(name != NULL);
543
544 family = so->so_family;
545 switch (family) {
546 case AF_INET:
547 if (name->sa_family != family) {
548 eprintsoline(so, EAFNOSUPPORT);
549 return (EAFNOSUPPORT);
550 }
551 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) {
552 eprintsoline(so, EINVAL);
553 return (EINVAL);
554 }
555 break;
556 case AF_INET6: {
557 #ifdef DEBUG
558 struct sockaddr_in6 *sin6;
559 #endif /* DEBUG */
560
561 if (name->sa_family != family) {
562 eprintsoline(so, EAFNOSUPPORT);
563 return (EAFNOSUPPORT);
564 }
565 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) {
566 eprintsoline(so, EINVAL);
567 return (EINVAL);
568 }
569 #ifdef DEBUG
570 /* Verify that apps don't forget to clear sin6_scope_id etc */
571 sin6 = (struct sockaddr_in6 *)name;
572 if (sin6->sin6_scope_id != 0 &&
573 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
574 zcmn_err(getzoneid(), CE_WARN,
575 "connect/send* with uninitialized sin6_scope_id "
576 "(%d) on socket. Pid = %d\n",
577 (int)sin6->sin6_scope_id, (int)curproc->p_pid);
578 }
579 #endif /* DEBUG */
580 break;
581 }
582 case AF_UNIX:
583 if (SOTOTPI(so)->sti_faddr_noxlate) {
584 return (0);
585 }
586 if (namelen < (socklen_t)sizeof (short)) {
587 eprintsoline(so, ENOENT);
588 return (ENOENT);
589 }
590 if (name->sa_family != family) {
591 eprintsoline(so, EAFNOSUPPORT);
592 return (EAFNOSUPPORT);
593 }
594 /* MAXPATHLEN + soun_family + nul termination */
595 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
596 eprintsoline(so, ENAMETOOLONG);
597 return (ENAMETOOLONG);
598 }
599
600 break;
601
602 default:
603 /*
604 * Default is don't do any length or sa_family check
605 * to allow non-sockaddr style addresses.
606 */
607 break;
608 }
609
610 return (0);
611 }
612
613
614 /*
615 * Translate an AF_UNIX sockaddr_un to the transport internal name.
616 * Assumes caller has called so_addr_verify first.
617 */
618 /*ARGSUSED*/
619 int
620 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
621 socklen_t namelen, int checkaccess,
622 void **addrp, socklen_t *addrlenp)
623 {
624 int error;
625 struct sockaddr_un *soun;
626 vnode_t *vp;
627 void *addr;
628 socklen_t addrlen;
629 sotpi_info_t *sti = SOTOTPI(so);
630
631 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
632 (void *)so, (void *)name, namelen, checkaccess));
633
634 ASSERT(name != NULL);
635 ASSERT(so->so_family == AF_UNIX);
636 ASSERT(!sti->sti_faddr_noxlate);
637 ASSERT(namelen >= (socklen_t)sizeof (short));
638 ASSERT(name->sa_family == AF_UNIX);
639 soun = (struct sockaddr_un *)name;
640 /*
641 * Lookup vnode for the specified path name and verify that
642 * it is a socket.
643 */
644 error = so_ux_lookup(so, soun, checkaccess, &vp);
645 if (error) {
646 eprintsoline(so, error);
647 return (error);
648 }
649 /*
650 * Use the address of the peer vnode as the address to send
651 * to. We release the peer vnode here. In case it has been
652 * closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the
653 * transport the message will get an error or be dropped.
654 */
655 sti->sti_ux_faddr.soua_vp = vp;
656 sti->sti_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT;
657 addr = &sti->sti_ux_faddr;
658 addrlen = (socklen_t)sizeof (sti->sti_ux_faddr);
659 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
660 addrlen, (void *)vp));
661 VN_RELE(vp);
662 *addrp = addr;
663 *addrlenp = (socklen_t)addrlen;
664 return (0);
665 }
666
667 /*
668 * Esballoc free function for messages that contain SO_FILEP option.
669 * Decrement the reference count on the file pointers using closef.
670 */
671 void
672 fdbuf_free(struct fdbuf *fdbuf)
673 {
674 int i;
675 struct file *fp;
676
677 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd));
678 for (i = 0; i < fdbuf->fd_numfd; i++) {
679 /*
680 * We need pointer size alignment for fd_fds. On a LP64
681 * kernel, the required alignment is 8 bytes while
682 * the option headers and values are only 4 bytes
683 * aligned. So its safer to do a bcopy compared to
684 * assigning fdbuf->fd_fds[i] to fp.
685 */
686 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
687 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp));
688 (void) closef(fp);
689 }
690 if (fdbuf->fd_ebuf != NULL)
691 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen);
692 kmem_free(fdbuf, fdbuf->fd_size);
693 }
694
695 /*
696 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
697 * Waits if memory is not available.
698 */
699 mblk_t *
700 fdbuf_allocmsg(int size, struct fdbuf *fdbuf)
701 {
702 uchar_t *buf;
703 mblk_t *mp;
704
705 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd));
706 buf = kmem_alloc(size, KM_SLEEP);
707 fdbuf->fd_ebuf = (caddr_t)buf;
708 fdbuf->fd_ebuflen = size;
709 fdbuf->fd_frtn.free_func = fdbuf_free;
710 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf;
711
712 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn);
713 mp->b_datap->db_type = M_PROTO;
714 return (mp);
715 }
716
717 /*
718 * Extract file descriptors from a fdbuf.
719 * Return list in rights/rightslen.
720 */
721 /*ARGSUSED*/
722 static int
723 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen)
724 {
725 int i, fd;
726 int *rp;
727 struct file *fp;
728 int numfd;
729
730 dprint(1, ("fdbuf_extract: %d fds, len %d\n",
731 fdbuf->fd_numfd, rightslen));
732
733 numfd = fdbuf->fd_numfd;
734 ASSERT(rightslen == numfd * (int)sizeof (int));
735
736 /*
737 * Allocate a file descriptor and increment the f_count.
738 * The latter is needed since we always call fdbuf_free
739 * which performs a closef.
740 */
741 rp = (int *)rights;
742 for (i = 0; i < numfd; i++) {
743 if ((fd = ufalloc(0)) == -1)
744 goto cleanup;
745 /*
746 * We need pointer size alignment for fd_fds. On a LP64
747 * kernel, the required alignment is 8 bytes while
748 * the option headers and values are only 4 bytes
749 * aligned. So its safer to do a bcopy compared to
750 * assigning fdbuf->fd_fds[i] to fp.
751 */
752 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
753 mutex_enter(&fp->f_tlock);
754 fp->f_count++;
755 mutex_exit(&fp->f_tlock);
756 setf(fd, fp);
757 *rp++ = fd;
758
759 /*
760 * Add the current pid to the list associated with this
761 * descriptor.
762 */
763 if (fp->f_vnode != NULL)
764 (void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
765 (intptr_t)curproc->p_pidp->pid_id, FKIOCTL, kcred,
766 NULL, NULL);
767
768 if (AU_AUDITING())
769 audit_fdrecv(fd, fp);
770 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n",
771 i, fd, (void *)fp, fp->f_count));
772 }
773 return (0);
774
775 cleanup:
776 /*
777 * Undo whatever partial work the loop above has done.
778 */
779 {
780 int j;
781
782 rp = (int *)rights;
783 for (j = 0; j < i; j++) {
784 dprint(0,
785 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp));
786 (void) closeandsetf(*rp++, NULL);
787 }
788 }
789
790 return (EMFILE);
791 }
792
793 /*
794 * Insert file descriptors into an fdbuf.
795 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
796 * by calling fdbuf_free().
797 */
798 int
799 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp)
800 {
801 int numfd, i;
802 int *fds;
803 struct file *fp;
804 struct fdbuf *fdbuf;
805 int fdbufsize;
806
807 dprint(1, ("fdbuf_create: len %d\n", rightslen));
808
809 numfd = rightslen / (int)sizeof (int);
810
811 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *));
812 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP);
813 fdbuf->fd_size = fdbufsize;
814 fdbuf->fd_numfd = 0;
815 fdbuf->fd_ebuf = NULL;
816 fdbuf->fd_ebuflen = 0;
817 fds = (int *)rights;
818 for (i = 0; i < numfd; i++) {
819 if ((fp = getf(fds[i])) == NULL) {
820 fdbuf_free(fdbuf);
821 return (EBADF);
822 }
823 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n",
824 i, fds[i], (void *)fp, fp->f_count));
825 mutex_enter(&fp->f_tlock);
826 fp->f_count++;
827 mutex_exit(&fp->f_tlock);
828 /*
829 * The maximum alignment for fdbuf (or any option header
830 * and its value) it 4 bytes. On a LP64 kernel, the alignment
831 * is not sufficient for pointers (fd_fds in this case). Since
832 * we just did a kmem_alloc (we get a double word alignment),
833 * we don't need to do anything on the send side (we loose
834 * the double word alignment because fdbuf goes after an
835 * option header (eg T_unitdata_req) which is only 4 byte
836 * aligned). We take care of this when we extract the file
837 * descriptor in fdbuf_extract or fdbuf_free.
838 */
839 fdbuf->fd_fds[i] = fp;
840 fdbuf->fd_numfd++;
841 releasef(fds[i]);
842 if (AU_AUDITING())
843 audit_fdsend(fds[i], fp, 0);
844 }
845 *fdbufp = fdbuf;
846 return (0);
847 }
848
849 static int
850 fdbuf_optlen(int rightslen)
851 {
852 int numfd;
853
854 numfd = rightslen / (int)sizeof (int);
855
856 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)));
857 }
858
859 static t_uscalar_t
860 fdbuf_cmsglen(int fdbuflen)
861 {
862 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) /
863 (int)sizeof (struct file *) * (int)sizeof (int));
864 }
865
866
867 /*
868 * Return non-zero if the mblk and fdbuf are consistent.
869 */
870 static int
871 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen)
872 {
873 if (fdbuflen >= FDBUF_HDRSIZE &&
874 fdbuflen == fdbuf->fd_size) {
875 frtn_t *frp = mp->b_datap->db_frtnp;
876 /*
877 * Check that the SO_FILEP portion of the
878 * message has not been modified by
879 * the loopback transport. The sending sockfs generates
880 * a message that is esballoc'ed with the free function
881 * being fdbuf_free() and where free_arg contains the
882 * identical information as the SO_FILEP content.
883 *
884 * If any of these constraints are not satisfied we
885 * silently ignore the option.
886 */
887 ASSERT(mp);
888 if (frp != NULL &&
889 frp->free_func == fdbuf_free &&
890 frp->free_arg != NULL &&
891 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) {
892 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n",
893 (void *)fdbuf, fdbuflen));
894 return (1);
895 } else {
896 zcmn_err(getzoneid(), CE_WARN,
897 "sockfs: mismatched fdbuf content (%p)",
898 (void *)mp);
899 return (0);
900 }
901 } else {
902 zcmn_err(getzoneid(), CE_WARN,
903 "sockfs: mismatched fdbuf len %d, %d\n",
904 fdbuflen, fdbuf->fd_size);
905 return (0);
906 }
907 }
908
909 /*
910 * When the file descriptors returned by sorecvmsg can not be passed
911 * to the application this routine will cleanup the references on
912 * the files. Start at startoff bytes into the buffer.
913 */
914 static void
915 close_fds(void *fdbuf, int fdbuflen, int startoff)
916 {
917 int *fds = (int *)fdbuf;
918 int numfd = fdbuflen / (int)sizeof (int);
919 int i;
920
921 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff));
922
923 for (i = 0; i < numfd; i++) {
924 if (startoff < 0)
925 startoff = 0;
926 if (startoff < (int)sizeof (int)) {
927 /*
928 * This file descriptor is partially or fully after
929 * the offset
930 */
931 dprint(0,
932 ("close_fds: cleanup[%d] = %d\n", i, fds[i]));
933 (void) closeandsetf(fds[i], NULL);
934 }
935 startoff -= (int)sizeof (int);
936 }
937 }
938
939 /*
940 * Close all file descriptors contained in the control part starting at
941 * the startoffset.
942 */
943 void
944 so_closefds(void *control, t_uscalar_t controllen, int oldflg,
945 int startoff)
946 {
947 struct cmsghdr *cmsg;
948
949 if (control == NULL)
950 return;
951
952 if (oldflg) {
953 close_fds(control, controllen, startoff);
954 return;
955 }
956 /* Scan control part for file descriptors. */
957 for (cmsg = (struct cmsghdr *)control;
958 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
959 cmsg = CMSG_NEXT(cmsg)) {
960 if (cmsg->cmsg_level == SOL_SOCKET &&
961 cmsg->cmsg_type == SCM_RIGHTS) {
962 close_fds(CMSG_CONTENT(cmsg),
963 (int)CMSG_CONTENTLEN(cmsg),
964 startoff - (int)sizeof (struct cmsghdr));
965 }
966 startoff -= cmsg->cmsg_len;
967 }
968 }
969
970 /*
971 * Returns a pointer/length for the file descriptors contained
972 * in the control buffer. Returns with *fdlenp == -1 if there are no
973 * file descriptor options present. This is different than there being
974 * a zero-length file descriptor option.
975 * Fail if there are multiple SCM_RIGHT cmsgs.
976 */
977 int
978 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg,
979 void **fdsp, int *fdlenp)
980 {
981 struct cmsghdr *cmsg;
982 void *fds;
983 int fdlen;
984
985 if (control == NULL) {
986 *fdsp = NULL;
987 *fdlenp = -1;
988 return (0);
989 }
990
991 if (oldflg) {
992 *fdsp = control;
993 if (controllen == 0)
994 *fdlenp = -1;
995 else
996 *fdlenp = controllen;
997 dprint(1, ("so_getfdopt: old %d\n", *fdlenp));
998 return (0);
999 }
1000
1001 fds = NULL;
1002 fdlen = 0;
1003
1004 for (cmsg = (struct cmsghdr *)control;
1005 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1006 cmsg = CMSG_NEXT(cmsg)) {
1007 if (cmsg->cmsg_level == SOL_SOCKET &&
1008 cmsg->cmsg_type == SCM_RIGHTS) {
1009 if (fds != NULL)
1010 return (EINVAL);
1011 fds = CMSG_CONTENT(cmsg);
1012 fdlen = (int)CMSG_CONTENTLEN(cmsg);
1013 dprint(1, ("so_getfdopt: new %lu\n",
1014 (size_t)CMSG_CONTENTLEN(cmsg)));
1015 }
1016 }
1017 if (fds == NULL) {
1018 dprint(1, ("so_getfdopt: NONE\n"));
1019 *fdlenp = -1;
1020 } else
1021 *fdlenp = fdlen;
1022 *fdsp = fds;
1023 return (0);
1024 }
1025
1026 /*
1027 * Return the length of the options including any file descriptor options.
1028 */
1029 t_uscalar_t
1030 so_optlen(void *control, t_uscalar_t controllen, int oldflg)
1031 {
1032 struct cmsghdr *cmsg;
1033 t_uscalar_t optlen = 0;
1034 t_uscalar_t len;
1035
1036 if (control == NULL)
1037 return (0);
1038
1039 if (oldflg)
1040 return ((t_uscalar_t)(sizeof (struct T_opthdr) +
1041 fdbuf_optlen(controllen)));
1042
1043 for (cmsg = (struct cmsghdr *)control;
1044 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1045 cmsg = CMSG_NEXT(cmsg)) {
1046 if (cmsg->cmsg_level == SOL_SOCKET &&
1047 cmsg->cmsg_type == SCM_RIGHTS) {
1048 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg));
1049 } else {
1050 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1051 }
1052 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) +
1053 sizeof (struct T_opthdr));
1054 }
1055 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
1056 controllen, oldflg, optlen));
1057 return (optlen);
1058 }
1059
1060 /*
1061 * Copy options from control to the mblk. Skip any file descriptor options.
1062 */
1063 void
1064 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp)
1065 {
1066 struct T_opthdr toh;
1067 struct cmsghdr *cmsg;
1068
1069 if (control == NULL)
1070 return;
1071
1072 if (oldflg) {
1073 /* No real options - caller has handled file descriptors */
1074 return;
1075 }
1076 for (cmsg = (struct cmsghdr *)control;
1077 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1078 cmsg = CMSG_NEXT(cmsg)) {
1079 /*
1080 * Note: The caller handles file descriptors prior
1081 * to calling this function.
1082 */
1083 t_uscalar_t len;
1084
1085 if (cmsg->cmsg_level == SOL_SOCKET &&
1086 cmsg->cmsg_type == SCM_RIGHTS)
1087 continue;
1088
1089 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1090 toh.level = cmsg->cmsg_level;
1091 toh.name = cmsg->cmsg_type;
1092 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr);
1093 toh.status = 0;
1094
1095 soappendmsg(mp, &toh, sizeof (toh));
1096 soappendmsg(mp, CMSG_CONTENT(cmsg), len);
1097 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len;
1098 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1099 }
1100 }
1101
1102 /*
1103 * Return the length of the control message derived from the options.
1104 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
1105 * When oldflg is set only include SO_FILEP.
1106 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1107 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1108 * also be checked for any possible impacts.
1109 */
1110 t_uscalar_t
1111 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg)
1112 {
1113 t_uscalar_t cmsglen = 0;
1114 struct T_opthdr *tohp;
1115 t_uscalar_t len;
1116 t_uscalar_t last_roundup = 0;
1117
1118 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1119
1120 for (tohp = (struct T_opthdr *)opt;
1121 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1122 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1123 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
1124 tohp->level, tohp->name, tohp->len));
1125 if (tohp->level == SOL_SOCKET &&
1126 (tohp->name == SO_SRCADDR ||
1127 tohp->name == SO_UNIX_CLOSE)) {
1128 continue;
1129 }
1130 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
1131 struct fdbuf *fdbuf;
1132 int fdbuflen;
1133
1134 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
1135 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
1136
1137 if (!fdbuf_verify(mp, fdbuf, fdbuflen))
1138 continue;
1139 if (oldflg) {
1140 cmsglen += fdbuf_cmsglen(fdbuflen);
1141 continue;
1142 }
1143 len = fdbuf_cmsglen(fdbuflen);
1144 } else if (tohp->level == SOL_SOCKET &&
1145 tohp->name == SCM_TIMESTAMP) {
1146 if (oldflg)
1147 continue;
1148
1149 if (get_udatamodel() == DATAMODEL_NATIVE) {
1150 len = sizeof (struct timeval);
1151 } else {
1152 len = sizeof (struct timeval32);
1153 }
1154 } else {
1155 if (oldflg)
1156 continue;
1157 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1158 }
1159 /*
1160 * Exclude roundup for last option to not set
1161 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
1162 */
1163 last_roundup = (t_uscalar_t)
1164 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) -
1165 (len + (int)sizeof (struct cmsghdr)));
1166 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) +
1167 last_roundup;
1168 }
1169 cmsglen -= last_roundup;
1170 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
1171 optlen, oldflg, cmsglen));
1172 return (cmsglen);
1173 }
1174
/*
 * Copy options from options to the control. Convert SO_FILEP to
 * file descriptors.
 *
 * Walks the T_opthdr entries in opt/optlen and writes one cmsghdr per
 * option into control, advancing with CMSG_NEXT() after each one:
 *   - SOL_SOCKET/SO_SRCADDR and SOL_SOCKET/SO_UNIX_CLOSE are skipped.
 *   - SOL_SOCKET/SO_FILEP is checked with fdbuf_verify() and converted
 *     with fdbuf_extract(). When oldflg is set the extraction goes
 *     straight into control with no cmsghdr; otherwise an SCM_RIGHTS
 *     cmsg is produced.
 *   - SOL_SOCKET/SCM_TIMESTAMP is converted from a timestruc_t into a
 *     struct timeval, or a struct timeval32 when the caller's data model
 *     is not DATAMODEL_NATIVE.
 *   - Any other option has its data copied verbatim.
 * When oldflg is set, everything except SO_FILEP is skipped.
 *
 * Returns errno or zero: EPROTO if fdbuf_verify() rejects an SO_FILEP
 * option, or whatever non-zero value fdbuf_extract() returns.
 *
 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
 * allocates the space that so_opt2cmsg fills. If one changes, the other should
 * also be checked for any possible impacts.
 */
int
so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg,
    void *control, t_uscalar_t controllen)
{
	struct T_opthdr *tohp;
	struct cmsghdr *cmsg;
	struct fdbuf *fdbuf;
	int fdbuflen;
	int error;
#if defined(DEBUG) || defined(__lint)
	/*
	 * End of the (rounded up) control buffer. Only declared for
	 * DEBUG/lint builds; it is referenced solely from the dprint() and
	 * ASSERT() at the bottom, which are presumably compiled out
	 * otherwise.
	 */
	struct cmsghdr *cend = (struct cmsghdr *)
	    (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
#endif
	cmsg = (struct cmsghdr *)control;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));

		/* These options are never turned into control data. */
		if (tohp->level == SOL_SOCKET &&
		    (tohp->name == SO_SRCADDR ||
		    tohp->name == SO_UNIX_CLOSE)) {
			continue;
		}
		/* The write position must not have run past the buffer. */
		ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);

			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
				return (EPROTO);
			if (oldflg) {
				/*
				 * Old style: extract directly into the start
				 * of the control buffer, without a cmsghdr,
				 * and do not advance cmsg.
				 */
				error = fdbuf_extract(fdbuf, control,
				    (int)controllen);
				if (error != 0)
					return (error);
				continue;
			} else {
				int fdlen;

				fdlen = (int)fdbuf_cmsglen(
				    (int)_TPI_TOPT_DATALEN(tohp));

				/* The fdbuf is handed up as SCM_RIGHTS. */
				cmsg->cmsg_level = tohp->level;
				cmsg->cmsg_type = SCM_RIGHTS;
				cmsg->cmsg_len = (socklen_t)(fdlen +
				    sizeof (struct cmsghdr));

				error = fdbuf_extract(fdbuf,
				    CMSG_CONTENT(cmsg), fdlen);
				if (error != 0)
					return (error);
			}
		} else if (tohp->level == SOL_SOCKET &&
		    tohp->name == SCM_TIMESTAMP) {
			timestruc_t *timestamp;

			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;

			/*
			 * The timestruc_t is read from the first
			 * intptr_t-aligned address after the option header.
			 */
			timestamp =
			    (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
			    sizeof (intptr_t));

			if (get_udatamodel() == DATAMODEL_NATIVE) {
				struct timeval tv;

				cmsg->cmsg_len = sizeof (struct timeval) +
				    sizeof (struct cmsghdr);
				/* Convert nanoseconds to microseconds. */
				tv.tv_sec = timestamp->tv_sec;
				tv.tv_usec = timestamp->tv_nsec /
				    (NANOSEC / MICROSEC);
				/*
				 * on LP64 systems, the struct timeval in
				 * the destination will not be 8-byte aligned,
				 * so use bcopy to avoid alignment trouble
				 */
				bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
			} else {
				struct timeval32 *time32;

				/* Other data model: narrow to timeval32. */
				cmsg->cmsg_len = sizeof (struct timeval32) +
				    sizeof (struct cmsghdr);
				time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
				time32->tv_sec = (time32_t)timestamp->tv_sec;
				time32->tv_usec =
				    (int32_t)(timestamp->tv_nsec /
				    (NANOSEC / MICROSEC));
			}

		} else {
			if (oldflg)
				continue;

			/* Generic option: level and name map one to one. */
			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;
			cmsg->cmsg_len = (socklen_t)(_TPI_TOPT_DATALEN(tohp) +
			    sizeof (struct cmsghdr));

			/* copy content to control data part */
			bcopy(&tohp[1], CMSG_CONTENT(cmsg),
			    CMSG_CONTENTLEN(cmsg));
		}
		/* move to next CMSG structure! */
		cmsg = CMSG_NEXT(cmsg);
	}
	dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
	    control, controllen, (void *)cend, (void *)cmsg));
	ASSERT(cmsg <= cend);
	return (0);
}
1301
1302 /*
1303 * Extract the SO_SRCADDR option value if present.
1304 */
1305 void
1306 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp,
1307 t_uscalar_t *srclenp)
1308 {
1309 struct T_opthdr *tohp;
1310
1311 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1312
1313 ASSERT(srcp != NULL && srclenp != NULL);
1314 *srcp = NULL;
1315 *srclenp = 0;
1316
1317 for (tohp = (struct T_opthdr *)opt;
1318 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1319 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1320 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
1321 tohp->level, tohp->name, tohp->len));
1322 if (tohp->level == SOL_SOCKET &&
1323 tohp->name == SO_SRCADDR) {
1324 *srcp = _TPI_TOPT_DATA(tohp);
1325 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1326 }
1327 }
1328 }
1329
1330 /*
1331 * Verify if the SO_UNIX_CLOSE option is present.
1332 */
1333 int
1334 so_getopt_unix_close(void *opt, t_uscalar_t optlen)
1335 {
1336 struct T_opthdr *tohp;
1337
1338 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1339
1340 for (tohp = (struct T_opthdr *)opt;
1341 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1342 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1343 dprint(1,
1344 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
1345 tohp->level, tohp->name, tohp->len));
1346 if (tohp->level == SOL_SOCKET &&
1347 tohp->name == SO_UNIX_CLOSE)
1348 return (1);
1349 }
1350 return (0);
1351 }
1352
1353 /*
1354 * Allocate an M_PROTO message.
1355 *
1356 * If allocation fails the behavior depends on sleepflg:
1357 * _ALLOC_NOSLEEP fail immediately
1358 * _ALLOC_INTR sleep for memory until a signal is caught
1359 * _ALLOC_SLEEP sleep forever. Don't return NULL.
1360 */
1361 mblk_t *
1362 soallocproto(size_t size, int sleepflg, cred_t *cr)
1363 {
1364 mblk_t *mp;
1365
1366 /* Round up size for reuse */
1367 size = MAX(size, 64);
1368 if (cr != NULL)
1369 mp = allocb_cred(size, cr, curproc->p_pid);
1370 else
1371 mp = allocb(size, BPRI_MED);
1372
1373 if (mp == NULL) {
1374 int error; /* Dummy - error not returned to caller */
1375
1376 switch (sleepflg) {
1377 case _ALLOC_SLEEP:
1378 if (cr != NULL) {
1379 mp = allocb_cred_wait(size, STR_NOSIG, &error,
1380 cr, curproc->p_pid);
1381 } else {
1382 mp = allocb_wait(size, BPRI_MED, STR_NOSIG,
1383 &error);
1384 }
1385 ASSERT(mp);
1386 break;
1387 case _ALLOC_INTR:
1388 if (cr != NULL) {
1389 mp = allocb_cred_wait(size, 0, &error, cr,
1390 curproc->p_pid);
1391 } else {
1392 mp = allocb_wait(size, BPRI_MED, 0, &error);
1393 }
1394 if (mp == NULL) {
1395 /* Caught signal while sleeping for memory */
1396 eprintline(ENOBUFS);
1397 return (NULL);
1398 }
1399 break;
1400 case _ALLOC_NOSLEEP:
1401 default:
1402 eprintline(ENOBUFS);
1403 return (NULL);
1404 }
1405 }
1406 DB_TYPE(mp) = M_PROTO;
1407 return (mp);
1408 }
1409
1410 /*
1411 * Allocate an M_PROTO message with a single component.
1412 * len is the length of buf. size is the amount to allocate.
1413 *
1414 * buf can be NULL with a non-zero len.
1415 * This results in a bzero'ed chunk being placed the message.
1416 */
1417 mblk_t *
1418 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg,
1419 cred_t *cr)
1420 {
1421 mblk_t *mp;
1422
1423 if (size == 0)
1424 size = len;
1425
1426 ASSERT(size >= len);
1427 /* Round up size for reuse */
1428 size = MAX(size, 64);
1429 mp = soallocproto(size, sleepflg, cr);
1430 if (mp == NULL)
1431 return (NULL);
1432 mp->b_datap->db_type = M_PROTO;
1433 if (len != 0) {
1434 if (buf != NULL)
1435 bcopy(buf, mp->b_wptr, len);
1436 else
1437 bzero(mp->b_wptr, len);
1438 mp->b_wptr += len;
1439 }
1440 return (mp);
1441 }
1442
1443 /*
1444 * Append buf/len to mp.
1445 * The caller has to ensure that there is enough room in the mblk.
1446 *
1447 * buf can be NULL with a non-zero len.
1448 * This results in a bzero'ed chunk being placed the message.
1449 */
1450 void
1451 soappendmsg(mblk_t *mp, const void *buf, ssize_t len)
1452 {
1453 ASSERT(mp);
1454
1455 if (len != 0) {
1456 /* Assert for room left */
1457 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len);
1458 if (buf != NULL)
1459 bcopy(buf, mp->b_wptr, len);
1460 else
1461 bzero(mp->b_wptr, len);
1462 }
1463 mp->b_wptr += len;
1464 }
1465
1466 /*
1467 * Create a message using two kernel buffers.
1468 * If size is set that will determine the allocation size (e.g. for future
1469 * soappendmsg calls). If size is zero it is derived from the buffer
1470 * lengths.
1471 */
1472 mblk_t *
1473 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1474 ssize_t size, int sleepflg, cred_t *cr)
1475 {
1476 mblk_t *mp;
1477
1478 if (size == 0)
1479 size = len1 + len2;
1480 ASSERT(size >= len1 + len2);
1481
1482 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1483 if (mp)
1484 soappendmsg(mp, buf2, len2);
1485 return (mp);
1486 }
1487
1488 /*
1489 * Create a message using three kernel buffers.
1490 * If size is set that will determine the allocation size (for future
1491 * soappendmsg calls). If size is zero it is derived from the buffer
1492 * lengths.
1493 */
1494 mblk_t *
1495 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1496 const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr)
1497 {
1498 mblk_t *mp;
1499
1500 if (size == 0)
1501 size = len1 + len2 +len3;
1502 ASSERT(size >= len1 + len2 + len3);
1503
1504 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1505 if (mp != NULL) {
1506 soappendmsg(mp, buf2, len2);
1507 soappendmsg(mp, buf3, len3);
1508 }
1509 return (mp);
1510 }
1511
1512 #ifdef DEBUG
1513 char *
1514 pr_state(uint_t state, uint_t mode)
1515 {
1516 static char buf[1024];
1517
1518 buf[0] = 0;
1519 if (state & SS_ISCONNECTED)
1520 (void) strcat(buf, "ISCONNECTED ");
1521 if (state & SS_ISCONNECTING)
1522 (void) strcat(buf, "ISCONNECTING ");
1523 if (state & SS_ISDISCONNECTING)
1524 (void) strcat(buf, "ISDISCONNECTING ");
1525 if (state & SS_CANTSENDMORE)
1526 (void) strcat(buf, "CANTSENDMORE ");
1527
1528 if (state & SS_CANTRCVMORE)
1529 (void) strcat(buf, "CANTRCVMORE ");
1530 if (state & SS_ISBOUND)
1531 (void) strcat(buf, "ISBOUND ");
1532 if (state & SS_NDELAY)
1533 (void) strcat(buf, "NDELAY ");
1534 if (state & SS_NONBLOCK)
1535 (void) strcat(buf, "NONBLOCK ");
1536
1537 if (state & SS_ASYNC)
1538 (void) strcat(buf, "ASYNC ");
1539 if (state & SS_ACCEPTCONN)
1540 (void) strcat(buf, "ACCEPTCONN ");
1541 if (state & SS_SAVEDEOR)
1542 (void) strcat(buf, "SAVEDEOR ");
1543
1544 if (state & SS_RCVATMARK)
1545 (void) strcat(buf, "RCVATMARK ");
1546 if (state & SS_OOBPEND)
1547 (void) strcat(buf, "OOBPEND ");
1548 if (state & SS_HAVEOOBDATA)
1549 (void) strcat(buf, "HAVEOOBDATA ");
1550 if (state & SS_HADOOBDATA)
1551 (void) strcat(buf, "HADOOBDATA ");
1552
1553 if (mode & SM_PRIV)
1554 (void) strcat(buf, "PRIV ");
1555 if (mode & SM_ATOMIC)
1556 (void) strcat(buf, "ATOMIC ");
1557 if (mode & SM_ADDR)
1558 (void) strcat(buf, "ADDR ");
1559 if (mode & SM_CONNREQUIRED)
1560 (void) strcat(buf, "CONNREQUIRED ");
1561
1562 if (mode & SM_FDPASSING)
1563 (void) strcat(buf, "FDPASSING ");
1564 if (mode & SM_EXDATA)
1565 (void) strcat(buf, "EXDATA ");
1566 if (mode & SM_OPTDATA)
1567 (void) strcat(buf, "OPTDATA ");
1568 if (mode & SM_BYTESTREAM)
1569 (void) strcat(buf, "BYTESTREAM ");
1570 return (buf);
1571 }
1572
1573 char *
1574 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen)
1575 {
1576 static char buf[1024];
1577
1578 if (addr == NULL || addrlen == 0) {
1579 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr);
1580 return (buf);
1581 }
1582 switch (family) {
1583 case AF_INET: {
1584 struct sockaddr_in sin;
1585
1586 bcopy(addr, &sin, sizeof (sin));
1587
1588 (void) sprintf(buf, "(len %d) %x/%d",
1589 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1590 break;
1591 }
1592 case AF_INET6: {
1593 struct sockaddr_in6 sin6;
1594 uint16_t *piece = (uint16_t *)&sin6.sin6_addr;
1595
1596 bcopy((char *)addr, (char *)&sin6, sizeof (sin6));
1597 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d",
1598 addrlen,
1599 ntohs(piece[0]), ntohs(piece[1]),
1600 ntohs(piece[2]), ntohs(piece[3]),
1601 ntohs(piece[4]), ntohs(piece[5]),
1602 ntohs(piece[6]), ntohs(piece[7]),
1603 ntohs(sin6.sin6_port));
1604 break;
1605 }
1606 case AF_UNIX: {
1607 struct sockaddr_un *soun = (struct sockaddr_un *)addr;
1608
1609 (void) sprintf(buf, "(len %d) %s", addrlen,
1610 (soun == NULL) ? "(none)" : soun->sun_path);
1611 break;
1612 }
1613 default:
1614 (void) sprintf(buf, "(unknown af %d)", family);
1615 break;
1616 }
1617 return (buf);
1618 }
1619
1620 /* The logical equivalence operator (a if-and-only-if b) */
1621 #define EQUIVALENT(a, b) (((a) && (b)) || (!(a) && (!(b))))
1622
1623 /*
1624 * Verify limitations and invariants on oob state.
1625 * Return 1 if OK, otherwise 0 so that it can be used as
1626 * ASSERT(verify_oobstate(so));
1627 */
1628 int
1629 so_verify_oobstate(struct sonode *so)
1630 {
1631 boolean_t havemark;
1632
1633 ASSERT(MUTEX_HELD(&so->so_lock));
1634
1635 /*
1636 * The possible state combinations are:
1637 * 0
1638 * SS_OOBPEND
1639 * SS_OOBPEND|SS_HAVEOOBDATA
1640 * SS_OOBPEND|SS_HADOOBDATA
1641 * SS_HADOOBDATA
1642 */
1643 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) {
1644 case 0:
1645 case SS_OOBPEND:
1646 case SS_OOBPEND|SS_HAVEOOBDATA:
1647 case SS_OOBPEND|SS_HADOOBDATA:
1648 case SS_HADOOBDATA:
1649 break;
1650 default:
1651 printf("Bad oob state 1 (%p): state %s\n",
1652 (void *)so, pr_state(so->so_state, so->so_mode));
1653 return (0);
1654 }
1655
1656 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */
1657 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) {
1658 printf("Bad oob state 2 (%p): state %s\n",
1659 (void *)so, pr_state(so->so_state, so->so_mode));
1660 return (0);
1661 }
1662
1663 /*
1664 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
1665 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
1666 */
1667 havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 :
1668 SOTOTPI(so)->sti_oobsigcnt > 0;
1669
1670 if (!EQUIVALENT(havemark || (so->so_state & SS_RCVATMARK),
1671 so->so_state & SS_OOBPEND)) {
1672 printf("Bad oob state 3 (%p): state %s\n",
1673 (void *)so, pr_state(so->so_state, so->so_mode));
1674 return (0);
1675 }
1676
1677 /*
1678 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
1679 */
1680 if (!(so->so_options & SO_OOBINLINE) &&
1681 !EQUIVALENT(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) {
1682 printf("Bad oob state 4 (%p): state %s\n",
1683 (void *)so, pr_state(so->so_state, so->so_mode));
1684 return (0);
1685 }
1686
1687 if (!SOCK_IS_NONSTR(so) &&
1688 SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) {
1689 printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
1690 (void *)so, SOTOTPI(so)->sti_oobsigcnt,
1691 SOTOTPI(so)->sti_oobcnt,
1692 pr_state(so->so_state, so->so_mode));
1693 return (0);
1694 }
1695
1696 return (1);
1697 }
1698 #undef EQUIVALENT
1699 #endif /* DEBUG */
1700
1701 /* initialize sockfs zone specific kstat related items */
1702 void *
1703 sock_kstat_init(zoneid_t zoneid)
1704 {
1705 kstat_t *ksp;
1706
1707 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
1708 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid);
1709
1710 if (ksp != NULL) {
1711 ksp->ks_update = sockfs_update;
1712 ksp->ks_snapshot = sockfs_snapshot;
1713 ksp->ks_lock = &socklist.sl_lock;
1714 ksp->ks_private = (void *)(uintptr_t)zoneid;
1715 kstat_install(ksp);
1716 }
1717
1718 return (ksp);
1719 }
1720
1721 /* tear down sockfs zone specific kstat related items */
1722 /*ARGSUSED*/
1723 void
1724 sock_kstat_fini(zoneid_t zoneid, void *arg)
1725 {
1726 kstat_t *ksp = (kstat_t *)arg;
1727
1728 if (ksp != NULL) {
1729 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private);
1730 kstat_delete(ksp);
1731 }
1732 }
1733
1734 /*
1735 * Zones:
1736 * Note that nactive is going to be different for each zone.
1737 * This means we require kstat to call sockfs_update and then sockfs_snapshot
1738 * for the same zone, or sockfs_snapshot will be taken into the wrong size
1739 * buffer. This is safe, but if the buffer is too small, user will not be
1740 * given details of all sockets. However, as this kstat has a ks_lock, kstat
1741 * driver will keep it locked between the update and the snapshot, so no
1742 * other process (zone) can currently get inbetween resulting in a wrong size
1743 * buffer allocation.
1744 */
1745 static int
1746 sockfs_update(kstat_t *ksp, int rw)
1747 {
1748 uint_t n, nactive = 0; /* # of active AF_UNIX sockets */
1749 uint_t tsze = 0;
1750 struct sonode *so; /* current sonode on socklist */
1751 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1752
1753 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1754
1755 if (rw == KSTAT_WRITE) { /* bounce all writes */
1756 return (EACCES);
1757 }
1758
1759 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1760 if (so->so_count != 0 && so->so_zoneid == myzoneid) {
1761
1762 nactive++;
1763
1764 mutex_enter(&so->so_pid_tree_lock);
1765 n = avl_numnodes(&so->so_pid_tree);
1766 mutex_exit(&so->so_pid_tree_lock);
1767
1768 tsze += sizeof (struct sockinfo);
1769 tsze += (n > 1) ? ((n - 1) * sizeof (pid_t)) : 0;
1770 }
1771 }
1772 ksp->ks_ndata = nactive;
1773 ksp->ks_data_size = tsze;
1774
1775 return (0);
1776 }
1777
1778 static int
1779 sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
1780 {
1781 int ns; /* # of sonodes we've copied */
1782 struct sonode *so; /* current sonode on socklist */
1783 struct sockinfo *psi; /* where we put sockinfo data */
1784 t_uscalar_t sn_len; /* soa_len */
1785 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1786 sotpi_info_t *sti;
1787
1788 uint_t sze;
1789 mblk_t *mblk;
1790 conn_pid_info_t *cpi;
1791
1792 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1793
1794 ksp->ks_snaptime = gethrtime();
1795
1796 if (rw == KSTAT_WRITE) { /* bounce all writes */
1797 return (EACCES);
1798 }
1799
1800 /*
1801 * for each sonode on the socklist, we massage the important
1802 * info into buf, in k_sockinfo format.
1803 */
1804 psi = (struct sockinfo *)buf;
1805 ns = 0;
1806 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1807 /* only stuff active sonodes and the same zone: */
1808 if (so->so_count == 0 || so->so_zoneid != myzoneid) {
1809 continue;
1810 }
1811
1812 mblk = so_get_sock_pid_mblk((sock_upper_handle_t)so);
1813 if (mblk == NULL) {
1814 continue;
1815 }
1816 cpi = (conn_pid_info_t *)mblk->b_datap->db_base;
1817 sze = sizeof (struct sockinfo);
1818 sze += (cpi->cpi_pids_cnt > 1) ?
1819 ((cpi->cpi_pids_cnt - 1) * sizeof (pid_t)) : 0;
1820
1821 /*
1822 * If the sonode was activated between the update and the
1823 * snapshot, we're done - as this is only a snapshot. We need
1824 * to make sure that we have space for this sockinfo. In the
1825 * time window between the update and the snapshot, the size of
1826 * sockinfo may change, as new pids are added/removed to/from
1827 * the list. We have to take that into consideration and only
1828 * include the sockinfo if we have enough space. That means the
1829 * number of entries we return by snapshot might not equal the
1830 * the number of entries calculated by update.
1831 */
1832 if (((caddr_t)(psi) + sze) >
1833 ((caddr_t)buf + ksp->ks_data_size)) {
1834 break;
1835 }
1836
1837 sti = SOTOTPI(so);
1838 /* copy important info into buf: */
1839 psi->si_size = sze;
1840 psi->si_family = so->so_family;
1841 psi->si_type = so->so_type;
1842 psi->si_flag = so->so_flag;
1843 psi->si_state = so->so_state;
1844 psi->si_serv_type = sti->sti_serv_type;
1845 psi->si_ux_laddr_sou_magic =
1846 sti->sti_ux_laddr.soua_magic;
1847 psi->si_ux_faddr_sou_magic =
1848 sti->sti_ux_faddr.soua_magic;
1849 psi->si_laddr_soa_len = sti->sti_laddr.soa_len;
1850 psi->si_faddr_soa_len = sti->sti_faddr.soa_len;
1851 psi->si_szoneid = so->so_zoneid;
1852 psi->si_faddr_noxlate = sti->sti_faddr_noxlate;
1853
1854
1855 mutex_enter(&so->so_lock);
1856
1857 if (sti->sti_laddr_sa != NULL) {
1858 ASSERT(sti->sti_laddr_sa->sa_data != NULL);
1859 sn_len = sti->sti_laddr_len;
1860 ASSERT(sn_len <= sizeof (short) +
1861 sizeof (psi->si_laddr_sun_path));
1862
1863 psi->si_laddr_family =
1864 sti->sti_laddr_sa->sa_family;
1865 if (sn_len != 0) {
1866 /* AF_UNIX socket names are NULL terminated */
1867 (void) strncpy(psi->si_laddr_sun_path,
1868 sti->sti_laddr_sa->sa_data,
1869 sizeof (psi->si_laddr_sun_path));
1870 sn_len = strlen(psi->si_laddr_sun_path);
1871 }
1872 psi->si_laddr_sun_path[sn_len] = 0;
1873 }
1874
1875 if (sti->sti_faddr_sa != NULL) {
1876 ASSERT(sti->sti_faddr_sa->sa_data != NULL);
1877 sn_len = sti->sti_faddr_len;
1878 ASSERT(sn_len <= sizeof (short) +
1879 sizeof (psi->si_faddr_sun_path));
1880
1881 psi->si_faddr_family =
1882 sti->sti_faddr_sa->sa_family;
1883 if (sn_len != 0) {
1884 (void) strncpy(psi->si_faddr_sun_path,
1885 sti->sti_faddr_sa->sa_data,
1886 sizeof (psi->si_faddr_sun_path));
1887 sn_len = strlen(psi->si_faddr_sun_path);
1888 }
1889 psi->si_faddr_sun_path[sn_len] = 0;
1890 }
1891
1892 mutex_exit(&so->so_lock);
1893
1894 (void) sprintf(psi->si_son_straddr, "%p", (void *)so);
1895 (void) sprintf(psi->si_lvn_straddr, "%p",
1896 (void *)sti->sti_ux_laddr.soua_vp);
1897 (void) sprintf(psi->si_fvn_straddr, "%p",
1898 (void *)sti->sti_ux_faddr.soua_vp);
1899
1900 psi->si_pids[0] = 0;
1901 if ((psi->si_pn_cnt = cpi->cpi_pids_cnt) > 0) {
1902 (void) memcpy(psi->si_pids, cpi->cpi_pids,
1903 psi->si_pn_cnt * sizeof (pid_t));
1904 }
1905
1906 freemsg(mblk);
1907
1908 psi = (struct sockinfo *)((caddr_t)psi + psi->si_size);
1909 ns++;
1910 }
1911
1912 ksp->ks_ndata = ns;
1913 return (0);
1914 }
1915
1916 ssize_t
1917 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size)
1918 {
1919 struct uio auio;
1920 struct iovec aiov[MSG_MAXIOVLEN];
1921 register vnode_t *vp;
1922 int ioflag, rwflag;
1923 ssize_t cnt;
1924 int error = 0;
1925 int iovcnt = 0;
1926 short fflag;
1927
1928 vp = fp->f_vnode;
1929 fflag = fp->f_flag;
1930
1931 rwflag = 0;
1932 aiov[0].iov_base = (caddr_t)buf;
1933 aiov[0].iov_len = size;
1934 iovcnt = 1;
1935 cnt = (ssize_t)size;
1936 (void) VOP_RWLOCK(vp, rwflag, NULL);
1937
1938 auio.uio_loffset = fileoff;
1939 auio.uio_iov = aiov;
1940 auio.uio_iovcnt = iovcnt;
1941 auio.uio_resid = cnt;
1942 auio.uio_segflg = UIO_SYSSPACE;
1943 auio.uio_llimit = MAXOFFSET_T;
1944 auio.uio_fmode = fflag;
1945 auio.uio_extflg = UIO_COPY_CACHED;
1946
1947 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1948
1949 /* If read sync is not asked for, filter sync flags */
1950 if ((ioflag & FRSYNC) == 0)
1951 ioflag &= ~(FSYNC|FDSYNC);
1952 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1953 cnt -= auio.uio_resid;
1954
1955 VOP_RWUNLOCK(vp, rwflag, NULL);
1956
1957 if (error == EINTR && cnt != 0)
1958 error = 0;
1959 out:
1960 if (error != 0) {
1961 *err = error;
1962 return (0);
1963 } else {
1964 *err = 0;
1965 return (cnt);
1966 }
1967 }
1968
/*
 * Copy size bytes from "from" into the kernel buffer "to".
 *
 * With fromkernel set the source is copied with bcopy() and 0 is
 * returned; otherwise the copy is done by xcopyin() and its return
 * value is passed back.
 */
int
so_copyin(const void *from, void *to, size_t size, int fromkernel)
{
	if (!fromkernel)
		return (xcopyin(from, to, size));

	bcopy(from, to, size);
	return (0);
}
1978
/*
 * Copy size bytes from the kernel buffer "from" out to "to".
 *
 * With tokernel set the destination is written with bcopy() and 0 is
 * returned; otherwise the copy is done by xcopyout() and its return
 * value is passed back.
 */
int
so_copyout(const void *from, void *to, size_t size, int tokernel)
{
	if (!tokernel)
		return (xcopyout(from, to, size));

	bcopy(from, to, size);
	return (0);
}