Print this page
12046 Provide /proc/<PID>/fdinfo/

@@ -20,10 +20,11 @@
  */
 
 /*
  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
  */
 
 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
 /*        All Rights Reserved   */
 

@@ -63,10 +64,22 @@
 #include <sys/processor.h>
 #include <sys/cpuvar.h>
 #include <sys/copyops.h>
 #include <sys/time.h>
 #include <sys/msacct.h>
+#include <sys/flock_impl.h>
+#include <sys/stropts.h>
+#include <sys/strsubr.h>
+#include <sys/pathname.h>
+#include <sys/mode.h>
+#include <sys/socketvar.h>
+#include <sys/autoconf.h>
+#include <sys/dtrace.h>
+#include <sys/timod.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#include <inet/cc.h>
 #include <vm/as.h>
 #include <vm/rm.h>
 #include <vm/seg.h>
 #include <vm/seg_vn.h>
 #include <vm/seg_dev.h>

@@ -1452,11 +1465,62 @@
         } while (cp > cbuf);
 
         return (len);
 }
 
+file_t *
+pr_getf(proc_t *p, uint_t fd, short *flag)
+{
+        uf_entry_t *ufp;
+        uf_info_t *fip;
+        file_t *fp;
+
+        ASSERT(MUTEX_HELD(&p->p_lock));
+
+        fip = P_FINFO(p);
+
+        if (fd >= fip->fi_nfiles)
+                return (NULL);
+
+        mutex_exit(&p->p_lock);
+        mutex_enter(&fip->fi_lock);
+        UF_ENTER(ufp, fip, fd);
+        if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
+                if (flag != NULL)
+                        *flag = ufp->uf_flag;
+                ufp->uf_refcnt++;
+        } else {
+                fp = NULL;
+        }
+        UF_EXIT(ufp);
+        mutex_exit(&fip->fi_lock);
+        mutex_enter(&p->p_lock);
+
+        return (fp);
+}
+
 void
+pr_releasef(proc_t *p, uint_t fd)
+{
+        uf_entry_t *ufp;
+        uf_info_t *fip;
+
+        ASSERT(MUTEX_HELD(&p->p_lock));
+
+        fip = P_FINFO(p);
+
+        mutex_exit(&p->p_lock);
+        mutex_enter(&fip->fi_lock);
+        UF_ENTER(ufp, fip, fd);
+        ASSERT(ufp->uf_refcnt > 0);
+        ufp->uf_refcnt--;
+        UF_EXIT(ufp);
+        mutex_exit(&fip->fi_lock);
+        mutex_enter(&p->p_lock);
+}
+
+void
 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
 {
         char *s = name;
         struct vfs *vfsp;
         struct vfssw *vfsswp;

@@ -1561,10 +1625,22 @@
         iol->piol_usedsize += itemsize;
         bzero(new, itemsize);
         return (new);
 }
 
+void
+pr_iol_freelist(list_t *iolhead)
+{
+        piol_t  *iol;
+
+        while ((iol = list_head(iolhead)) != NULL) {
+                list_remove(iolhead, iol);
+                kmem_free(iol, iol->piol_size);
+        }
+        list_destroy(iolhead);
+}
+
 int
 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
 {
         int error = errin;
         piol_t  *iol;

@@ -2373,10 +2449,479 @@
                         mutex_enter(&p->p_lock);
                 }
         }
 }
 
+static size_t
+prfdinfomisc(list_t *data, uint_t type, void *val, off_t vlen)
+{
+        pr_misc_header_t *misc;
+        size_t len;
+
+        len = sizeof (*misc) + vlen;
+
+        if (data != NULL) {
+                misc = pr_iol_newbuf(data, len);
+                misc->pr_misc_type = type;
+                misc->pr_misc_size = len;
+                misc++;
+                bcopy((char *)val, (char *)misc, vlen);
+        }
+
+        return (len);
+}
+
+/*
+ * There's no elegant way to determine if a character device
+ * supports TLI, so just check a hardcoded list of known TLI
+ * devices.
+ */
+
+static boolean_t
+pristli(vnode_t *vp)
+{
+        static const char *tlidevs[] = {
+            "udp", "udp6", "tcp", "tcp6", NULL
+        };
+        struct devnames *dnp;
+        major_t major;
+        uint_t i;
+
+        ASSERT(vp != NULL);
+        ASSERT(vp->v_type == VCHR);
+
+        if (vp->v_rdev == 0)
+                return (B_FALSE);
+
+        major = getmajor(vp->v_rdev);
+        if (major == DDI_MAJOR_T_NONE || major > devcnt)
+                return (B_FALSE);
+
+        dnp = &devnamesp[major];
+
+        for (i = 0; tlidevs[i] != NULL; i++) {
+                if (strcmp(dnp->dn_name, tlidevs[i]) == 0)
+                        return (B_TRUE);
+        }
+
+        return (B_FALSE);
+}
+
+static size_t
+prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
+{
+        char pathname[MAXPATHLEN];
+        vnode_t *vrootp;
+        size_t sz = 0;
+
+        mutex_enter(&p->p_lock);
+        if ((vrootp = PTOU(p)->u_rdir) == NULL)
+                vrootp = rootdir;
+        VN_HOLD(vrootp);
+        mutex_exit(&p->p_lock);
+
+        if (vnodetopath(vrootp, vp, pathname, sizeof (pathname), cred) == 0) {
+                sz += prfdinfomisc(data, PR_PATHNAME,
+                    pathname, strlen(pathname));
+        }
+        VN_RELE(vrootp);
+
+        return (sz);
+}
+
+static size_t
+prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred)
+{
+        strcmd_t strcmd;
+        int32_t rval;
+        size_t sz = 0;
+
+        strcmd.sc_cmd = TI_GETMYNAME;
+        strcmd.sc_timeout = 1;
+        strcmd.sc_len = STRCMDBUFSIZE;
+
+        if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
+            &rval, NULL) == 0 && strcmd.sc_len > 0) {
+                sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf,
+                    strcmd.sc_len);
+        }
+
+        strcmd.sc_cmd = TI_GETPEERNAME;
+        strcmd.sc_timeout = 1;
+        strcmd.sc_len = STRCMDBUFSIZE;
+
+        if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
+            &rval, NULL) == 0 && strcmd.sc_len > 0) {
+                sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf,
+                    strcmd.sc_len);
+        }
+
+        return (sz);
+}
+
+static size_t
+prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
+{
+        sonode_t *so;
+        socklen_t vlen;
+        size_t sz = 0;
+        uint_t i;
+
+        if (vp->v_stream) {
+                so = VTOSO(vp->v_stream->sd_vnode);
+
+                if (so->so_version == SOV_STREAM)
+                        so = NULL;
+        } else {
+                so = VTOSO(vp);
+        }
+
+        if (so == NULL)
+                return (0);
+
+        DTRACE_PROBE1(sonode, sonode_t *, so);
+
+        /* prmisc - PR_SOCKETNAME */
+
+        struct sockaddr_storage buf;
+        struct sockaddr *name = (struct sockaddr *)&buf;
+
+        vlen = sizeof (buf);
+        if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0)
+                sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen);
+
+        /* prmisc - PR_PEERSOCKNAME */
+
+        vlen = sizeof (buf);
+        if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0)
+                sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen);
+
+        /* prmisc - PR_SOCKOPTS_BOOL_OPTS */
+
+        static struct boolopt {
+                int             level;
+                int             opt;
+                int             bopt;
+        } boolopts[] = {
+                { SOL_SOCKET, SO_DEBUG,         PR_SO_DEBUG },
+                { SOL_SOCKET, SO_REUSEADDR,     PR_SO_REUSEADDR },
+#ifdef SO_REUSEPORT
+                /* SmartOS and OmniOS have SO_REUSEPORT */
+                { SOL_SOCKET, SO_REUSEPORT,     PR_SO_REUSEPORT },
+#endif
+                { SOL_SOCKET, SO_KEEPALIVE,     PR_SO_KEEPALIVE },
+                { SOL_SOCKET, SO_DONTROUTE,     PR_SO_DONTROUTE },
+                { SOL_SOCKET, SO_BROADCAST,     PR_SO_BROADCAST },
+                { SOL_SOCKET, SO_OOBINLINE,     PR_SO_OOBINLINE },
+                { SOL_SOCKET, SO_DGRAM_ERRIND,  PR_SO_DGRAM_ERRIND },
+                { SOL_SOCKET, SO_ALLZONES,      PR_SO_ALLZONES },
+                { SOL_SOCKET, SO_MAC_EXEMPT,    PR_SO_MAC_EXEMPT },
+                { SOL_SOCKET, SO_MAC_IMPLICIT,  PR_SO_MAC_IMPLICIT },
+                { SOL_SOCKET, SO_EXCLBIND,      PR_SO_EXCLBIND },
+                { SOL_SOCKET, SO_VRRP,          PR_SO_VRRP },
+                { IPPROTO_UDP, UDP_NAT_T_ENDPOINT,
+                    PR_UDP_NAT_T_ENDPOINT }
+        };
+        prsockopts_bool_opts_t opts;
+        int val;
+
+        if (data != NULL) {
+                opts.prsock_bool_opts = 0;
+
+                for (i = 0; i < sizeof (boolopts) / sizeof (boolopts[0]);
+                    i++) {
+                        vlen = sizeof (val);
+                        if (SOP_GETSOCKOPT(so, boolopts[i].level,
+                            boolopts[i].opt, &val, &vlen, 0, cred) == 0 &&
+                            val != 0) {
+                                opts.prsock_bool_opts |= boolopts[i].bopt;
+                        }
+                }
+        }
+
+        sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts));
+
+        /* prmisc - PR_SOCKOPT_LINGER */
+
+        struct linger l;
+
+        vlen = sizeof (l);
+        if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen,
+            0, cred) == 0 && vlen > 0) {
+                sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen);
+        }
+
+        /* prmisc - PR_SOCKOPT_* int types */
+
+        static struct sopt {
+                int             level;
+                int             opt;
+                int             bopt;
+        } sopts[] = {
+                { SOL_SOCKET, SO_TYPE,          PR_SOCKOPT_TYPE },
+                { SOL_SOCKET, SO_SNDBUF,        PR_SOCKOPT_SNDBUF },
+                { SOL_SOCKET, SO_RCVBUF,        PR_SOCKOPT_RCVBUF }
+        };
+
+        for (i = 0; i < sizeof (sopts) / sizeof (sopts[0]); i++) {
+                vlen = sizeof (val);
+                if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt,
+                    &val, &vlen, 0, cred) == 0 && vlen > 0) {
+                        sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen);
+                }
+        }
+
+        /* prmisc - PR_SOCKOPT_IP_NEXTHOP */
+
+        in_addr_t nexthop_val;
+
+        vlen = sizeof (nexthop_val);
+        if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP,
+            &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) {
+                sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP,
+                    &nexthop_val, vlen);
+        }
+
+        /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */
+
+        struct sockaddr_in6 nexthop6_val;
+
+        vlen = sizeof (nexthop6_val);
+        if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP,
+            &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) {
+                sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP,
+                    &nexthop6_val, vlen);
+        }
+
+        /* prmisc - PR_SOCKOPT_TCP_CONGESTION */
+
+        char cong[CC_ALGO_NAME_MAX];
+
+        vlen = sizeof (cong);
+        if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION,
+            &cong, &vlen, 0, cred) == 0 && vlen > 0) {
+                sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen);
+        }
+
+        /* prmisc - PR_SOCKFILTERS_PRIV */
+
+        struct fil_info fi;
+
+        vlen = sizeof (fi);
+        if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
+            &fi, &vlen, 0, cred) == 0 && vlen != 0) {
+                pr_misc_header_t *misc;
+                size_t len;
+
+                /*
+                 * We limit the number of returned filters to 32.
+                 * This is the maximum number that pfiles will print
+                 * anyway.
+                 */
+                vlen = MIN(32, fi.fi_pos + 1);
+                vlen *= sizeof (fi);
+
+                len = sizeof (*misc) + vlen;
+                sz += len;
+
+                if (data != NULL) {
+                        misc = pr_iol_newbuf(data, len);
+                        misc->pr_misc_type = PR_SOCKFILTERS_PRIV;
+                        misc->pr_misc_size = len;
+                        misc++;
+                        len = vlen;
+                        if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
+                            misc, &vlen, 0, cred) == 0) {
+                                /*
+                                 * In case the number of filters has reduced
+                                 * since the first call, explicitly zero out
+                                 * any unpopulated space.
+                                 */
+                                if (vlen < len)
+                                        bzero(misc + vlen, len - vlen);
+                        } else {
+                                /* Something went wrong, zero out the result */
+                                bzero(misc, vlen);
+                        }
+                }
+        }
+
+        return (sz);
+}
+
+u_offset_t
+prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
+{
+        u_offset_t sz;
+
+        /*
+         * All fdinfo files will be at least this big -
+         * sizeof fdinfo struct + zero length trailer
+         */
+        sz = offsetof(prfdinfov2_t, pr_misc) + sizeof (pr_misc_header_t);
+
+        /* Pathname */
+        if (vp->v_type != VSOCK && vp->v_type != VDOOR)
+                sz += prfdinfopath(p, vp, NULL, cred);
+
+        /* Socket options */
+        if (vp->v_type == VSOCK)
+                sz += prfdinfosockopt(vp, NULL, cred);
+
+        /* TLI/XTI sockets */
+        if (vp->v_type == VCHR && pristli(vp) && vp->v_stream != NULL)
+                sz += prfdinfotlisockopt(vp, NULL, cred);
+
+        return (sz);
+}
+
+int
+prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfov2_t *fdinfo, cred_t *cred,
+    list_t *data)
+{
+        vattr_t vattr;
+        int error;
+
+        if (vp == NULL)
+                return (ENOENT);
+
+        /*
+         * Initialise defaults for values that do not default to zero.
+         */
+        fdinfo->pr_uid = (uid_t)-1;
+        fdinfo->pr_gid = (gid_t)-1;
+        fdinfo->pr_size = -1;
+        fdinfo->pr_locktype = F_UNLCK;
+        fdinfo->pr_lockpid = -1;
+        fdinfo->pr_locksysid = -1;
+        fdinfo->pr_peerpid = -1;
+
+        /* Offset */
+
+        switch (vp->v_type) {
+        case VFIFO:
+        case VDOOR:
+        case VSOCK:
+                /* Do not provide an offset for these file types */
+                fdinfo->pr_offset = -1;
+                break;
+        }
+
+        /* Attributes */
+        vattr.va_mask = AT_STAT;
+        if (VOP_GETATTR(vp, &vattr, 0, cred, NULL) == 0) {
+                fdinfo->pr_major = getmajor(vattr.va_fsid);
+                fdinfo->pr_minor = getminor(vattr.va_fsid);
+                fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
+                fdinfo->pr_rminor = getminor(vattr.va_rdev);
+                fdinfo->pr_ino = (ino64_t)vattr.va_nodeid;
+                fdinfo->pr_size = (off64_t)vattr.va_size;
+                fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
+                fdinfo->pr_uid = vattr.va_uid;
+                fdinfo->pr_gid = vattr.va_gid;
+                if (vp->v_type == VSOCK)
+                        fdinfo->pr_fileflags |= sock_getfasync(vp);
+        }
+
+        /* locks */
+
+        flock64_t bf;
+
+        bzero(&bf, sizeof (bf));
+        bf.l_type = F_WRLCK;
+
+        if (VOP_FRLOCK(vp, F_GETLK, &bf,
+            (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL,
+            cred, NULL) == 0 && bf.l_type != F_UNLCK) {
+                fdinfo->pr_locktype = bf.l_type;
+                fdinfo->pr_lockpid = bf.l_pid;
+                fdinfo->pr_locksysid = bf.l_sysid;
+        }
+
+        /* peer cred */
+
+        k_peercred_t kpc;
+
+        switch (vp->v_type) {
+        case VFIFO:
+        case VSOCK: {
+                int32_t rval;
+
+                error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc,
+                    FKIOCTL, cred, &rval, NULL);
+                break;
+        }
+        case VCHR: {
+                struct strioctl strioc;
+                int32_t rval;
+
+                if (vp->v_stream == NULL) {
+                        error = ENOTSUP;
+                        break;
+                }
+                strioc.ic_cmd = _I_GETPEERCRED;
+                strioc.ic_timout = INFTIM;
+                strioc.ic_len = (int)sizeof (k_peercred_t);
+                strioc.ic_dp = (char *)&kpc;
+
+                error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL,
+                    STR_NOSIG | K_TO_K, cred, &rval);
+                break;
+        }
+        default:
+                error = ENOTSUP;
+        }
+
+        if (error == 0 && kpc.pc_cr != NULL) {
+                proc_t *peerp;
+
+                fdinfo->pr_peerpid = kpc.pc_cpid;
+
+                crfree(kpc.pc_cr);
+
+                mutex_enter(&pidlock);
+                if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) {
+                        user_t *up;
+
+                        mutex_enter(&peerp->p_lock);
+                        mutex_exit(&pidlock);
+
+                        up = PTOU(peerp);
+                        bcopy(up->u_comm, fdinfo->pr_peername,
+                            MIN(sizeof (up->u_comm),
+                            sizeof (fdinfo->pr_peername) - 1));
+
+                        mutex_exit(&peerp->p_lock);
+                } else {
+                        mutex_exit(&pidlock);
+                }
+        }
+
+        /*
+         * Don't attempt to determine the vnode path for a socket or a door
+         * as it will cause a linear scan of the dnlc table given there is no
+         * v_path associated with the vnode.
+         */
+        if (vp->v_type != VSOCK && vp->v_type != VDOOR)
+                (void) prfdinfopath(p, vp, data, cred);
+
+        if (vp->v_type == VSOCK)
+                (void) prfdinfosockopt(vp, data, cred);
+
+        /* TLI/XTI stream sockets */
+        if (vp->v_type == VCHR && pristli(vp) && vp->v_stream != NULL)
+                (void) prfdinfotlisockopt(vp, data, cred);
+
+        /*
+         * Add a terminating record with a zero size (pr_iol_newbuf will
+         * bzero the memory)
+         */
+        (void) pr_iol_newbuf(data, sizeof (pr_misc_header_t));
+
+        return (0);
+}
+
 #ifdef _SYSCALL32_IMPL
 void
 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
 {
         kthread_t *t;