1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  28  */
  29 
  30 /*
  31  * lxproc -- a loosely Linux-compatible /proc
  32  *
  33  * The aspiration here is to provide something that sufficiently approximates
  34  * the Linux /proc implementation for purposes of offering some compatibility
  35  * for simple Linux /proc readers (e.g., ps/top/htop).  However, it is not
  36  * intended to exactly mimic Linux semantics; when choosing between offering
  37  * compatibility and telling the truth, we emphatically pick the truth.  A
  38  * particular glaring example of this is the Linux notion of "tasks" (that is,
  39  * threads), which -- due to historical misadventures on Linux -- allocate their
  40  * identifiers from the process identifier space.  (That is, each thread has in
  41  * effect a pid.)  Some Linux /proc readers have come to depend on this
  42  * attribute, and become confused when threads appear with proper identifiers,
  43  * so we simply opt for the pre-2.6 behavior, and do not present the tasks
  44  * directory at all.  Similarly, when choosing between offering compatibility
  45  * and remaining consistent with our broader security model, we (obviously)
  46  * choose security over compatibility.  In short, this is meant to be a best
  47  * effort -- no more.
  48  */
  49 
  50 #include <sys/cpupart.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/session.h>
  53 #include <sys/vmparam.h>
  54 #include <sys/mman.h>
  55 #include <vm/rm.h>
  56 #include <vm/seg_vn.h>
  57 #include <sys/sdt.h>
  58 #include <sys/strlog.h>
  59 #include <sys/stropts.h>
  60 #include <sys/cmn_err.h>
  61 #include <sys/x86_archext.h>
  62 #include <sys/archsystm.h>
  63 #include <sys/fp.h>
  64 #include <sys/pool_pset.h>
  65 #include <sys/pset.h>
  66 #include <sys/zone.h>
  67 #include <sys/pghw.h>
  68 #include <sys/vfs_opreg.h>
  69 
  70 /* Dependent on procfs */
  71 extern kthread_t *prchoose(proc_t *);
  72 
  73 #include "lxproc.h"
  74 
/* Kernel globals defined outside this file. */
extern pgcnt_t swapfs_minfree;
extern time_t boot_time;
  77 
  78 /*
  79  * Pointer to the vnode ops vector for this fs.
  80  * This is instantiated in lxprinit() in lxpr_vfsops.c
  81  */
  82 vnodeops_t *lxpr_vnodeops;
  83 
/*
 * Vnode operation entry points.  These are the handlers referenced by
 * lxpr_vnodeops_template[].
 */
static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
    caller_context_t *);
static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
    caller_context_t *);
static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
static int lxpr_lookup(vnode_t *, char *, vnode_t **,
    pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
    pathname_t *);
static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
    caller_context_t *, int);
static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
static int lxpr_sync(void);
static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);

/* Per-node-type lookup handlers; see lxpr_lookup_function[]. */
static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);

/* Per-node-type readdir handlers; see lxpr_readdir_function[]. */
static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);

/*
 * Per-node-type read handlers; see lxpr_read_function[].  This first group
 * covers the special-case handlers and the top-level files.
 */
static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);

/* Read handlers for files under a <pid> directory. */
static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);

/* Read handlers for files under the net directory. */
static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
 152 
/*
 * Simple unit conversions.  Both are plain shifts on the argument, which is
 * evaluated exactly once:
 *   btok(x)  bytes -> kilobytes  (shift right by 10)
 *   ptok(x)  pages -> kilobytes  (shift left by PAGESHIFT - 10)
 */
#define btok(x) ((x) >> 10)                       /* bytes to kbytes */
#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
 158 
/*
 * The lxproc vnode operations vector
 *
 * A list of (operation name, handler) pairs terminated by a NULL pair.
 * Only read-side operations are given handlers here; there is no write
 * entry in this table.
 */
const fs_operation_def_t lxpr_vnodeops_template[] = {
        VOPNAME_OPEN,           { .vop_open = lxpr_open },
        VOPNAME_CLOSE,          { .vop_close = lxpr_close },
        VOPNAME_READ,           { .vop_read = lxpr_read },
        VOPNAME_GETATTR,        { .vop_getattr = lxpr_getattr },
        VOPNAME_ACCESS,         { .vop_access = lxpr_access },
        VOPNAME_LOOKUP,         { .vop_lookup = lxpr_lookup },
        VOPNAME_READDIR,        { .vop_readdir = lxpr_readdir },
        VOPNAME_READLINK,       { .vop_readlink = lxpr_readlink },
        /* fsync and seek share lxpr_sync() via the generic .error slot */
        VOPNAME_FSYNC,          { .error = lxpr_sync },
        VOPNAME_SEEK,           { .error = lxpr_sync },
        VOPNAME_INACTIVE,       { .vop_inactive = lxpr_inactive },
        VOPNAME_CMP,            { .vop_cmp = lxpr_cmp },
        VOPNAME_REALVP,         { .vop_realvp = lxpr_realvp },
        NULL,                   NULL            /* end-of-table marker */
};
 178 
/*
 * Contents of the top-level lxproc directory: each entry pairs a node type
 * with the file name it is presented under.  PROCDIRFILES is the number of
 * entries in the table.
 */
static lxpr_dirent_t lxpr_dir[] = {
        { LXPR_CMDLINE,         "cmdline" },
        { LXPR_CPUINFO,         "cpuinfo" },
        { LXPR_DEVICES,         "devices" },
        { LXPR_DMA,             "dma" },
        { LXPR_FILESYSTEMS,     "filesystems" },
        { LXPR_INTERRUPTS,      "interrupts" },
        { LXPR_IOPORTS,         "ioports" },
        { LXPR_KCORE,           "kcore" },
        { LXPR_KMSG,            "kmsg" },
        { LXPR_LOADAVG,         "loadavg" },
        { LXPR_MEMINFO,         "meminfo" },
        { LXPR_MOUNTS,          "mounts" },
        { LXPR_NETDIR,          "net" },
        { LXPR_PARTITIONS,      "partitions" },
        { LXPR_SELF,            "self" },
        { LXPR_STAT,            "stat" },
        { LXPR_UPTIME,          "uptime" },
        { LXPR_VERSION,         "version" }
};

#define PROCDIRFILES    (sizeof (lxpr_dir) / sizeof (lxpr_dir[0]))
 204 
/*
 * Contents of an /lxproc/<pid> directory: each entry pairs a node type with
 * the file name it is presented under.  PIDDIRFILES is the number of entries
 * in the table.
 */
static lxpr_dirent_t piddir[] = {
        { LXPR_PID_CMDLINE,     "cmdline" },
        { LXPR_PID_CPU,         "cpu" },
        { LXPR_PID_CURDIR,      "cwd" },
        { LXPR_PID_ENV,         "environ" },
        { LXPR_PID_EXE,         "exe" },
        { LXPR_PID_MAPS,        "maps" },
        { LXPR_PID_MEM,         "mem" },
        { LXPR_PID_ROOTDIR,     "root" },
        { LXPR_PID_STAT,        "stat" },
        { LXPR_PID_STATM,       "statm" },
        { LXPR_PID_STATUS,      "status" },
        { LXPR_PID_FDDIR,       "fd" }
};

#define PIDDIRFILES     (sizeof (piddir) / sizeof (piddir[0]))
 224 
/*
 * Contents of the /lxproc/net directory: each entry pairs a node type with
 * the file name it is presented under.  NETDIRFILES is the number of entries
 * in the table.
 */
static lxpr_dirent_t netdir[] = {
        { LXPR_NET_ARP,         "arp" },
        { LXPR_NET_DEV,         "dev" },
        { LXPR_NET_DEV_MCAST,   "dev_mcast" },
        { LXPR_NET_IGMP,        "igmp" },
        { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
        { LXPR_NET_IP_MR_VIF,   "ip_mr_vif" },
        { LXPR_NET_MCFILTER,    "mcfilter" },
        { LXPR_NET_NETSTAT,     "netstat" },
        { LXPR_NET_RAW,         "raw" },
        { LXPR_NET_ROUTE,       "route" },
        { LXPR_NET_RPC,         "rpc" },
        { LXPR_NET_RT_CACHE,    "rt_cache" },
        { LXPR_NET_SOCKSTAT,    "sockstat" },
        { LXPR_NET_SNMP,        "snmp" },
        { LXPR_NET_STAT,        "stat" },
        { LXPR_NET_TCP,         "tcp" },
        { LXPR_NET_UDP,         "udp" },
        { LXPR_NET_UNIX,        "unix" }
};

#define NETDIRFILES     (sizeof (netdir) / sizeof (netdir[0]))
 250 
 251 /*
 252  * These are the major signal number differences between Linux and native:
 253  *
 254  *      ====================================
 255  *      | Number | Linux      | Native     |
 256  *      | ====== | =========  | ========== |
 257  *      |    7   | SIGBUS     | SIGEMT     |
 258  *      |   10   | SIGUSR1    | SIGBUS     |
 259  *      |   12   | SIGUSR2    | SIGSYS     |
 260  *      |   16   | SIGSTKFLT  | SIGUSR1    |
 261  *      |   17   | SIGCHLD    | SIGUSR2    |
 262  *      |   18   | SIGCONT    | SIGCHLD    |
 263  *      |   19   | SIGSTOP    | SIGPWR     |
 264  *      |   20   | SIGTSTP    | SIGWINCH   |
 265  *      |   21   | SIGTTIN    | SIGURG     |
 266  *      |   22   | SIGTTOU    | SIGPOLL    |
 267  *      |   23   | SIGURG     | SIGSTOP    |
 268  *      |   24   | SIGXCPU    | SIGTSTP    |
 269  *      |   25   | SIGXFSZ    | SIGCONT    |
 *      |   26   | SIGVTALRM  | SIGTTIN    |
 *      |   27   | SIGPROF    | SIGTTOU    |
 *      |   28   | SIGWINCH   | SIGVTALRM  |
 273  *      |   29   | SIGPOLL    | SIGPROF    |
 274  *      |   30   | SIGPWR     | SIGXCPU    |
 275  *      |   31   | SIGSYS     | SIGXFSZ    |
 276  *      ====================================
 277  *
 278  * Not every Linux signal maps to a native signal, nor does every native
 279  * signal map to a Linux counterpart. However, when signals do map, the
 280  * mapping is unique.
 281  */
/*
 * Signal translation table with NSIG slots.  The array index is the native
 * (illumos) signal number and the stored value is the corresponding LX_*
 * signal number; -1 marks a native signal with no Linux counterpart.
 *
 * NOTE(review): slot 7 (native SIGEMT per the table above) holds
 * LX_SIGSTKFLT rather than -1 -- presumably deliberate, but confirm.
 */
static int
lxpr_sigmap[NSIG] = {
        0,
        LX_SIGHUP,
        LX_SIGINT,
        LX_SIGQUIT,
        LX_SIGILL,
        LX_SIGTRAP,
        LX_SIGABRT,
        LX_SIGSTKFLT,
        LX_SIGFPE,
        LX_SIGKILL,
        LX_SIGBUS,
        LX_SIGSEGV,
        LX_SIGSYS,
        LX_SIGPIPE,
        LX_SIGALRM,
        LX_SIGTERM,
        LX_SIGUSR1,
        LX_SIGUSR2,
        LX_SIGCHLD,
        LX_SIGPWR,
        LX_SIGWINCH,
        LX_SIGURG,
        LX_SIGPOLL,
        LX_SIGSTOP,
        LX_SIGTSTP,
        LX_SIGCONT,
        LX_SIGTTIN,
        LX_SIGTTOU,
        LX_SIGVTALRM,
        LX_SIGPROF,
        LX_SIGXCPU,
        LX_SIGXFSZ,
        -1,                     /* 32:  illumos SIGWAITING */
        -1,                     /* 33:  illumos SIGLWP */
        -1,                     /* 34:  illumos SIGFREEZE */
        -1,                     /* 35:  illumos SIGTHAW */
        -1,                     /* 36:  illumos SIGCANCEL */
        -1,                     /* 37:  illumos SIGLOST */
        -1,                     /* 38:  illumos SIGXRES */
        -1,                     /* 39:  illumos SIGJVM1 */
        -1,                     /* 40:  illumos SIGJVM2 */
        LX_SIGRTMIN,            /* 41:  illumos _SIGRTMIN */
        LX_SIGRTMIN + 1,
        LX_SIGRTMIN + 2,
        LX_SIGRTMIN + 3,
        LX_SIGRTMIN + 4,
        LX_SIGRTMIN + 5,
        LX_SIGRTMIN + 6,
        LX_SIGRTMIN + 7,
        LX_SIGRTMIN + 8,
        LX_SIGRTMIN + 9,
        LX_SIGRTMIN + 10,
        LX_SIGRTMIN + 11,
        LX_SIGRTMIN + 12,
        LX_SIGRTMIN + 13,
        LX_SIGRTMIN + 14,
        LX_SIGRTMIN + 15,
        LX_SIGRTMIN + 16,
        LX_SIGRTMIN + 17,
        LX_SIGRTMIN + 18,
        LX_SIGRTMIN + 19,
        LX_SIGRTMIN + 20,
        LX_SIGRTMIN + 21,
        LX_SIGRTMIN + 22,
        LX_SIGRTMIN + 23,
        LX_SIGRTMIN + 24,
        LX_SIGRTMIN + 25,
        LX_SIGRTMIN + 26,
        LX_SIGRTMIN + 27,
        LX_SIGRTMIN + 28,
        LX_SIGRTMIN + 29,
        LX_SIGRTMIN + 30,
        LX_SIGRTMAX,            /* last slot of the real-time range */
};
 358 
 359 /*
 360  * lxpr_open(): Vnode operation for VOP_OPEN()
 361  */
 362 static int
 363 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
 364 {
 365         vnode_t         *vp = *vpp;
 366         lxpr_node_t     *lxpnp = VTOLXP(vp);
 367         lxpr_nodetype_t type = lxpnp->lxpr_type;
 368         vnode_t         *rvp;
 369         int             error = 0;
 370 
 371         /*
 372          * We only allow reading in this file systrem
 373          */
 374         if (flag & FWRITE)
 375                 return (EROFS);
 376 
 377         /*
 378          * If we are opening an underlying file only allow regular files
 379          * reject the open for anything but a regular file.
 380          * Just do it if we are opening the current or root directory.
 381          */
 382         if (lxpnp->lxpr_realvp != NULL) {
 383                 rvp = lxpnp->lxpr_realvp;
 384 
 385                 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG)
 386                         error = EACCES;
 387                 else {
 388                         /*
 389                          * Need to hold rvp since VOP_OPEN() may release it.
 390                          */
 391                         VN_HOLD(rvp);
 392                         error = VOP_OPEN(&rvp, flag, cr, ct);
 393                         if (error) {
 394                                 VN_RELE(rvp);
 395                         } else {
 396                                 *vpp = rvp;
 397                                 VN_RELE(vp);
 398                         }
 399                 }
 400         }
 401 
 402         if (type == LXPR_KMSG) {
 403                 ldi_ident_t     li = VTOLXPM(vp)->lxprm_li;
 404                 struct strioctl str;
 405                 int             rv;
 406 
 407                 /*
 408                  * Open the zone's console device using the layered driver
 409                  * interface.
 410                  */
 411                 if ((error = ldi_open_by_name("/dev/log", FREAD, cr,
 412                     &lxpnp->lxpr_cons_ldih, li)) != 0)
 413                         return (error);
 414 
 415                 /*
 416                  * Send an ioctl to the underlying console device, letting it
 417                  * know we're interested in getting console messages.
 418                  */
 419                 str.ic_cmd = I_CONSLOG;
 420                 str.ic_timout = 0;
 421                 str.ic_len = 0;
 422                 str.ic_dp = NULL;
 423                 if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR,
 424                     (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
 425                         return (error);
 426         }
 427 
 428         return (error);
 429 }
 430 
 431 
 432 /*
 433  * lxpr_close(): Vnode operation for VOP_CLOSE()
 434  */
 435 /* ARGSUSED */
 436 static int
 437 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
 438     caller_context_t *ct)
 439 {
 440         lxpr_node_t     *lxpr = VTOLXP(vp);
 441         lxpr_nodetype_t type = lxpr->lxpr_type;
 442         int             err;
 443 
 444         /*
 445          * we should never get here because the close is done on the realvp
 446          * for these nodes
 447          */
 448         ASSERT(type != LXPR_PID_FD_FD &&
 449             type != LXPR_PID_CURDIR &&
 450             type != LXPR_PID_ROOTDIR &&
 451             type != LXPR_PID_EXE);
 452 
 453         if (type == LXPR_KMSG) {
 454                 if ((err = ldi_close(lxpr->lxpr_cons_ldih, 0, cr)) != 0)
 455                         return (err);
 456         }
 457 
 458         return (0);
 459 }
 460 
 461 static void (*lxpr_read_function[LXPR_NFILES])() = {
 462         lxpr_read_isdir,                /* /proc                */
 463         lxpr_read_isdir,                /* /proc/<pid>            */
 464         lxpr_read_pid_cmdline,          /* /proc/<pid>/cmdline    */
 465         lxpr_read_empty,                /* /proc/<pid>/cpu        */
 466         lxpr_read_invalid,              /* /proc/<pid>/cwd        */
 467         lxpr_read_empty,                /* /proc/<pid>/environ    */
 468         lxpr_read_invalid,              /* /proc/<pid>/exe        */
 469         lxpr_read_pid_maps,             /* /proc/<pid>/maps       */
 470         lxpr_read_empty,                /* /proc/<pid>/mem        */
 471         lxpr_read_invalid,              /* /proc/<pid>/root       */
 472         lxpr_read_pid_stat,             /* /proc/<pid>/stat       */
 473         lxpr_read_pid_statm,            /* /proc/<pid>/statm      */
 474         lxpr_read_pid_status,           /* /proc/<pid>/status     */
 475         lxpr_read_isdir,                /* /proc/<pid>/fd */
 476         lxpr_read_fd,                   /* /proc/<pid>/fd/nn      */
 477         lxpr_read_empty,                /* /proc/cmdline        */
 478         lxpr_read_cpuinfo,              /* /proc/cpuinfo        */
 479         lxpr_read_empty,                /* /proc/devices        */
 480         lxpr_read_empty,                /* /proc/dma            */
 481         lxpr_read_empty,                /* /proc/filesystems    */
 482         lxpr_read_empty,                /* /proc/interrupts     */
 483         lxpr_read_empty,                /* /proc/ioports        */
 484         lxpr_read_empty,                /* /proc/kcore          */
 485         lxpr_read_kmsg,                 /* /proc/kmsg           */
 486         lxpr_read_loadavg,              /* /proc/loadavg        */
 487         lxpr_read_meminfo,              /* /proc/meminfo        */
 488         lxpr_read_mounts,               /* /proc/mounts         */
 489         lxpr_read_isdir,                /* /proc/net            */
 490         lxpr_read_net_arp,              /* /proc/net/arp        */
 491         lxpr_read_net_dev,              /* /proc/net/dev        */
 492         lxpr_read_net_dev_mcast,        /* /proc/net/dev_mcast  */
 493         lxpr_read_net_igmp,             /* /proc/net/igmp       */
 494         lxpr_read_net_ip_mr_cache,      /* /proc/net/ip_mr_cache */
 495         lxpr_read_net_ip_mr_vif,        /* /proc/net/ip_mr_vif  */
 496         lxpr_read_net_mcfilter,         /* /proc/net/mcfilter   */
 497         lxpr_read_net_netstat,          /* /proc/net/netstat    */
 498         lxpr_read_net_raw,              /* /proc/net/raw        */
 499         lxpr_read_net_route,            /* /proc/net/route      */
 500         lxpr_read_net_rpc,              /* /proc/net/rpc        */
 501         lxpr_read_net_rt_cache,         /* /proc/net/rt_cache   */
 502         lxpr_read_net_sockstat,         /* /proc/net/sockstat   */
 503         lxpr_read_net_snmp,             /* /proc/net/snmp       */
 504         lxpr_read_net_stat,             /* /proc/net/stat       */
 505         lxpr_read_net_tcp,              /* /proc/net/tcp        */
 506         lxpr_read_net_udp,              /* /proc/net/udp        */
 507         lxpr_read_net_unix,             /* /proc/net/unix       */
 508         lxpr_read_partitions,           /* /proc/partitions     */
 509         lxpr_read_invalid,              /* /proc/self           */
 510         lxpr_read_stat,                 /* /proc/stat           */
 511         lxpr_read_uptime,               /* /proc/uptime         */
 512         lxpr_read_version,              /* /proc/version        */
 513 };
 514 
/*
 * Array of lookup functions, indexed by /lxproc file type.
 *
 * Only the four directory node types (/proc, /proc/<pid>, /proc/<pid>/fd and
 * /proc/net) have a real lookup handler; every other slot holds
 * lxpr_lookup_not_a_dir.  The entry order mirrors lxpr_read_function[] and
 * lxpr_readdir_function[].
 *
 * NOTE(review): the element type is declared with an empty parameter list,
 * so calls through this table are not argument-checked.
 */
static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
        lxpr_lookup_procdir,            /* /proc                */
        lxpr_lookup_piddir,             /* /proc/<pid>            */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/cmdline    */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/cpu        */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/cwd        */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/environ    */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/exe        */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/maps       */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/mem        */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/root       */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/stat       */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/statm      */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/status     */
        lxpr_lookup_fddir,              /* /proc/<pid>/fd */
        lxpr_lookup_not_a_dir,          /* /proc/<pid>/fd/nn      */
        lxpr_lookup_not_a_dir,          /* /proc/cmdline        */
        lxpr_lookup_not_a_dir,          /* /proc/cpuinfo        */
        lxpr_lookup_not_a_dir,          /* /proc/devices        */
        lxpr_lookup_not_a_dir,          /* /proc/dma            */
        lxpr_lookup_not_a_dir,          /* /proc/filesystems    */
        lxpr_lookup_not_a_dir,          /* /proc/interrupts     */
        lxpr_lookup_not_a_dir,          /* /proc/ioports        */
        lxpr_lookup_not_a_dir,          /* /proc/kcore          */
        lxpr_lookup_not_a_dir,          /* /proc/kmsg           */
        lxpr_lookup_not_a_dir,          /* /proc/loadavg        */
        lxpr_lookup_not_a_dir,          /* /proc/meminfo        */
        lxpr_lookup_not_a_dir,          /* /proc/mounts         */
        lxpr_lookup_netdir,             /* /proc/net            */
        lxpr_lookup_not_a_dir,          /* /proc/net/arp        */
        lxpr_lookup_not_a_dir,          /* /proc/net/dev        */
        lxpr_lookup_not_a_dir,          /* /proc/net/dev_mcast  */
        lxpr_lookup_not_a_dir,          /* /proc/net/igmp       */
        lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_cache */
        lxpr_lookup_not_a_dir,          /* /proc/net/ip_mr_vif  */
        lxpr_lookup_not_a_dir,          /* /proc/net/mcfilter   */
        lxpr_lookup_not_a_dir,          /* /proc/net/netstat    */
        lxpr_lookup_not_a_dir,          /* /proc/net/raw        */
        lxpr_lookup_not_a_dir,          /* /proc/net/route      */
        lxpr_lookup_not_a_dir,          /* /proc/net/rpc        */
        lxpr_lookup_not_a_dir,          /* /proc/net/rt_cache   */
        lxpr_lookup_not_a_dir,          /* /proc/net/sockstat   */
        lxpr_lookup_not_a_dir,          /* /proc/net/snmp       */
        lxpr_lookup_not_a_dir,          /* /proc/net/stat       */
        lxpr_lookup_not_a_dir,          /* /proc/net/tcp        */
        lxpr_lookup_not_a_dir,          /* /proc/net/udp        */
        lxpr_lookup_not_a_dir,          /* /proc/net/unix       */
        lxpr_lookup_not_a_dir,          /* /proc/partitions     */
        lxpr_lookup_not_a_dir,          /* /proc/self           */
        lxpr_lookup_not_a_dir,          /* /proc/stat           */
        lxpr_lookup_not_a_dir,          /* /proc/uptime         */
        lxpr_lookup_not_a_dir,          /* /proc/version        */
};
 571 
/*
 * Array of readdir functions, indexed by /proc file type.
 *
 * Only the four directory node types (/proc, /proc/<pid>, /proc/<pid>/fd and
 * /proc/net) have a real readdir handler; every other slot holds
 * lxpr_readdir_not_a_dir.  The entry order mirrors lxpr_read_function[] and
 * lxpr_lookup_function[].
 *
 * NOTE(review): the element type is declared with an empty parameter list,
 * so calls through this table are not argument-checked.
 */
static int (*lxpr_readdir_function[LXPR_NFILES])() = {
        lxpr_readdir_procdir,           /* /proc                */
        lxpr_readdir_piddir,            /* /proc/<pid>            */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/cmdline    */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/cpu        */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/cwd        */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/environ    */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/exe        */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/maps       */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/mem        */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/root       */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/stat       */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/statm      */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/status     */
        lxpr_readdir_fddir,             /* /proc/<pid>/fd */
        lxpr_readdir_not_a_dir,         /* /proc/<pid>/fd/nn      */
        lxpr_readdir_not_a_dir,         /* /proc/cmdline        */
        lxpr_readdir_not_a_dir,         /* /proc/cpuinfo        */
        lxpr_readdir_not_a_dir,         /* /proc/devices        */
        lxpr_readdir_not_a_dir,         /* /proc/dma            */
        lxpr_readdir_not_a_dir,         /* /proc/filesystems    */
        lxpr_readdir_not_a_dir,         /* /proc/interrupts     */
        lxpr_readdir_not_a_dir,         /* /proc/ioports        */
        lxpr_readdir_not_a_dir,         /* /proc/kcore          */
        lxpr_readdir_not_a_dir,         /* /proc/kmsg           */
        lxpr_readdir_not_a_dir,         /* /proc/loadavg        */
        lxpr_readdir_not_a_dir,         /* /proc/meminfo        */
        lxpr_readdir_not_a_dir,         /* /proc/mounts         */
        lxpr_readdir_netdir,            /* /proc/net            */
        lxpr_readdir_not_a_dir,         /* /proc/net/arp        */
        lxpr_readdir_not_a_dir,         /* /proc/net/dev        */
        lxpr_readdir_not_a_dir,         /* /proc/net/dev_mcast  */
        lxpr_readdir_not_a_dir,         /* /proc/net/igmp       */
        lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_cache */
        lxpr_readdir_not_a_dir,         /* /proc/net/ip_mr_vif  */
        lxpr_readdir_not_a_dir,         /* /proc/net/mcfilter   */
        lxpr_readdir_not_a_dir,         /* /proc/net/netstat    */
        lxpr_readdir_not_a_dir,         /* /proc/net/raw        */
        lxpr_readdir_not_a_dir,         /* /proc/net/route      */
        lxpr_readdir_not_a_dir,         /* /proc/net/rpc        */
        lxpr_readdir_not_a_dir,         /* /proc/net/rt_cache   */
        lxpr_readdir_not_a_dir,         /* /proc/net/sockstat   */
        lxpr_readdir_not_a_dir,         /* /proc/net/snmp       */
        lxpr_readdir_not_a_dir,         /* /proc/net/stat       */
        lxpr_readdir_not_a_dir,         /* /proc/net/tcp        */
        lxpr_readdir_not_a_dir,         /* /proc/net/udp        */
        lxpr_readdir_not_a_dir,         /* /proc/net/unix       */
        lxpr_readdir_not_a_dir,         /* /proc/partitions     */
        lxpr_readdir_not_a_dir,         /* /proc/self           */
        lxpr_readdir_not_a_dir,         /* /proc/stat           */
        lxpr_readdir_not_a_dir,         /* /proc/uptime         */
        lxpr_readdir_not_a_dir,         /* /proc/version        */
};
 628 
 629 
 630 /*
 631  * lxpr_read(): Vnode operation for VOP_READ()
 632  *
 633  * As the format of all the files that can be read in lxproc is human readable
 634  * and not binary structures there do not have to be different read variants
 635  * depending on whether the reading process model is 32- or 64-bit.
 636  */
 637 /* ARGSUSED */
 638 static int
 639 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
 640     caller_context_t *ct)
 641 {
 642         lxpr_node_t *lxpnp = VTOLXP(vp);
 643         lxpr_nodetype_t type = lxpnp->lxpr_type;
 644         lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
 645         int error;
 646 
 647         ASSERT(type >= 0 && type < LXPR_NFILES);
 648 
 649         lxpr_read_function[type](lxpnp, uiobuf);
 650 
 651         error = lxpr_uiobuf_flush(uiobuf);
 652         lxpr_uiobuf_free(uiobuf);
 653 
 654         return (error);
 655 }
 656 
 657 /*
 658  * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
 659  *
 660  * Various special case reads:
 661  * - trying to read a directory
 662  * - invalid file (used to mean a file that should be implemented,
 663  *   but isn't yet)
 664  * - empty file
 665  * - wait to be able to read a file that will never have anything to read
 666  */
/*
 * lxpr_read_isdir(): read handler for directory nodes; produces no data and
 * marks the read as failed with EISDIR.
 */
/* ARGSUSED */
static void
lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        lxpr_uiobuf_seterr(uiobuf, EISDIR);
}
 673 
/*
 * lxpr_read_invalid(): read handler that always fails; it records EINVAL on
 * the uiobuf and writes no data.
 */
/* ARGSUSED */
static void
lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        lxpr_uiobuf_seterr(uiobuf, EINVAL);
}
 680 
/*
 * lxpr_read_empty(): read handler for an empty file; it neither writes data
 * nor sets an error on the uiobuf.
 */
/* ARGSUSED */
static void
lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
 686 
 687 /*
 688  * lxpr_read_pid_cmdline():
 689  *
 690  * This is not precisely compatible with Linux: the Linux cmdline returns argv
 691  * with the correct separation using \0 between the arguments, but we cannot do
 692  * that without copying the real argv from the correct process context.  This
 693  * is too difficult to attempt so we pretend that the entire cmdline is just
 694  * argv[0]. This is good enough for ps and htop to display correctly, but might
 695  * cause some other things not to work correctly.
 696  */
 697 static void
 698 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
 699 {
 700         proc_t *p;
 701         char *buf;
 702 
 703         ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
 704 
 705         p = lxpr_lock(lxpnp->lxpr_pid);
 706         if (p == NULL) {
 707                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
 708                 return;
 709         }
 710 
 711         buf = PTOU(p)->u_argv != 0 ? PTOU(p)->u_psargs : PTOU(p)->u_comm;
 712 
 713         lxpr_uiobuf_write(uiobuf, buf, strlen(buf) + 1);
 714         lxpr_unlock(p);
 715 }
 716 
 717 /*
 718  * lxpr_read_pid_maps(): memory map file
 719  */
 720 static void
 721 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
 722 {
 723         proc_t *p;
 724         struct as *as;
 725         struct seg *seg;
 726         char *buf;
 727         int buflen = MAXPATHLEN;
 728         struct print_data {
 729                 caddr_t saddr;
 730                 caddr_t eaddr;
 731                 int type;
 732                 char prot[5];
 733                 uint32_t offset;
 734                 vnode_t *vp;
 735                 struct print_data *next;
 736         } *print_head = NULL;
 737         struct print_data **print_tail = &print_head;
 738         struct print_data *pbuf;
 739 
 740         ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
 741 
 742         p = lxpr_lock(lxpnp->lxpr_pid);
 743         if (p == NULL) {
 744                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
 745                 return;
 746         }
 747 
 748         as = p->p_as;
 749 
 750         if (as == &kas) {
 751                 lxpr_unlock(p);
 752                 return;
 753         }
 754 
 755         mutex_exit(&p->p_lock);
 756 
 757         /* Iterate over all segments in the address space */
 758         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 759         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
 760                 vnode_t *vp;
 761                 uint_t protbits;
 762 
 763                 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
 764 
 765                 pbuf->saddr = seg->s_base;
 766                 pbuf->eaddr = seg->s_base+seg->s_size;
 767                 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
 768 
 769                 /*
 770                  * Cheat and only use the protection bits of the first page
 771                  * in the segment
 772                  */
 773                 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
 774                 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
 775 
 776                 if (protbits & PROT_READ)      pbuf->prot[0] = 'r';
 777                 if (protbits & PROT_WRITE)     pbuf->prot[1] = 'w';
 778                 if (protbits & PROT_EXEC)      pbuf->prot[2] = 'x';
 779                 if (pbuf->type & MAP_SHARED)        pbuf->prot[3] = 's';
 780                 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
 781 
 782                 if (seg->s_ops == &segvn_ops &&
 783                     SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
 784                     vp != NULL && vp->v_type == VREG) {
 785                         VN_HOLD(vp);
 786                         pbuf->vp = vp;
 787                 } else {
 788                         pbuf->vp = NULL;
 789                 }
 790 
 791                 pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr);
 792 
 793                 pbuf->next = NULL;
 794                 *print_tail = pbuf;
 795                 print_tail = &pbuf->next;
 796         }
 797         AS_LOCK_EXIT(as, &as->a_lock);
 798         mutex_enter(&p->p_lock);
 799         lxpr_unlock(p);
 800 
 801         buf = kmem_alloc(buflen, KM_SLEEP);
 802 
 803         /* print the data we've extracted */
 804         pbuf = print_head;
 805         while (pbuf != NULL) {
 806                 struct print_data *pbuf_next;
 807                 vattr_t vattr;
 808 
 809                 int maj = 0;
 810                 int min = 0;
 811                 u_longlong_t inode = 0;
 812 
 813                 *buf = '\0';
 814                 if (pbuf->vp != NULL) {
 815                         vattr.va_mask = AT_FSID | AT_NODEID;
 816                         if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
 817                             NULL) == 0) {
 818                                 maj = getmajor(vattr.va_fsid);
 819                                 min = getminor(vattr.va_fsid);
 820                                 inode = vattr.va_nodeid;
 821                         }
 822                         (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
 823                         VN_RELE(pbuf->vp);
 824                 }
 825 
 826                 if (*buf != '\0') {
 827                         lxpr_uiobuf_printf(uiobuf,
 828                             "%08x-%08x %s %08x %02d:%03d %lld %s\n",
 829                             pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
 830                             maj, min, inode, buf);
 831                 } else {
 832                         lxpr_uiobuf_printf(uiobuf,
 833                             "%08x-%08x %s %08x %02d:%03d %lld\n",
 834                             pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
 835                             maj, min, inode);
 836                 }
 837 
 838                 pbuf_next = pbuf->next;
 839                 kmem_free(pbuf, sizeof (*pbuf));
 840                 pbuf = pbuf_next;
 841         }
 842 
 843         kmem_free(buf, buflen);
 844 }
 845 
 846 /*
 847  * lxpr_read_pid_statm(): memory status file
 848  */
 849 static void
 850 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
 851 {
 852         proc_t *p;
 853         struct as *as;
 854         size_t vsize;
 855         size_t rss;
 856 
 857         ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
 858 
 859         p = lxpr_lock(lxpnp->lxpr_pid);
 860         if (p == NULL) {
 861                 lxpr_uiobuf_seterr(uiobuf, EINVAL);
 862                 return;
 863         }
 864 
 865         as = p->p_as;
 866 
 867         mutex_exit(&p->p_lock);
 868 
 869         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 870         vsize = btopr(as->a_resvsize);
 871         rss = rm_asrss(as);
 872         AS_LOCK_EXIT(as, &as->a_lock);
 873 
 874         mutex_enter(&p->p_lock);
 875         lxpr_unlock(p);
 876 
 877         lxpr_uiobuf_printf(uiobuf,
 878             "%lu %lu %lu %lu %lu %lu %lu\n",
 879             vsize, rss, 0l, rss, 0l, 0l, 0l);
 880 }
 881 
/*
 * lxpr_read_pid_status(): status file
 *
 * Writes a Linux-style multi-line "Key:\tvalue" summary of the process: name,
 * state, ids, credentials, supplementary groups, memory sizes (omitted for
 * zombies, system processes and processes on kas), and signal and capability
 * masks.  Sets EINVAL on the uiobuf if the process cannot be locked.
 *
 * The process stays locked (lxpr_lock()/lxpr_unlock()) for the whole function;
 * p_lock is dropped only around the address space lock.
 */
static void
lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        proc_t *p;
        kthread_t *t;
        user_t *up;
        cred_t *cr;
        const gid_t *groups;
        int    ngroups;
        struct as *as;
        char *status;
        pid_t pid, ppid;
        size_t vsize;
        size_t rss;
        k_sigset_t current, ignore, handle;
        int    i, lx_sig;

        ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);

        p = lxpr_lock(lxpnp->lxpr_pid);
        if (p == NULL) {
                lxpr_uiobuf_seterr(uiobuf, EINVAL);
                return;
        }

        pid = p->p_pid;

        /*
         * Convert pid to the Linux default of 1 if we're the zone's init
         * process
         */
        if (pid == curproc->p_zone->zone_proc_initpid) {
                pid = 1;
                ppid = 0;       /* parent pid for init is 0 */
        } else {
                /*
                 * Make sure not to reference parent PIDs that reside outside
                 * the zone
                 */
                ppid = ((p->p_flag & SZONETOP)
                    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

                /*
                 * Convert ppid to the Linux default of 1 if our parent is the
                 * zone's init process
                 */
                if (ppid == curproc->p_zone->zone_proc_initpid)
                        ppid = 1;
        }

        /*
         * Derive the process state from the thread prchoose() selects; that
         * thread is released again with thread_unlock() below.
         */
        t = prchoose(p);
        if (t != NULL) {
                switch (t->t_state) {
                case TS_SLEEP:
                        status = "S (sleeping)";
                        break;
                case TS_RUN:
                case TS_ONPROC:
                        status = "R (running)";
                        break;
                case TS_ZOMB:
                        status = "Z (zombie)";
                        break;
                case TS_STOPPED:
                        status = "T (stopped)";
                        break;
                default:
                        status = "! (unknown)";
                        break;
                }
                thread_unlock(t);
        } else {
                /*
                 * there is a hole in the exit code, where a proc can have
                 * no threads but it is yet to be flagged SZOMB. We will
                 * assume we are about to become a zombie
                 */
                status = "Z (zombie)";
        }

        /* Take our own hold on the credential; it is dropped by crfree() */
        up = PTOU(p);
        mutex_enter(&p->p_crlock);
        crhold(cr = p->p_cred);
        mutex_exit(&p->p_crlock);

        /*
         * The fourth Uid/Gid column repeats the effective id, and TracerPid
         * is always reported as 0.  The "Groups:" line is deliberately left
         * unterminated: the next block written starts with "\n".
         *
         * NOTE(review): p_fno_ctl is printed with %d -- confirm that its type
         * is int-sized.
         */
        lxpr_uiobuf_printf(uiobuf,
            "Name:\t%s\n"
            "State:\t%s\n"
            "Tgid:\t%d\n"
            "Pid:\t%d\n"
            "PPid:\t%d\n"
            "TracerPid:\t%d\n"
            "Uid:\t%u\t%u\t%u\t%u\n"
            "Gid:\t%u\t%u\t%u\t%u\n"
            "FDSize:\t%d\n"
            "Groups:\t",
            up->u_comm,
            status,
            pid, /* thread group id - same as pid */
            pid,
            ppid,
            0,
            crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
            crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
            p->p_fno_ctl);

        ngroups = crgetngroups(cr);
        groups  = crgetgroups(cr);
        for (i = 0; i < ngroups; i++) {
                lxpr_uiobuf_printf(uiobuf,
                    "%u ",
                    groups[i]);
        }
        crfree(cr);

        /*
         * Memory figures are only reported for live user processes.  p_lock
         * is dropped while the address space lock is held and retaken
         * afterwards.
         */
        as = p->p_as;
        if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
                mutex_exit(&p->p_lock);
                AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
                vsize = as->a_resvsize;
                rss = rm_asrss(as);
                AS_LOCK_EXIT(as, &as->a_lock);
                mutex_enter(&p->p_lock);

                /*
                 * VmLck, VmData and VmLib are always 0, and VmExe repeats the
                 * VmRSS value.  The last line is left unterminated; the
                 * signal block below begins with "\n".
                 */
                lxpr_uiobuf_printf(uiobuf,
                    "\n"
                    "VmSize:\t%8lu kB\n"
                    "VmLck:\t%8lu kB\n"
                    "VmRSS:\t%8lu kB\n"
                    "VmData:\t%8lu kB\n"
                    "VmStk:\t%8lu kB\n"
                    "VmExe:\t%8lu kB\n"
                    "VmLib:\t%8lu kB",
                    btok(vsize),
                    0l,
                    ptok(rss),
                    0l,
                    btok(p->p_stksize),
                    ptok(rss),
                    0l);
        }

        sigemptyset(&current);
        sigemptyset(&ignore);
        sigemptyset(&handle);

        /*
         * Build the pending/ignored/caught sets in Linux signal numbering:
         * each native signal i is translated through lxpr_sigmap[], and
         * signals with no valid translation are skipped.  u_signal[] is
         * indexed from 0, hence the i - 1.
         */
        for (i = 1; i < NSIG; i++) {
                lx_sig = lxpr_sigmap[i];

                if ((lx_sig > 0) && (lx_sig < LX_NSIG)) {
                        if (sigismember(&p->p_sig, i))
                                sigaddset(&current, lx_sig);

                        if (up->u_signal[i - 1] == SIG_IGN)
                                sigaddset(&ignore, lx_sig);
                        else if (up->u_signal[i - 1] != SIG_DFL)
                                sigaddset(&handle, lx_sig);
                }
        }

        /* Each mask is printed as two 32-bit words, __sigbits[1] first */
        lxpr_uiobuf_printf(uiobuf,
            "\n"
            "SigPnd:\t%08x%08x\n"
            "SigBlk:\t%08x%08x\n"
            "SigIgn:\t%08x%08x\n"
            "SigCgt:\t%08x%08x\n"
            "CapInh:\t%016x\n"
            "CapPrm:\t%016x\n"
            "CapEff:\t%016x\n",
            current.__sigbits[1], current.__sigbits[0],
            0, 0, /* signals blocked on per thread basis */
            ignore.__sigbits[1], ignore.__sigbits[0],
            handle.__sigbits[1], handle.__sigbits[0],
            /* Can't do anything with linux capabilities */
            0,
            0,
            0);

        lxpr_unlock(p);
}
1065 
1066 
/*
 * lxpr_read_pid_stat(): pid stat file
 *
 * Writes the single-line, space-separated Linux "stat" record for a process.
 * Fields that have no source here are emitted as constant zeros; the comments
 * beside the argument list name the Linux field each placeholder stands for.
 * Sets EINVAL on the uiobuf if the process cannot be locked.
 *
 * The process stays locked (lxpr_lock()/lxpr_unlock()) for the whole function;
 * p_lock is dropped only around the address space lock.
 */
static void
lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        proc_t *p;
        kthread_t *t;
        struct as *as;
        char stat;
        pid_t pid, ppid, pgpid, spid;
        gid_t psgid;
        dev_t psdev;
        size_t rss, vsize;
        int nice, pri;
        caddr_t wchan;
        processorid_t cpu;

        ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);

        p = lxpr_lock(lxpnp->lxpr_pid);
        if (p == NULL) {
                lxpr_uiobuf_seterr(uiobuf, EINVAL);
                return;
        }

        pid = p->p_pid;

        /*
         * Set Linux defaults if we're the zone's init process
         */
        if (pid == curproc->p_zone->zone_proc_initpid) {
                pid = 1;                /* PID for init */
                ppid = 0;               /* parent PID for init is 0 */
                pgpid = 0;              /* process group for init is 0 */
                psgid = (gid_t)-1;      /* credential GID for init is -1 */
                spid = 0;               /* session id for init is 0 */
                psdev = 0;              /* session device for init is 0 */
        } else {
                /*
                 * Make sure not to reference parent PIDs that reside outside
                 * the zone
                 */
                ppid = ((p->p_flag & SZONETOP) ?
                    curproc->p_zone->zone_zsched->p_pid : p->p_ppid);

                /*
                 * Convert ppid to the Linux default of 1 if our parent is the
                 * zone's init process
                 */
                if (ppid == curproc->p_zone->zone_proc_initpid)
                        ppid = 1;

                pgpid = p->p_pgrp;

                /*
                 * Session id, device and gid are read with p_splock and the
                 * session's s_lock held; the gid comes from the session
                 * credential when there is one, else from the process's.
                 */
                mutex_enter(&p->p_splock);
                mutex_enter(&p->p_sessp->s_lock);
                spid = p->p_sessp->s_sid;
                psdev = p->p_sessp->s_dev;
                if (p->p_sessp->s_cred)
                        psgid = crgetgid(p->p_sessp->s_cred);
                else
                        psgid = crgetgid(p->p_cred);

                mutex_exit(&p->p_sessp->s_lock);
                mutex_exit(&p->p_splock);
        }

        /*
         * State, nice value, priority, wait channel and CPU all come from the
         * thread prchoose() selects; that thread is released again with
         * thread_unlock() below.
         */
        t = prchoose(p);
        if (t != NULL) {
                switch (t->t_state) {
                case TS_SLEEP:
                        stat = 'S'; break;
                case TS_RUN:
                case TS_ONPROC:
                        stat = 'R'; break;
                case TS_ZOMB:
                        stat = 'Z'; break;
                case TS_STOPPED:
                        stat = 'T'; break;
                default:
                        stat = '!'; break;
                }

                /* Report a nice value of 0 when CL_DONICE() fails */
                if (CL_DONICE(t, NULL, 0, &nice) != 0)
                        nice = 0;

                pri = t->t_pri;
                wchan = t->t_wchan;
                cpu = t->t_cpu->cpu_id;
                thread_unlock(t);
        } else {
                /* Only zombies have no threads */
                stat = 'Z';
                nice = 0;
                pri = 0;
                wchan = 0;
                cpu = 0;
        }

        /* p_lock is dropped while the address space lock is held */
        as = p->p_as;
        mutex_exit(&p->p_lock);
        AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
        vsize = as->a_resvsize;
        rss = rm_asrss(as);
        AS_LOCK_EXIT(as, &as->a_lock);
        mutex_enter(&p->p_lock);

        /*
         * Each row of the format string below corresponds to one row of
         * arguments.
         *
         * NOTE(review): psdev (a dev_t) is printed with %d, USRSTACK with %u
         * and wchan (a caddr_t) with %lu -- confirm these conversions match
         * the widths of the arguments on every supported data model.
         */
        lxpr_uiobuf_printf(uiobuf,
            "%d (%s) %c %d %d %d %d %d "
            "%lu %lu %lu %lu %lu "
            "%lu %lu %ld %ld "
            "%d %d %d "
            "%lu "
            "%lu "
            "%lu %ld %llu "
            "%lu %lu %u "
            "%lu %lu "
            "%lu %lu %lu %lu "
            "%lu "
            "%lu %lu "
            "%d "
            "%d"
            "\n",
            pid, PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
            0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
            p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
            pri, nice, p->p_lwpcnt,
            0l, /* itrealvalue (time before next SIGALRM) */
            PTOU(p)->u_ticks,
            vsize, rss, p->p_vmem_ctl,
            0l, 0l, USRSTACK, /* startcode, endcode, startstack */
            0l, 0l, /* kstkesp, kstkeip */
            0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
            wchan,
            0l, 0l, /* nswap, cnswap */
            0, /* exit_signal */
            cpu);

        lxpr_unlock(p);
}
1207 
/*
 * lxpr_read_net_arp(): stub read handler (by its name, for /proc/net/arp).
 * It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1213 
/*
 * lxpr_read_net_dev(): read handler (by its name, for /proc/net/dev).
 * Writes only the two fixed column-header lines; no per-interface rows are
 * produced.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
        /* First header line: the Receive / Transmit group titles */
        lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
            "                             |  Transmit\n");
        /* Second header line: the individual counter column names */
        lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
            " frame compressed multicast|bytes    packets errs drop fifo"
            " colls carrier compressed\n");

        /*
         * Data about each interface should go here, but that shouldn't be added
         * unless there is an lxproc reader that actually makes use of it (and
         * doesn't need anything else that we refuse to provide)...
         */
}
1230 
/*
 * lxpr_read_net_dev_mcast(): stub read handler (by its name, for
 * /proc/net/dev_mcast).  It writes nothing to uiobuf, so it produces no
 * output.
 */
/* ARGSUSED */
static void
lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1236 
/*
 * lxpr_read_net_igmp(): stub read handler (by its name, for /proc/net/igmp).
 * It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1242 
/*
 * lxpr_read_net_ip_mr_cache(): stub read handler (by its name, for
 * /proc/net/ip_mr_cache).  It writes nothing to uiobuf, so it produces no
 * output.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1248 
/*
 * lxpr_read_net_ip_mr_vif(): stub read handler (by its name, for
 * /proc/net/ip_mr_vif).  It writes nothing to uiobuf, so it produces no
 * output.
 */
/* ARGSUSED */
static void
lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1254 
/*
 * lxpr_read_net_mcfilter(): stub read handler (by its name, for
 * /proc/net/mcfilter).  It writes nothing to uiobuf, so it produces no
 * output.
 */
/* ARGSUSED */
static void
lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1260 
/*
 * lxpr_read_net_netstat(): stub read handler (by its name, for
 * /proc/net/netstat).  It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1266 
/*
 * lxpr_read_net_raw(): stub read handler (by its name, for /proc/net/raw).
 * It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1272 
/*
 * lxpr_read_net_route(): stub read handler (by its name, for
 * /proc/net/route).  It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1278 
/*
 * lxpr_read_net_rpc(): stub read handler (by its name, for /proc/net/rpc).
 * It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1284 
/*
 * lxpr_read_net_rt_cache(): stub read handler (by its name, for
 * /proc/net/rt_cache).  It writes nothing to uiobuf, so it produces no
 * output.
 */
/* ARGSUSED */
static void
lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1290 
/*
 * lxpr_read_net_sockstat(): stub read handler (by its name, for
 * /proc/net/sockstat).  It writes nothing to uiobuf, so it produces no
 * output.
 */
/* ARGSUSED */
static void
lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1296 
/*
 * lxpr_read_net_snmp(): stub read handler (by its name, for /proc/net/snmp).
 * It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1302 
/*
 * lxpr_read_net_stat(): stub read handler (by its name, for /proc/net/stat).
 * It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1308 
/*
 * lxpr_read_net_tcp(): stub read handler (by its name, for /proc/net/tcp).
 * It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1314 
/*
 * lxpr_read_net_udp(): stub read handler (by its name, for /proc/net/udp).
 * It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1320 
/*
 * lxpr_read_net_unix(): stub read handler (by its name, for /proc/net/unix).
 * It writes nothing to uiobuf, so it produces no output.
 */
/* ARGSUSED */
static void
lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
{
}
1326 
1327 /*
1328  * lxpr_read_kmsg(): read the contents of the kernel message queue. We
1329  * translate this into the reception of console messages for this zone; each
1330  * read copies out a single zone console message, or blocks until the next one
1331  * is produced.
1332  */
1333 
1334 #define LX_KMSG_PRI     "<0>"
1335 
1336 static void
1337 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf)
1338 {
1339         ldi_handle_t    lh = lxpnp->lxpr_cons_ldih;
1340         mblk_t          *mp;
1341 
1342         if (ldi_getmsg(lh, &mp, NULL) == 0) {
1343                 /*
1344                  * lxproc doesn't like successive reads to the same file
1345                  * descriptor unless we do an explicit rewind each time.
1346                  */
1347                 lxpr_uiobuf_seek(uiobuf, 0);
1348 
1349                 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
1350                     mp->b_cont->b_rptr);
1351 
1352                 freemsg(mp);
1353         }
1354 }
1355 
1356 /*
1357  * lxpr_read_loadavg(): read the contents of the "loadavg" file.  We do just
1358  * enough for uptime and other simple lxproc readers to work
1359  */
1360 extern int nthread;
1361 
1362 static void
1363 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1364 {
1365         ulong_t avenrun1;
1366         ulong_t avenrun5;
1367         ulong_t avenrun15;
1368         ulong_t avenrun1_cs;
1369         ulong_t avenrun5_cs;
1370         ulong_t avenrun15_cs;
1371         int loadavg[3];
1372         int *loadbuf;
1373         cpupart_t *cp;
1374         zone_t *zone = LXPTOZ(lxpnp);
1375 
1376         uint_t nrunnable = 0;
1377         rctl_qty_t nlwps;
1378 
1379         ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
1380 
1381         mutex_enter(&cpu_lock);
1382 
1383         /*
1384          * Need to add up values over all CPU partitions. If pools are active,
1385          * only report the values of the zone's partition, which by definition
1386          * includes the current CPU.
1387          */
1388         if (pool_pset_enabled()) {
1389                 psetid_t psetid = zone_pset_get(curproc->p_zone);
1390 
1391                 ASSERT(curproc->p_zone != &zone0);
1392                 cp = CPU->cpu_part;
1393 
1394                 nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
1395                 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
1396                 loadbuf = &loadavg[0];
1397         } else {
1398                 cp = cp_list_head;
1399                 do {
1400                         nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
1401                 } while ((cp = cp->cp_next) != cp_list_head);
1402 
1403                 loadbuf = zone == global_zone ?
1404                     &avenrun[0] : zone->zone_avenrun;
1405         }
1406 
1407         /*
1408          * If we're in the non-global zone, we'll report the total number of
1409          * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
1410          * otherwise will just use nthread (which will include kernel threads,
1411          * but should be good enough for lxproc).
1412          */
1413         nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
1414 
1415         mutex_exit(&cpu_lock);
1416 
1417         avenrun1 = loadbuf[0] >> FSHIFT;
1418         avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
1419         avenrun5 = loadbuf[1] >> FSHIFT;
1420         avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
1421         avenrun15 = loadbuf[2] >> FSHIFT;
1422         avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
1423 
1424         lxpr_uiobuf_printf(uiobuf,
1425             "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
1426             avenrun1, avenrun1_cs,
1427             avenrun5, avenrun5_cs,
1428             avenrun15, avenrun15_cs,
1429             nrunnable, nlwps, 0);
1430 }
1431 
1432 /*
1433  * lxpr_read_meminfo(): read the contents of the "meminfo" file.
1434  */
1435 static void
1436 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1437 {
1438         zone_t *zone = LXPTOZ(lxpnp);
1439         int global = zone == global_zone;
1440         long total_mem, free_mem, total_swap, used_swap;
1441 
1442         ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
1443 
1444         if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
1445                 total_mem = physmem * PAGESIZE;
1446                 free_mem = freemem * PAGESIZE;
1447         } else {
1448                 total_mem = zone->zone_phys_mem_ctl;
1449                 free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
1450         }
1451 
1452         if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
1453                 total_swap = k_anoninfo.ani_max * PAGESIZE;
1454                 used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
1455         } else {
1456                 mutex_enter(&zone->zone_mem_lock);
1457                 total_swap = zone->zone_max_swap_ctl;
1458                 used_swap = zone->zone_max_swap;
1459                 mutex_exit(&zone->zone_mem_lock);
1460         }
1461 
1462         lxpr_uiobuf_printf(uiobuf,
1463             "        total:     used:    free:  shared: buffers:  cached:\n"
1464             "Mem:  %8lu %8lu %8lu %8u %8u %8u\n"
1465             "Swap: %8lu %8lu %8lu\n"
1466             "MemTotal:  %8lu kB\n"
1467             "MemFree:   %8lu kB\n"
1468             "MemShared: %8u kB\n"
1469             "Buffers:   %8u kB\n"
1470             "Cached:    %8u kB\n"
1471             "SwapCached:%8u kB\n"
1472             "Active:    %8u kB\n"
1473             "Inactive:  %8u kB\n"
1474             "HighTotal: %8u kB\n"
1475             "HighFree:  %8u kB\n"
1476             "LowTotal:  %8u kB\n"
1477             "LowFree:   %8u kB\n"
1478             "SwapTotal: %8lu kB\n"
1479             "SwapFree:  %8lu kB\n",
1480             total_mem, total_mem - free_mem, free_mem, 0, 0, 0,
1481             total_swap, used_swap, total_swap - used_swap,
1482             btok(total_mem),                            /* MemTotal */
1483             btok(free_mem),                             /* MemFree */
1484             0,                                          /* MemShared */
1485             0,                                          /* Buffers */
1486             0,                                          /* Cached */
1487             0,                                          /* SwapCached */
1488             0,                                          /* Active */
1489             0,                                          /* Inactive */
1490             0,                                          /* HighTotal */
1491             0,                                          /* HighFree */
1492             btok(total_mem),                            /* LowTotal */
1493             btok(free_mem),                             /* LowFree */
1494             btok(total_swap),                           /* SwapTotal */
1495             btok(total_swap - used_swap));              /* SwapFree */
1496 }
1497 
1498 /*
1499  * lxpr_read_mounts():
1500  */
1501 /* ARGSUSED */
1502 static void
1503 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1504 {
1505         struct vfs *vfsp;
1506         struct vfs *vfslist;
1507         zone_t *zone = LXPTOZ(lxpnp);
1508         struct print_data {
1509                 refstr_t *vfs_mntpt;
1510                 refstr_t *vfs_resource;
1511                 uint_t vfs_flag;
1512                 int vfs_fstype;
1513                 struct print_data *next;
1514         } *print_head = NULL;
1515         struct print_data **print_tail = &print_head;
1516         struct print_data *printp;
1517 
1518         vfs_list_read_lock();
1519 
1520         if (zone == global_zone) {
1521                 vfsp = vfslist = rootvfs;
1522         } else {
1523                 vfsp = vfslist = zone->zone_vfslist;
1524                 /*
1525                  * If the zone has a root entry, it will be the first in
1526                  * the list.  If it doesn't, we conjure one up.
1527                  */
1528                 if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
1529                     zone->zone_rootpath) != 0) {
1530                         struct vfs *tvfsp;
1531                         /*
1532                          * The root of the zone is not a mount point.  The vfs
1533                          * we want to report is that of the zone's root vnode.
1534                          */
1535                         tvfsp = zone->zone_rootvp->v_vfsp;
1536 
1537                         lxpr_uiobuf_printf(uiobuf,
1538                             "/ / %s %s 0 0\n",
1539                             vfssw[tvfsp->vfs_fstype].vsw_name,
1540                             tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1541 
1542                 }
1543                 if (vfslist == NULL) {
1544                         vfs_list_unlock();
1545                         return;
1546                 }
1547         }
1548 
1549         /*
1550          * Later on we have to do a lookupname, which can end up causing
1551          * another vfs_list_read_lock() to be called. Which can lead to a
1552          * deadlock. To avoid this, we extract the data we need into a local
1553          * list, then we can run this list without holding vfs_list_read_lock()
1554          * We keep the list in the same order as the vfs_list
1555          */
1556         do {
1557                 /* Skip mounts we shouldn't show */
1558                 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
1559                         goto nextfs;
1560                 }
1561 
1562                 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
1563                 refstr_hold(vfsp->vfs_mntpt);
1564                 printp->vfs_mntpt = vfsp->vfs_mntpt;
1565                 refstr_hold(vfsp->vfs_resource);
1566                 printp->vfs_resource = vfsp->vfs_resource;
1567                 printp->vfs_flag = vfsp->vfs_flag;
1568                 printp->vfs_fstype = vfsp->vfs_fstype;
1569                 printp->next = NULL;
1570 
1571                 *print_tail = printp;
1572                 print_tail = &printp->next;
1573 
1574 nextfs:
1575                 vfsp = (zone == global_zone) ?
1576                     vfsp->vfs_next : vfsp->vfs_zone_next;
1577 
1578         } while (vfsp != vfslist);
1579 
1580         vfs_list_unlock();
1581 
1582         /*
1583          * now we can run through what we've extracted without holding
1584          * vfs_list_read_lock()
1585          */
1586         printp = print_head;
1587         while (printp != NULL) {
1588                 struct print_data *printp_next;
1589                 const char *resource;
1590                 char *mntpt;
1591                 struct vnode *vp;
1592                 int error;
1593 
1594                 mntpt = (char *)refstr_value(printp->vfs_mntpt);
1595                 resource = refstr_value(printp->vfs_resource);
1596 
1597                 if (mntpt != NULL && mntpt[0] != '\0')
1598                         mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
1599                 else
1600                         mntpt = "-";
1601 
1602                 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
1603 
1604                 if (error != 0)
1605                         goto nextp;
1606 
1607                 if (!(vp->v_flag & VROOT)) {
1608                         VN_RELE(vp);
1609                         goto nextp;
1610                 }
1611                 VN_RELE(vp);
1612 
1613                 if (resource != NULL && resource[0] != '\0') {
1614                         if (resource[0] == '/') {
1615                                 resource = ZONE_PATH_VISIBLE(resource, zone) ?
1616                                     ZONE_PATH_TRANSLATE(resource, zone) :
1617                                     mntpt;
1618                         }
1619                 } else {
1620                         resource = "-";
1621                 }
1622 
1623                 lxpr_uiobuf_printf(uiobuf,
1624                     "%s %s %s %s 0 0\n",
1625                     resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
1626                     printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1627 
1628 nextp:
1629                 printp_next = printp->next;
1630                 refstr_rele(printp->vfs_mntpt);
1631                 refstr_rele(printp->vfs_resource);
1632                 kmem_free(printp, sizeof (*printp));
1633                 printp = printp_next;
1634 
1635         }
1636 }
1637 
1638 /*
1639  * lxpr_read_partitions():
1640  *
1641  * We don't support partitions in a local zone because it requires access to
1642  * physical devices.  But we need to fake up enough of the file to show that we
1643  * have no partitions.
1644  */
1645 /* ARGSUSED */
1646 static void
1647 lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1648 {
1649         lxpr_uiobuf_printf(uiobuf,
1650             "major minor  #blocks  name     rio rmerge rsect ruse "
1651             "wio wmerge wsect wuse running use aveq\n\n");
1652 }
1653 
1654 /*
1655  * lxpr_read_version(): read the contents of the "version" file.  Note that
1656  * we don't lie here -- we don't pretend that we're Linux.  If lxproc is to
1657  * be used in a Linux-branded zone, there will need to be a mount option to
1658  * indicate that Linux should be more fully mimicked.
1659  */
1660 /* ARGSUSED */
1661 static void
1662 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1663 {
1664         lxpr_uiobuf_printf(uiobuf,
1665             "%s version %s (%s version %d.%d.%d) "
1666             "#%s SMP %s\n",
1667             utsname.sysname, utsname.release,
1668 #if defined(__GNUC__)
1669             "gcc",
1670             __GNUC__,
1671             __GNUC_MINOR__,
1672             __GNUC_PATCHLEVEL__,
1673 #else
1674             "Sun C",
1675             __SUNPRO_C / 0x100,
1676             (__SUNPRO_C & 0xff) / 0x10,
1677             __SUNPRO_C & 0xf,
1678 #endif
1679             utsname.version,
1680             "00:00:00 00/00/00");
1681 }
1682 
1683 /*
1684  * lxpr_read_stat(): read the contents of the "stat" file.
1685  *
1686  */
1687 /* ARGSUSED */
1688 static void
1689 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1690 {
1691         cpu_t *cp, *cpstart;
1692         int pools_enabled;
1693         ulong_t idle_cum = 0;
1694         ulong_t sys_cum  = 0;
1695         ulong_t user_cum = 0;
1696         ulong_t irq_cum = 0;
1697         uint_t cpu_nrunnable_cum = 0;
1698         uint_t w_io_cum = 0;
1699 
1700         ulong_t pgpgin_cum    = 0;
1701         ulong_t pgpgout_cum   = 0;
1702         ulong_t pgswapout_cum = 0;
1703         ulong_t pgswapin_cum  = 0;
1704         ulong_t intr_cum = 0;
1705         ulong_t pswitch_cum = 0;
1706         ulong_t forks_cum = 0;
1707         hrtime_t msnsecs[NCMSTATES];
1708 
1709         /* temporary variable since scalehrtime modifies data in place */
1710         hrtime_t tmptime;
1711 
1712         ASSERT(lxpnp->lxpr_type == LXPR_STAT);
1713 
1714         mutex_enter(&cpu_lock);
1715         pools_enabled = pool_pset_enabled();
1716 
1717         /* Calculate cumulative stats */
1718         cp = cpstart = CPU->cpu_part->cp_cpulist;
1719         do {
1720                 int i;
1721 
1722                 /*
1723                  * Don't count CPUs that aren't even in the system
1724                  * or aren't up yet.
1725                  */
1726                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
1727                         continue;
1728                 }
1729 
1730                 get_cpu_mstate(cp, msnsecs);
1731 
1732                 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
1733                 sys_cum  += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
1734                 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
1735 
1736                 pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
1737                 pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
1738                 pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
1739                 pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
1740 
1741                 cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
1742                 w_io_cum += CPU_STATS(cp, sys.iowait);
1743                 for (i = 0; i < NCMSTATES; i++) {
1744                         tmptime = cp->cpu_intracct[i];
1745                         scalehrtime(&tmptime);
1746                         irq_cum += NSEC_TO_TICK(tmptime);
1747                 }
1748 
1749                 for (i = 0; i < PIL_MAX; i++)
1750                         intr_cum += CPU_STATS(cp, sys.intr[i]);
1751 
1752                 pswitch_cum += CPU_STATS(cp, sys.pswitch);
1753                 forks_cum += CPU_STATS(cp, sys.sysfork);
1754                 forks_cum += CPU_STATS(cp, sys.sysvfork);
1755 
1756                 if (pools_enabled)
1757                         cp = cp->cpu_next_part;
1758                 else
1759                         cp = cp->cpu_next;
1760         } while (cp != cpstart);
1761 
1762         lxpr_uiobuf_printf(uiobuf, "cpu %ld %ld %ld %ld %ld %ld %ld\n",
1763             user_cum, 0, sys_cum, idle_cum, 0, irq_cum, 0);
1764 
1765         /* Do per processor stats */
1766         do {
1767                 int i;
1768 
1769                 ulong_t idle_ticks;
1770                 ulong_t sys_ticks;
1771                 ulong_t user_ticks;
1772                 ulong_t irq_ticks = 0;
1773 
1774                 /*
1775                  * Don't count CPUs that aren't even in the system
1776                  * or aren't up yet.
1777                  */
1778                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
1779                         continue;
1780                 }
1781 
1782                 get_cpu_mstate(cp, msnsecs);
1783 
1784                 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
1785                 sys_ticks  = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
1786                 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
1787 
1788                 for (i = 0; i < NCMSTATES; i++) {
1789                         tmptime = cp->cpu_intracct[i];
1790                         scalehrtime(&tmptime);
1791                         irq_ticks += NSEC_TO_TICK(tmptime);
1792                 }
1793 
1794                 lxpr_uiobuf_printf(uiobuf,
1795                     "cpu%d %ld %ld %ld %ld %ld %ld %ld\n",
1796                     cp->cpu_id, user_ticks, 0, sys_ticks, idle_ticks,
1797                     0, irq_ticks, 0);
1798 
1799                 if (pools_enabled)
1800                         cp = cp->cpu_next_part;
1801                 else
1802                         cp = cp->cpu_next;
1803         } while (cp != cpstart);
1804 
1805         mutex_exit(&cpu_lock);
1806 
1807         lxpr_uiobuf_printf(uiobuf,
1808             "page %lu %lu\n"
1809             "swap %lu %lu\n"
1810             "intr %lu\n"
1811             "ctxt %lu\n"
1812             "btime %lu\n"
1813             "processes %lu\n"
1814             "procs_running %lu\n"
1815             "procs_blocked %lu\n",
1816             pgpgin_cum, pgpgout_cum,
1817             pgswapin_cum, pgswapout_cum,
1818             intr_cum,
1819             pswitch_cum,
1820             boot_time,
1821             forks_cum,
1822             cpu_nrunnable_cum,
1823             w_io_cum);
1824 }
1825 
1826 /*
1827  * lxpr_read_uptime(): read the contents of the "uptime" file.
1828  *
1829  * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
1830  * Use fixed point arithmetic to get 2 decimal places
1831  */
1832 /* ARGSUSED */
1833 static void
1834 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1835 {
1836         cpu_t *cp, *cpstart;
1837         int pools_enabled;
1838         ulong_t idle_cum = 0;
1839         ulong_t cpu_count = 0;
1840         ulong_t idle_s;
1841         ulong_t idle_cs;
1842         ulong_t up_s;
1843         ulong_t up_cs;
1844         hrtime_t birthtime;
1845         hrtime_t centi_sec = 10000000;  /* 10^7 */
1846 
1847         ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
1848 
1849         /* Calculate cumulative stats */
1850         mutex_enter(&cpu_lock);
1851         pools_enabled = pool_pset_enabled();
1852 
1853         cp = cpstart = CPU;
1854         do {
1855                 /*
1856                  * Don't count CPUs that aren't even in the system
1857                  * or aren't up yet.
1858                  */
1859                 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
1860                         continue;
1861                 }
1862 
1863                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
1864                 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
1865                 cpu_count += 1;
1866 
1867                 if (pools_enabled)
1868                         cp = cp->cpu_next_part;
1869                 else
1870                         cp = cp->cpu_next;
1871         } while (cp != cpstart);
1872         mutex_exit(&cpu_lock);
1873 
1874         /* Getting the Zone zsched process startup time */
1875         birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
1876         up_cs = (gethrtime() - birthtime) / centi_sec;
1877         up_s = up_cs / 100;
1878         up_cs %= 100;
1879 
1880         ASSERT(cpu_count > 0);
1881         idle_cum /= cpu_count;
1882         idle_s = idle_cum / hz;
1883         idle_cs = idle_cum % hz;
1884         idle_cs *= 100;
1885         idle_cs /= hz;
1886 
1887         lxpr_uiobuf_printf(uiobuf,
1888             "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
1889 }
1890 
/*
 * Names for the bits of %edx returned by cpuid function 0x80000001, as
 * printed by lxpr_read_cpuinfo() for AMD processors.  The array is indexed
 * by bit number; bits without an entry are NULL and are not reported.
 */
static const char *amd_x_edx[] = {
        [11] = "syscall",
        [19] = "mp",
        [20] = "nx",
        [22] = "mmxext",
        [29] = "lm",
        [30] = "3dnowext",
        [31] = "3dnow"
};
1901 
/*
 * Names for the bits of %ecx returned by cpuid function 0x80000001, as
 * printed by lxpr_read_cpuinfo() for AMD processors.  Indexed by bit
 * number; NULL entries are not reported.
 */
static const char *amd_x_ecx[] = {
        [0] = "lahf_lm",
        [2] = "svm",
        [4] = "altmovcr8"
};
1906 
/*
 * Names for the bits of %edx returned by cpuid function 0x80000001, as
 * printed by lxpr_read_cpuinfo() when x86_vendor is X86_VENDOR_TM.
 * Indexed by bit number; NULL entries are not reported.
 */
static const char *tm_x_edx[] = {
        [0] = "recovery",
        [1] = "longrun",
        [3] = "lrti"
};
1910 
/*
 * Names for the bits of %edx returned by cpuid function 0x80000001, as
 * printed by lxpr_read_cpuinfo() for Intel processors.  Indexed by bit
 * number; NULL entries are not reported.  The array is sized explicitly so
 * that it still covers all 32 bits.
 *
 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
 */
static const char *intc_x_edx[32] = {
        [11] = "syscall",
        [20] = "nx",
        [29] = "lm"
};
1924 
/*
 * Names for the bits of %edx returned by cpuid function 1 (the standard
 * feature flags), as printed by lxpr_read_cpuinfo() for every vendor.
 * Indexed by bit number; bits 10 and 20 have no name and are left NULL.
 */
static const char *intc_edx[] = {
        [0]  = "fpu",
        [1]  = "vme",
        [2]  = "de",
        [3]  = "pse",
        [4]  = "tsc",
        [5]  = "msr",
        [6]  = "pae",
        [7]  = "mce",
        [8]  = "cx8",
        [9]  = "apic",
        [11] = "sep",
        [12] = "mtrr",
        [13] = "pge",
        [14] = "mca",
        [15] = "cmov",
        [16] = "pat",
        [17] = "pse36",
        [18] = "pn",
        [19] = "clflush",
        [21] = "dts",
        [22] = "acpi",
        [23] = "mmx",
        [24] = "fxsr",
        [25] = "sse",
        [26] = "sse2",
        [27] = "ss",
        [28] = "ht",
        [29] = "tm",
        [30] = "ia64",
        [31] = "pbe"
};
1935 
/*
 * Names for the bits of %ecx returned by cpuid function 1 (the standard
 * feature flags), as printed by lxpr_read_cpuinfo() for every vendor.
 * Indexed by bit number; NULL entries are not reported.
 *
 * "sse3" on linux is called "pni" (Prescott New Instructions).
 */
static const char *intc_ecx[] = {
        [0]  = "pni",
        [3]  = "monitor",
        [4]  = "ds_cpl",
        [7]  = "est",
        [8]  = "tm2",
        [10] = "cid",
        [13] = "cx16",
        [14] = "xtpr"
};
1945 
1946 static void
1947 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1948 {
1949         int i;
1950         uint32_t bits;
1951         cpu_t *cp, *cpstart;
1952         int pools_enabled;
1953         const char **fp;
1954         char brandstr[CPU_IDSTRLEN];
1955         struct cpuid_regs cpr;
1956         int maxeax;
1957         int std_ecx, std_edx, ext_ecx, ext_edx;
1958 
1959         ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
1960 
1961         mutex_enter(&cpu_lock);
1962         pools_enabled = pool_pset_enabled();
1963 
1964         cp = cpstart = CPU;
1965         do {
1966                 /*
1967                  * This returns the maximum eax value for standard cpuid
1968                  * functions in eax.
1969                  */
1970                 cpr.cp_eax = 0;
1971                 (void) cpuid_insn(cp, &cpr);
1972                 maxeax = cpr.cp_eax;
1973 
1974                 /*
1975                  * Get standard x86 feature flags.
1976                  */
1977                 cpr.cp_eax = 1;
1978                 (void) cpuid_insn(cp, &cpr);
1979                 std_ecx = cpr.cp_ecx;
1980                 std_edx = cpr.cp_edx;
1981 
1982                 /*
1983                  * Now get extended feature flags.
1984                  */
1985                 cpr.cp_eax = 0x80000001;
1986                 (void) cpuid_insn(cp, &cpr);
1987                 ext_ecx = cpr.cp_ecx;
1988                 ext_edx = cpr.cp_edx;
1989 
1990                 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
1991 
1992                 lxpr_uiobuf_printf(uiobuf,
1993                     "processor\t: %d\n"
1994                     "vendor_id\t: %s\n"
1995                     "cpu family\t: %d\n"
1996                     "model\t\t: %d\n"
1997                     "model name\t: %s\n"
1998                     "stepping\t: %d\n"
1999                     "cpu MHz\t\t: %u.%03u\n",
2000                     cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
2001                     cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
2002                     (uint32_t)(cpu_freq_hz / 1000000),
2003                     ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
2004 
2005                 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
2006                     getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
2007 
2008                 if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
2009                         /*
2010                          * 'siblings' is used for HT-style threads
2011                          */
2012                         lxpr_uiobuf_printf(uiobuf,
2013                             "physical id\t: %lu\n"
2014                             "siblings\t: %u\n",
2015                             pg_plat_hw_instance_id(cp, PGHW_CHIP),
2016                             cpuid_get_ncpu_per_chip(cp));
2017                 }
2018 
2019                 /*
2020                  * Since we're relatively picky about running on older hardware,
2021                  * we can be somewhat cavalier about the answers to these ones.
2022                  *
2023                  * In fact, given the hardware we support, we just say:
2024                  *
2025                  *      fdiv_bug        : no    (if we're on a 64-bit kernel)
2026                  *      hlt_bug         : no
2027                  *      f00f_bug        : no
2028                  *      coma_bug        : no
2029                  *      wp              : yes   (write protect in supervsr mode)
2030                  */
2031                 lxpr_uiobuf_printf(uiobuf,
2032                     "fdiv_bug\t: %s\n"
2033                     "hlt_bug \t: no\n"
2034                     "f00f_bug\t: no\n"
2035                     "coma_bug\t: no\n"
2036                     "fpu\t\t: %s\n"
2037                     "fpu_exception\t: %s\n"
2038                     "cpuid level\t: %d\n"
2039                     "flags\t\t:",
2040 #if defined(__i386)
2041                     fpu_pentium_fdivbug ? "yes" : "no",
2042 #else
2043                     "no",
2044 #endif /* __i386 */
2045                     fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
2046                     maxeax);
2047 
2048                 for (bits = std_edx, fp = intc_edx, i = 0;
2049                     i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
2050                         if ((bits & (1 << i)) != 0 && *fp)
2051                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
2052 
2053                 /*
2054                  * name additional features where appropriate
2055                  */
2056                 switch (x86_vendor) {
2057                 case X86_VENDOR_Intel:
2058                         for (bits = ext_edx, fp = intc_x_edx, i = 0;
2059                             i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
2060                             fp++, i++)
2061                                 if ((bits & (1 << i)) != 0 && *fp)
2062                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
2063                         break;
2064 
2065                 case X86_VENDOR_AMD:
2066                         for (bits = ext_edx, fp = amd_x_edx, i = 0;
2067                             i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
2068                             fp++, i++)
2069                                 if ((bits & (1 << i)) != 0 && *fp)
2070                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
2071 
2072                         for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
2073                             i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
2074                             fp++, i++)
2075                                 if ((bits & (1 << i)) != 0 && *fp)
2076                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
2077                         break;
2078 
2079                 case X86_VENDOR_TM:
2080                         for (bits = ext_edx, fp = tm_x_edx, i = 0;
2081                             i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
2082                             fp++, i++)
2083                                 if ((bits & (1 << i)) != 0 && *fp)
2084                                         lxpr_uiobuf_printf(uiobuf, " %s", *fp);
2085                         break;
2086                 default:
2087                         break;
2088                 }
2089 
2090                 for (bits = std_ecx, fp = intc_ecx, i = 0;
2091                     i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
2092                         if ((bits & (1 << i)) != 0 && *fp)
2093                                 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
2094 
2095                 lxpr_uiobuf_printf(uiobuf, "\n\n");
2096 
2097                 if (pools_enabled)
2098                         cp = cp->cpu_next_part;
2099                 else
2100                         cp = cp->cpu_next;
2101         } while (cp != cpstart);
2102 
2103         mutex_exit(&cpu_lock);
2104 }
2105 
2106 /* ARGSUSED */
2107 static void
2108 lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2109 {
2110         ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
2111         lxpr_uiobuf_seterr(uiobuf, EFAULT);
2112 }
2113 
2114 /*
2115  * lxpr_getattr(): Vnode operation for VOP_GETATTR()
2116  */
2117 static int
2118 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2119     caller_context_t *ct)
2120 {
2121         register lxpr_node_t *lxpnp = VTOLXP(vp);
2122         lxpr_nodetype_t type = lxpnp->lxpr_type;
2123         extern uint_t nproc;
2124         int error;
2125 
2126         /*
2127          * Return attributes of underlying vnode if ATTR_REAL
2128          *
2129          * but keep fd files with the symlink permissions
2130          */
2131         if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
2132                 vnode_t *rvp = lxpnp->lxpr_realvp;
2133 
2134                 /*
2135                  * withold attribute information to owner or root
2136                  */
2137                 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
2138                         return (error);
2139                 }
2140 
2141                 /*
2142                  * now its attributes
2143                  */
2144                 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
2145                         return (error);
2146                 }
2147 
2148                 /*
2149                  * if it's a file in lx /proc/pid/fd/xx then set its
2150                  * mode and keep it looking like a symlink
2151                  */
2152                 if (type == LXPR_PID_FD_FD) {
2153                         vap->va_mode = lxpnp->lxpr_mode;
2154                         vap->va_type = vp->v_type;
2155                         vap->va_size = 0;
2156                         vap->va_nlink = 1;
2157                 }
2158                 return (0);
2159         }
2160 
2161         /* Default attributes, that may be overridden below */
2162         bzero(vap, sizeof (*vap));
2163         vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
2164         vap->va_nlink = 1;
2165         vap->va_type = vp->v_type;
2166         vap->va_mode = lxpnp->lxpr_mode;
2167         vap->va_fsid = vp->v_vfsp->vfs_dev;
2168         vap->va_blksize = DEV_BSIZE;
2169         vap->va_uid = lxpnp->lxpr_uid;
2170         vap->va_gid = lxpnp->lxpr_gid;
2171         vap->va_nodeid = lxpnp->lxpr_ino;
2172 
2173         switch (type) {
2174         case LXPR_PROCDIR:
2175                 vap->va_nlink = nproc + 2 + PROCDIRFILES;
2176                 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
2177                 break;
2178         case LXPR_PIDDIR:
2179                 vap->va_nlink = PIDDIRFILES;
2180                 vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
2181                 break;
2182         case LXPR_SELF:
2183                 vap->va_uid = crgetruid(curproc->p_cred);
2184                 vap->va_gid = crgetrgid(curproc->p_cred);
2185                 break;
2186         default:
2187                 break;
2188         }
2189 
2190         vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
2191         return (0);
2192 }
2193 
2194 /*
2195  * lxpr_access(): Vnode operation for VOP_ACCESS()
2196  */
2197 static int
2198 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
2199 {
2200         lxpr_node_t *lxpnp = VTOLXP(vp);
2201         int shift = 0;
2202         proc_t *tp;
2203 
2204         /* lx /proc is a read only file system */
2205         if (mode & VWRITE)
2206                 return (EROFS);
2207 
2208         /*
2209          * If this is a restricted file, check access permissions.
2210          */
2211         switch (lxpnp->lxpr_type) {
2212         case LXPR_PIDDIR:
2213                 return (0);
2214         case LXPR_PID_CURDIR:
2215         case LXPR_PID_ENV:
2216         case LXPR_PID_EXE:
2217         case LXPR_PID_MAPS:
2218         case LXPR_PID_MEM:
2219         case LXPR_PID_ROOTDIR:
2220         case LXPR_PID_FDDIR:
2221         case LXPR_PID_FD_FD:
2222                 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
2223                         return (ENOENT);
2224                 if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
2225                     priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
2226                         lxpr_unlock(tp);
2227                         return (EACCES);
2228                 }
2229                 lxpr_unlock(tp);
2230         default:
2231                 break;
2232         }
2233 
2234         if (lxpnp->lxpr_realvp != NULL) {
2235                 /*
2236                  * For these we use the underlying vnode's accessibility.
2237                  */
2238                 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
2239         }
2240 
2241         /* If user is root allow access regardless of permission bits */
2242         if (secpolicy_proc_access(cr) == 0)
2243                 return (0);
2244 
2245         /*
2246          * Access check is based on only one of owner, group, public.  If not
2247          * owner, then check group.  If not a member of the group, then check
2248          * public access.
2249          */
2250         if (crgetuid(cr) != lxpnp->lxpr_uid) {
2251                 shift += 3;
2252                 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
2253                         shift += 3;
2254         }
2255 
2256         mode &= ~(lxpnp->lxpr_mode << shift);
2257 
2258         if (mode == 0)
2259                 return (0);
2260 
2261         return (EACCES);
2262 }
2263 
2264 /* ARGSUSED */
2265 static vnode_t *
2266 lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
2267 {
2268         return (NULL);
2269 }
2270 
2271 /*
2272  * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
2273  */
2274 /* ARGSUSED */
2275 static int
2276 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
2277         int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
2278         int *direntflags, pathname_t *realpnp)
2279 {
2280         lxpr_node_t *lxpnp = VTOLXP(dp);
2281         lxpr_nodetype_t type = lxpnp->lxpr_type;
2282         int error;
2283 
2284         ASSERT(dp->v_type == VDIR);
2285         ASSERT(type >= 0 && type < LXPR_NFILES);
2286 
2287         /*
2288          * we should never get here because the lookup
2289          * is done on the realvp for these nodes
2290          */
2291         ASSERT(type != LXPR_PID_FD_FD &&
2292             type != LXPR_PID_CURDIR &&
2293             type != LXPR_PID_ROOTDIR);
2294 
2295         /*
2296          * restrict lookup permission to owner or root
2297          */
2298         if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
2299                 return (error);
2300         }
2301 
2302         /*
2303          * Just return the parent vnode if that's where we are trying to go.
2304          */
2305         if (strcmp(comp, "..") == 0) {
2306                 VN_HOLD(lxpnp->lxpr_parent);
2307                 *vpp = lxpnp->lxpr_parent;
2308                 return (0);
2309         }
2310 
2311         /*
2312          * Special handling for directory searches.  Note: null component name
2313          * denotes that the current directory is being searched.
2314          */
2315         if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
2316                 VN_HOLD(dp);
2317                 *vpp = dp;
2318                 return (0);
2319         }
2320 
2321         *vpp = (lxpr_lookup_function[type](dp, comp));
2322         return ((*vpp == NULL) ? ENOENT : 0);
2323 }
2324 
2325 /*
2326  * Do a sequential search on the given directory table
2327  */
2328 static vnode_t *
2329 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
2330     lxpr_dirent_t *dirtab, int dirtablen)
2331 {
2332         lxpr_node_t *lxpnp;
2333         int count;
2334 
2335         for (count = 0; count < dirtablen; count++) {
2336                 if (strcmp(dirtab[count].d_name, comp) == 0) {
2337                         lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
2338                         dp = LXPTOV(lxpnp);
2339                         ASSERT(dp != NULL);
2340                         return (dp);
2341                 }
2342         }
2343         return (NULL);
2344 }
2345 
2346 static vnode_t *
2347 lxpr_lookup_piddir(vnode_t *dp, char *comp)
2348 {
2349         proc_t *p;
2350 
2351         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
2352 
2353         p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
2354         if (p == NULL)
2355                 return (NULL);
2356 
2357         dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
2358 
2359         lxpr_unlock(p);
2360 
2361         return (dp);
2362 }
2363 
2364 /*
2365  * Lookup one of the process's open files.
2366  */
2367 static vnode_t *
2368 lxpr_lookup_fddir(vnode_t *dp, char *comp)
2369 {
2370         lxpr_node_t *dlxpnp = VTOLXP(dp);
2371         lxpr_node_t *lxpnp;
2372         vnode_t *vp = NULL;
2373         proc_t *p;
2374         file_t *fp;
2375         uint_t fd;
2376         int c;
2377         uf_entry_t *ufp;
2378         uf_info_t *fip;
2379 
2380         ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);
2381 
2382         /*
2383          * convert the string rendition of the filename
2384          * to a file descriptor
2385          */
2386         fd = 0;
2387         while ((c = *comp++) != '\0') {
2388                 int ofd;
2389                 if (c < '0' || c > '9')
2390                         return (NULL);
2391 
2392                 ofd = fd;
2393                 fd = 10*fd + c - '0';
2394                 /* integer overflow */
2395                 if (fd / 10 != ofd)
2396                         return (NULL);
2397         }
2398 
2399         /*
2400          * get the proc to work with and lock it
2401          */
2402         p = lxpr_lock(dlxpnp->lxpr_pid);
2403         if ((p == NULL))
2404                 return (NULL);
2405 
2406         /*
2407          * If the process is a zombie or system process
2408          * it can't have any open files.
2409          */
2410         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
2411                 lxpr_unlock(p);
2412                 return (NULL);
2413         }
2414 
2415         /*
2416          * get us a fresh node/vnode
2417          */
2418         lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);
2419 
2420         /*
2421          * get open file info
2422          */
2423         fip = (&(p)->p_user.u_finfo);
2424         mutex_enter(&fip->fi_lock);
2425 
2426         /*
2427          * got the fd data so now done with this proc
2428          */
2429         lxpr_unlock(p);
2430 
2431         if (fd < fip->fi_nfiles) {
2432                 UF_ENTER(ufp, fip, fd);
2433                 /*
2434                  * ensure the fd is still kosher.
2435                  * it may have gone between the readdir and
2436                  * the lookup
2437                  */
2438                 if (fip->fi_list[fd].uf_file == NULL) {
2439                         mutex_exit(&fip->fi_lock);
2440                         UF_EXIT(ufp);
2441                         lxpr_freenode(lxpnp);
2442                         return (NULL);
2443                 }
2444 
2445                 if ((fp = ufp->uf_file) != NULL)
2446                         vp = fp->f_vnode;
2447                 UF_EXIT(ufp);
2448         }
2449         mutex_exit(&fip->fi_lock);
2450 
2451         if (vp == NULL) {
2452                 lxpr_freenode(lxpnp);
2453                 return (NULL);
2454         } else {
2455                 /*
2456                  * Fill in the lxpr_node so future references will be able to
2457                  * find the underlying vnode. The vnode is held on the realvp.
2458                  */
2459                 lxpnp->lxpr_realvp = vp;
2460                 VN_HOLD(lxpnp->lxpr_realvp);
2461         }
2462 
2463         dp = LXPTOV(lxpnp);
2464         ASSERT(dp != NULL);
2465 
2466         return (dp);
2467 }
2468 
2469 static vnode_t *
2470 lxpr_lookup_netdir(vnode_t *dp, char *comp)
2471 {
2472         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
2473 
2474         dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
2475 
2476         return (dp);
2477 }
2478 
2479 static vnode_t *
2480 lxpr_lookup_procdir(vnode_t *dp, char *comp)
2481 {
2482         ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
2483 
2484         /*
2485          * We know all the names of files & dirs in our file system structure
2486          * except those that are pid names.  These change as pids are created/
2487          * deleted etc., so we just look for a number as the first char to see
2488          * if we are we doing pid lookups.
2489          *
2490          * Don't need to check for "self" as it is implemented as a symlink
2491          */
2492         if (*comp >= '0' && *comp <= '9') {
2493                 pid_t pid = 0;
2494                 lxpr_node_t *lxpnp = NULL;
2495                 proc_t *p;
2496                 int c;
2497 
2498                 while ((c = *comp++) != '\0')
2499                         pid = 10 * pid + c - '0';
2500 
2501                 /*
2502                  * Can't continue if the process is still loading or it doesn't
2503                  * really exist yet (or maybe it just died!)
2504                  */
2505                 p = lxpr_lock(pid);
2506                 if (p == NULL)
2507                         return (NULL);
2508 
2509                 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
2510                         lxpr_unlock(p);
2511                         return (NULL);
2512                 }
2513 
2514                 /*
2515                  * allocate and fill in a new lxpr node
2516                  */
2517                 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
2518 
2519                 lxpr_unlock(p);
2520 
2521                 dp = LXPTOV(lxpnp);
2522                 ASSERT(dp != NULL);
2523 
2524                 return (dp);
2525         }
2526 
2527         /* Lookup fixed names */
2528         return (lxpr_lookup_common(dp, comp, NULL, lxpr_dir, PROCDIRFILES));
2529 }
2530 
2531 /*
2532  * lxpr_readdir(): Vnode operation for VOP_READDIR()
2533  */
2534 /* ARGSUSED */
2535 static int
2536 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
2537         caller_context_t *ct, int flags)
2538 {
2539         lxpr_node_t *lxpnp = VTOLXP(dp);
2540         lxpr_nodetype_t type = lxpnp->lxpr_type;
2541         ssize_t uresid;
2542         off_t uoffset;
2543         int error;
2544 
2545         ASSERT(dp->v_type == VDIR);
2546         ASSERT(type >= 0 && type < LXPR_NFILES);
2547 
2548         /*
2549          * we should never get here because the readdir
2550          * is done on the realvp for these nodes
2551          */
2552         ASSERT(type != LXPR_PID_FD_FD &&
2553             type != LXPR_PID_CURDIR &&
2554             type != LXPR_PID_ROOTDIR);
2555 
2556         /*
2557          * restrict readdir permission to owner or root
2558          */
2559         if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
2560                 return (error);
2561 
2562         uoffset = uiop->uio_offset;
2563         uresid = uiop->uio_resid;
2564 
2565         /* can't do negative reads */
2566         if (uoffset < 0 || uresid <= 0)
2567                 return (EINVAL);
2568 
2569         /* can't read directory entries that don't exist! */
2570         if (uoffset % LXPR_SDSIZE)
2571                 return (ENOENT);
2572 
2573         return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
2574 }
2575 
2576 /* ARGSUSED */
2577 static int
2578 lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2579 {
2580         return (ENOTDIR);
2581 }
2582 
2583 /*
2584  * This has the common logic for returning directory entries
2585  */
2586 static int
2587 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
2588     lxpr_dirent_t *dirtab, int dirtablen)
2589 {
2590         /* bp holds one dirent64 structure */
2591         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
2592         dirent64_t *dirent = (dirent64_t *)bp;
2593         ssize_t oresid; /* save a copy for testing later */
2594         ssize_t uresid;
2595 
2596         oresid = uiop->uio_resid;
2597 
2598         /* clear out the dirent buffer */
2599         bzero(bp, sizeof (bp));
2600 
2601         /*
2602          * Satisfy user request
2603          */
2604         while ((uresid = uiop->uio_resid) > 0) {
2605                 int dirindex;
2606                 off_t uoffset;
2607                 int reclen;
2608                 int error;
2609 
2610                 uoffset = uiop->uio_offset;
2611                 dirindex  = (uoffset / LXPR_SDSIZE) - 2;
2612 
2613                 if (uoffset == 0) {
2614 
2615                         dirent->d_ino = lxpnp->lxpr_ino;
2616                         dirent->d_name[0] = '.';
2617                         dirent->d_name[1] = '\0';
2618                         reclen = DIRENT64_RECLEN(1);
2619 
2620                 } else if (uoffset == LXPR_SDSIZE) {
2621 
2622                         dirent->d_ino = lxpr_parentinode(lxpnp);
2623                         dirent->d_name[0] = '.';
2624                         dirent->d_name[1] = '.';
2625                         dirent->d_name[2] = '\0';
2626                         reclen = DIRENT64_RECLEN(2);
2627 
2628                 } else if (dirindex < dirtablen) {
2629                         int slen = strlen(dirtab[dirindex].d_name);
2630 
2631                         dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
2632                             lxpnp->lxpr_pid, 0);
2633 
2634                         ASSERT(slen < LXPNSIZ);
2635                         (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
2636                         reclen = DIRENT64_RECLEN(slen);
2637 
2638                 } else {
2639                         /* Run out of table entries */
2640                         if (eofp) {
2641                                 *eofp = 1;
2642                         }
2643                         return (0);
2644                 }
2645 
2646                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
2647                 dirent->d_reclen = (ushort_t)reclen;
2648 
2649                 /*
2650                  * if the size of the data to transfer is greater
2651                  * that that requested then we can't do it this transfer.
2652                  */
2653                 if (reclen > uresid) {
2654                         /*
2655                          * Error if no entries have been returned yet.
2656                          */
2657                         if (uresid == oresid) {
2658                                 return (EINVAL);
2659                         }
2660                         break;
2661                 }
2662 
2663                 /*
2664                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
2665                  * by the same amount.  But we want uiop->uio_offset to change
2666                  * in increments of LXPR_SDSIZE, which is different from the
2667                  * number of bytes being returned to the user.  So we set
2668                  * uiop->uio_offset separately, ignoring what uiomove() does.
2669                  */
2670                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
2671                     uiop)) != 0)
2672                         return (error);
2673 
2674                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
2675         }
2676 
2677         /* Have run out of space, but could have just done last table entry */
2678         if (eofp) {
2679                 *eofp =
2680                     (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
2681         }
2682         return (0);
2683 }
2684 
2685 
2686 static int
2687 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2688 {
2689         /* bp holds one dirent64 structure */
2690         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
2691         dirent64_t *dirent = (dirent64_t *)bp;
2692         ssize_t oresid; /* save a copy for testing later */
2693         ssize_t uresid;
2694         off_t uoffset;
2695         zoneid_t zoneid;
2696         pid_t pid;
2697         int error;
2698         int ceof;
2699 
2700         ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
2701 
2702         oresid = uiop->uio_resid;
2703         zoneid = LXPTOZ(lxpnp)->zone_id;
2704 
2705         /*
2706          * We return directory entries in the order: "." and ".." then the
2707          * unique lxproc files, then the directories corresponding to the
2708          * running processes.  We have defined this as the ordering because
2709          * it allows us to more easily keep track of where we are betwen calls
2710          * to getdents().  If the number of processes changes between calls
2711          * then we can't lose track of where we are in the lxproc files.
2712          */
2713 
2714         /* Do the fixed entries */
2715         error = lxpr_readdir_common(lxpnp, uiop, &ceof, lxpr_dir,
2716             PROCDIRFILES);
2717 
2718         /* Finished if we got an error or if we couldn't do all the table */
2719         if (error != 0 || ceof == 0)
2720                 return (error);
2721 
2722         /* clear out the dirent buffer */
2723         bzero(bp, sizeof (bp));
2724 
2725         /* Do the process entries */
2726         while ((uresid = uiop->uio_resid) > 0) {
2727                 proc_t *p;
2728                 int len;
2729                 int reclen;
2730                 int i;
2731 
2732                 uoffset = uiop->uio_offset;
2733 
2734                 /*
2735                  * Stop when entire proc table has been examined.
2736                  */
2737                 i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
2738                 if (i >= v.v_proc) {
2739                         /* Run out of table entries */
2740                         if (eofp) {
2741                                 *eofp = 1;
2742                         }
2743                         return (0);
2744                 }
2745                 mutex_enter(&pidlock);
2746 
2747                 /*
2748                  * Skip indices for which there is no pid_entry, PIDs for
2749                  * which there is no corresponding process, a PID of 0,
2750                  * and anything the security policy doesn't allow
2751                  * us to look at.
2752                  */
2753                 if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
2754                     p->p_pid == 0 ||
2755                     secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
2756                         mutex_exit(&pidlock);
2757                         goto next;
2758                 }
2759                 mutex_exit(&pidlock);
2760 
2761                 /*
2762                  * Convert pid to the Linux default of 1 if we're the zone's
2763                  * init process, otherwise use the value from the proc
2764                  * structure
2765                  */
2766                 pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ?
2767                     p->p_pid : 1);
2768 
2769                 /*
2770                  * If this /proc was mounted in the global zone, view
2771                  * all procs; otherwise, only view zone member procs.
2772                  */
2773                 if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
2774                         goto next;
2775                 }
2776 
2777                 ASSERT(p->p_stat != 0);
2778 
2779                 dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
2780                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
2781                 ASSERT(len < LXPNSIZ);
2782                 reclen = DIRENT64_RECLEN(len);
2783 
2784                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
2785                 dirent->d_reclen = (ushort_t)reclen;
2786 
2787                 /*
2788                  * if the size of the data to transfer is greater
2789                  * that that requested then we can't do it this transfer.
2790                  */
2791                 if (reclen > uresid) {
2792                         /*
2793                          * Error if no entries have been returned yet.
2794                          */
2795                         if (uresid == oresid)
2796                                 return (EINVAL);
2797                         break;
2798                 }
2799 
2800                 /*
2801                  * uiomove() updates both uiop->uio_resid and uiop->uio_offset
2802                  * by the same amount.  But we want uiop->uio_offset to change
2803                  * in increments of LXPR_SDSIZE, which is different from the
2804                  * number of bytes being returned to the user.  So we set
2805                  * uiop->uio_offset separately, in the increment of this for
2806                  * the loop, ignoring what uiomove() does.
2807                  */
2808                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
2809                     uiop)) != 0)
2810                         return (error);
2811 next:
2812                 uiop->uio_offset = uoffset + LXPR_SDSIZE;
2813         }
2814 
2815         if (eofp != NULL) {
2816                 *eofp = (uiop->uio_offset >=
2817                     ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
2818         }
2819 
2820         return (0);
2821 }
2822 
2823 static int
2824 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2825 {
2826         proc_t *p;
2827 
2828         ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
2829 
2830         /* can't read its contents if it died */
2831         mutex_enter(&pidlock);
2832 
2833         p = prfind((lxpnp->lxpr_pid == 1) ?
2834             curproc->p_zone->zone_proc_initpid : lxpnp->lxpr_pid);
2835 
2836         if (p == NULL || p->p_stat == SIDL) {
2837                 mutex_exit(&pidlock);
2838                 return (ENOENT);
2839         }
2840         mutex_exit(&pidlock);
2841 
2842         return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
2843 }
2844 
2845 static int
2846 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2847 {
2848         ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
2849         return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
2850 }
2851 
2852 static int
2853 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2854 {
2855         /* bp holds one dirent64 structure */
2856         longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
2857         dirent64_t *dirent = (dirent64_t *)bp;
2858         ssize_t oresid; /* save a copy for testing later */
2859         ssize_t uresid;
2860         off_t uoffset;
2861         int error;
2862         int ceof;
2863         proc_t *p;
2864         int fddirsize;
2865         uf_info_t *fip;
2866 
2867         ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
2868 
2869         oresid = uiop->uio_resid;
2870 
2871         /* can't read its contents if it died */
2872         p = lxpr_lock(lxpnp->lxpr_pid);
2873         if (p == NULL)
2874                 return (ENOENT);
2875 
2876         /* Get open file info */
2877         fip = (&(p)->p_user.u_finfo);
2878 
2879         if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
2880                 fddirsize = 0;
2881         } else {
2882                 fddirsize = fip->fi_nfiles;
2883         }
2884 
2885         mutex_enter(&fip->fi_lock);
2886         lxpr_unlock(p);
2887 
2888         /* Do the fixed entries (in this case just "." & "..") */
2889         error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
2890 
2891         /* Finished if we got an error or if we couldn't do all the table */
2892         if (error != 0 || ceof == 0)
2893                 return (error);
2894 
2895         /* clear out the dirent buffer */
2896         bzero(bp, sizeof (bp));
2897 
2898         /*
2899          * Loop until user's request is satisfied or until
2900          * all file descriptors have been examined.
2901          */
2902         for (; (uresid = uiop->uio_resid) > 0;
2903             uiop->uio_offset = uoffset + LXPR_SDSIZE) {
2904                 int reclen;
2905                 int fd;
2906                 int len;
2907 
2908                 uoffset = uiop->uio_offset;
2909 
2910                 /*
2911                  * Stop at the end of the fd list
2912                  */
2913                 fd = (uoffset / LXPR_SDSIZE) - 2;
2914                 if (fd >= fddirsize) {
2915                         if (eofp) {
2916                                 *eofp = 1;
2917                         }
2918                         goto out;
2919                 }
2920 
2921                 if (fip->fi_list[fd].uf_file == NULL)
2922                         continue;
2923 
2924                 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
2925                 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
2926                 ASSERT(len < LXPNSIZ);
2927                 reclen = DIRENT64_RECLEN(len);
2928 
2929                 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
2930                 dirent->d_reclen = (ushort_t)reclen;
2931 
2932                 if (reclen > uresid) {
2933                         /*
2934                          * Error if no entries have been returned yet.
2935                          */
2936                         if (uresid == oresid)
2937                                 error = EINVAL;
2938                         goto out;
2939                 }
2940 
2941                 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
2942                     uiop)) != 0)
2943                         goto out;
2944         }
2945 
2946         if (eofp != NULL) {
2947                 *eofp =
2948                     (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
2949         }
2950 
2951 out:
2952         mutex_exit(&fip->fi_lock);
2953         return (error);
2954 }
2955 
2956 
2957 /*
2958  * lxpr_readlink(): Vnode operation for VOP_READLINK()
2959  */
2960 /* ARGSUSED */
2961 static int
2962 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
2963 {
2964         char bp[MAXPATHLEN + 1];
2965         size_t buflen = sizeof (bp);
2966         lxpr_node_t *lxpnp = VTOLXP(vp);
2967         vnode_t *rvp = lxpnp->lxpr_realvp;
2968         pid_t pid;
2969         int error = 0;
2970 
2971         /* must be a symbolic link file */
2972         if (vp->v_type != VLNK)
2973                 return (EINVAL);
2974 
2975         /* Try to produce a symlink name for anything that has a realvp */
2976         if (rvp != NULL) {
2977                 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
2978                         return (error);
2979                 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0)
2980                         return (error);
2981         } else {
2982                 switch (lxpnp->lxpr_type) {
2983                 case LXPR_SELF:
2984                         /*
2985                          * Convert pid to the Linux default of 1 if we're the
2986                          * zone's init process
2987                          */
2988                         pid = ((curproc->p_pid !=
2989                             curproc->p_zone->zone_proc_initpid)
2990                             ? curproc->p_pid : 1);
2991 
2992                         /*
2993                          * Don't need to check result as every possible int
2994                          * will fit within MAXPATHLEN bytes.
2995                          */
2996                         (void) snprintf(bp, buflen, "%d", pid);
2997                         break;
2998                 case LXPR_PID_CURDIR:
2999                 case LXPR_PID_ROOTDIR:
3000                 case LXPR_PID_EXE:
3001                         return (EACCES);
3002                 default:
3003                         /*
3004                          * Need to return error so that nothing thinks
3005                          * that the symlink is empty and hence "."
3006                          */
3007                         return (EINVAL);
3008                 }
3009         }
3010 
3011         /* copy the link data to user space */
3012         return (uiomove(bp, strlen(bp), UIO_READ, uiop));
3013 }
3014 
3015 /*
3016  * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
3017  * Vnode is no longer referenced, deallocate the file
3018  * and all its resources.
3019  */
3020 /* ARGSUSED */
3021 static void
3022 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
3023 {
3024         lxpr_freenode(VTOLXP(vp));
3025 }
3026 
/*
 * lxpr_sync(): Vnode operation for VOP_SYNC()
 *
 * lxproc has nothing to write back, so this is a no-op that always
 * reports success (it must never fail).
 */
static int
lxpr_sync()
{
        return (0);
}
3038 
3039 /*
3040  * lxpr_cmp(): Vnode operation for VOP_CMP()
3041  */
3042 static int
3043 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
3044 {
3045         vnode_t *rvp;
3046 
3047         while (vn_matchops(vp1, lxpr_vnodeops) &&
3048             (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
3049                 vp1 = rvp;
3050         }
3051 
3052         while (vn_matchops(vp2, lxpr_vnodeops) &&
3053             (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
3054                 vp2 = rvp;
3055         }
3056 
3057         if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
3058                 return (vp1 == vp2);
3059 
3060         return (VOP_CMP(vp1, vp2, ct));
3061 }
3062 
3063 /*
3064  * lxpr_realvp(): Vnode operation for VOP_REALVP()
3065  */
3066 static int
3067 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
3068 {
3069         vnode_t *rvp;
3070 
3071         if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
3072                 vp = rvp;
3073                 if (VOP_REALVP(vp, &rvp, ct) == 0)
3074                         vp = rvp;
3075         }
3076 
3077         *vpp = vp;
3078         return (0);
3079 }