1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * lxpr_vnops.c: Vnode operations for the lx /proc file system
28 *
29 * Assumptions and Gotchas:
30 *
 * In order to preserve Solaris' security policy, this file system's
 * functionality does not override Solaris' security policies, even if
 * that means breaking Linux compatibility.
 *
 * Linux has no concept of lwps, so we only implement procs here, as in
 * the old /proc interface.
37 */
38
39 #include <sys/cpupart.h>
40 #include <sys/cpuvar.h>
41 #include <sys/session.h>
42 #include <sys/vmparam.h>
43 #include <sys/mman.h>
44 #include <vm/rm.h>
45 #include <vm/seg_vn.h>
46 #include <sys/sdt.h>
47 #include <lx_signum.h>
48 #include <sys/strlog.h>
49 #include <sys/stropts.h>
50 #include <sys/cmn_err.h>
51 #include <sys/lx_brand.h>
52 #include <sys/x86_archext.h>
53 #include <sys/archsystm.h>
54 #include <sys/fp.h>
55 #include <sys/pool_pset.h>
56 #include <sys/pset.h>
57 #include <sys/zone.h>
58 #include <sys/pghw.h>
59 #include <sys/vfs_opreg.h>
60
61 /* Dependent on the Solaris procfs */
62 extern kthread_t *prchoose(proc_t *);
63
64 #include "lx_proc.h"
65
66 extern pgcnt_t swapfs_minfree;
67 extern time_t boot_time;
68
69 /*
70 * Pointer to the vnode ops vector for this fs.
71 * This is instantiated in lxprinit() in lxpr_vfsops.c
72 */
73 vnodeops_t *lxpr_vnodeops;
74
75 static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
76 static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
77 caller_context_t *);
78 static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
79 static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
80 caller_context_t *);
81 static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
82 static int lxpr_lookup(vnode_t *, char *, vnode_t **,
83 pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
84 pathname_t *);
85 static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
86 caller_context_t *, int);
87 static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
88 static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
89 static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
90 static int lxpr_sync(void);
91 static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
92
93 static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
94 static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
95 static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
96 static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
97 static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
98
99 static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
100 static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
101 static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
102 static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
103 static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
104
105 static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
106 static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
107 static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
108 static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
109 static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
110 static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *);
111 static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
112 static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
113 static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
114 static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
115 static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
116 static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
117 static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
118
119 static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
120 static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
121 static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
122 static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
123 static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
124
125 static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
126 static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
127 static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
128 static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
129 static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
130 static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
131 static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
132 static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
133 static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
134 static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
135 static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
136 static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
137 static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
138 static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
139 static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
140 static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
141 static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
142 static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
143
144 /*
145 * Simple conversion
146 */
147 #define btok(x) ((x) >> 10) /* bytes to kbytes */
148 #define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
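/*
 * E.g. with 4 KB pages (PAGESHIFT == 12), ptok(x) is (x) << 2; ptok()
 * assumes PAGESHIFT is at least 10.
 */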
149
150 /*
151 * The lx /proc vnode operations vector
152 */
153 const fs_operation_def_t lxpr_vnodeops_template[] = {
154 VOPNAME_OPEN, { .vop_open = lxpr_open },
155 VOPNAME_CLOSE, { .vop_close = lxpr_close },
156 VOPNAME_READ, { .vop_read = lxpr_read },
157 VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr },
158 VOPNAME_ACCESS, { .vop_access = lxpr_access },
159 VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup },
160 VOPNAME_READDIR, { .vop_readdir = lxpr_readdir },
161 VOPNAME_READLINK, { .vop_readlink = lxpr_readlink },
162 VOPNAME_FSYNC, { .error = lxpr_sync },
163 VOPNAME_SEEK, { .error = lxpr_sync },
164 VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive },
165 VOPNAME_CMP, { .vop_cmp = lxpr_cmp },
166 VOPNAME_REALVP, { .vop_realvp = lxpr_realvp },
167 NULL, NULL
168 };
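
/*
 * Note that VOPNAME_FSYNC and VOPNAME_SEEK are both wired to lxpr_sync()
 * via the .error member: lx /proc is an in-memory, read-only file system,
 * so a simple status-returning routine suffices for both operations.
 */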
169
170
171 /*
 * Contents of the lx /proc root directory.
173 */
174 static lxpr_dirent_t lx_procdir[] = {
175 { LXPR_CMDLINE, "cmdline" },
176 { LXPR_CPUINFO, "cpuinfo" },
177 { LXPR_DEVICES, "devices" },
178 { LXPR_DMA, "dma" },
179 { LXPR_FILESYSTEMS, "filesystems" },
180 { LXPR_INTERRUPTS, "interrupts" },
181 { LXPR_IOPORTS, "ioports" },
182 { LXPR_KCORE, "kcore" },
183 { LXPR_KMSG, "kmsg" },
184 { LXPR_LOADAVG, "loadavg" },
185 { LXPR_MEMINFO, "meminfo" },
186 { LXPR_MOUNTS, "mounts" },
187 { LXPR_NETDIR, "net" },
188 { LXPR_PARTITIONS, "partitions" },
189 { LXPR_SELF, "self" },
190 { LXPR_STAT, "stat" },
191 { LXPR_UPTIME, "uptime" },
192 { LXPR_VERSION, "version" }
193 };
194
195 #define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
196
197 /*
198 * Contents of an lx /proc/<pid> directory.
199 */
200 static lxpr_dirent_t piddir[] = {
201 { LXPR_PID_CMDLINE, "cmdline" },
202 { LXPR_PID_CPU, "cpu" },
203 { LXPR_PID_CURDIR, "cwd" },
204 { LXPR_PID_ENV, "environ" },
205 { LXPR_PID_EXE, "exe" },
206 { LXPR_PID_MAPS, "maps" },
207 { LXPR_PID_MEM, "mem" },
208 { LXPR_PID_ROOTDIR, "root" },
209 { LXPR_PID_STAT, "stat" },
210 { LXPR_PID_STATM, "statm" },
211 { LXPR_PID_STATUS, "status" },
212 { LXPR_PID_FDDIR, "fd" }
213 };
214
215 #define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0]))
216
217 /*
 * Contents of the lx /proc/net directory.
219 */
220 static lxpr_dirent_t netdir[] = {
221 { LXPR_NET_ARP, "arp" },
222 { LXPR_NET_DEV, "dev" },
223 { LXPR_NET_DEV_MCAST, "dev_mcast" },
224 { LXPR_NET_IGMP, "igmp" },
225 { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
226 { LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
227 { LXPR_NET_MCFILTER, "mcfilter" },
228 { LXPR_NET_NETSTAT, "netstat" },
229 { LXPR_NET_RAW, "raw" },
230 { LXPR_NET_ROUTE, "route" },
231 { LXPR_NET_RPC, "rpc" },
232 { LXPR_NET_RT_CACHE, "rt_cache" },
233 { LXPR_NET_SOCKSTAT, "sockstat" },
234 { LXPR_NET_SNMP, "snmp" },
235 { LXPR_NET_STAT, "stat" },
236 { LXPR_NET_TCP, "tcp" },
237 { LXPR_NET_UDP, "udp" },
238 { LXPR_NET_UNIX, "unix" }
239 };
240
241 #define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0]))
242
243 /*
244 * lxpr_open(): Vnode operation for VOP_OPEN()
245 */
246 static int
247 lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
248 {
249 vnode_t *vp = *vpp;
250 lxpr_node_t *lxpnp = VTOLXP(vp);
251 lxpr_nodetype_t type = lxpnp->lxpr_type;
252 vnode_t *rvp;
253 int error = 0;
254
255 /*
	 * We only allow reading in this file system.
257 */
258 if (flag & FWRITE)
259 return (EROFS);
260
261 /*
	 * If we are opening an underlying file, only allow regular files;
	 * reject the open for anything but a regular file.  Just do the
	 * open if we are opening the current or root directory.
265 */
266 if (lxpnp->lxpr_realvp != NULL) {
267 rvp = lxpnp->lxpr_realvp;
268
269 if (type == LXPR_PID_FD_FD && rvp->v_type != VREG)
270 error = EACCES;
271 else {
272 /*
273 * Need to hold rvp since VOP_OPEN() may release it.
274 */
275 VN_HOLD(rvp);
276 error = VOP_OPEN(&rvp, flag, cr, ct);
277 if (error) {
278 VN_RELE(rvp);
279 } else {
280 *vpp = rvp;
281 VN_RELE(vp);
282 }
283 }
284 }
285
286 if (type == LXPR_KMSG) {
287 ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
288 struct strioctl str;
289 int rv;
290
291 /*
292 * Open the zone's console device using the layered driver
293 * interface.
294 */
295 if ((error = ldi_open_by_name("/dev/log", FREAD, cr,
296 &lxpnp->lxpr_cons_ldih, li)) != 0)
297 return (error);
298
299 /*
300 * Send an ioctl to the underlying console device, letting it
301 * know we're interested in getting console messages.
302 */
303 str.ic_cmd = I_CONSLOG;
304 str.ic_timout = 0;
305 str.ic_len = 0;
306 str.ic_dp = NULL;
307 if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR,
308 (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
309 return (error);
310 }
311
312 return (error);
313 }
314
315
316 /*
317 * lxpr_close(): Vnode operation for VOP_CLOSE()
318 */
319 /* ARGSUSED */
320 static int
321 lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
322 caller_context_t *ct)
323 {
324 lxpr_node_t *lxpr = VTOLXP(vp);
325 lxpr_nodetype_t type = lxpr->lxpr_type;
326 int err;
327
328 /*
329 * we should never get here because the close is done on the realvp
330 * for these nodes
331 */
332 ASSERT(type != LXPR_PID_FD_FD &&
333 type != LXPR_PID_CURDIR &&
334 type != LXPR_PID_ROOTDIR &&
335 type != LXPR_PID_EXE);
336
337 if (type == LXPR_KMSG) {
338 if ((err = ldi_close(lxpr->lxpr_cons_ldih, 0, cr)) != 0)
339 return (err);
340 }
341
342 return (0);
343 }
344
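/*
 * Array of read functions, indexed by lx /proc file type.
 */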
345 static void (*lxpr_read_function[LXPR_NFILES])() = {
346 lxpr_read_isdir, /* /proc */
347 lxpr_read_isdir, /* /proc/<pid> */
348 lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */
349 lxpr_read_empty, /* /proc/<pid>/cpu */
350 lxpr_read_invalid, /* /proc/<pid>/cwd */
351 lxpr_read_empty, /* /proc/<pid>/environ */
352 lxpr_read_invalid, /* /proc/<pid>/exe */
353 lxpr_read_pid_maps, /* /proc/<pid>/maps */
354 lxpr_read_empty, /* /proc/<pid>/mem */
355 lxpr_read_invalid, /* /proc/<pid>/root */
356 lxpr_read_pid_stat, /* /proc/<pid>/stat */
357 lxpr_read_pid_statm, /* /proc/<pid>/statm */
358 lxpr_read_pid_status, /* /proc/<pid>/status */
359 lxpr_read_isdir, /* /proc/<pid>/fd */
360 lxpr_read_fd, /* /proc/<pid>/fd/nn */
361 lxpr_read_empty, /* /proc/cmdline */
362 lxpr_read_cpuinfo, /* /proc/cpuinfo */
363 lxpr_read_empty, /* /proc/devices */
364 lxpr_read_empty, /* /proc/dma */
365 lxpr_read_empty, /* /proc/filesystems */
366 lxpr_read_empty, /* /proc/interrupts */
367 lxpr_read_empty, /* /proc/ioports */
368 lxpr_read_empty, /* /proc/kcore */
369 lxpr_read_kmsg, /* /proc/kmsg */
370 lxpr_read_loadavg, /* /proc/loadavg */
371 lxpr_read_meminfo, /* /proc/meminfo */
372 lxpr_read_mounts, /* /proc/mounts */
373 lxpr_read_isdir, /* /proc/net */
374 lxpr_read_net_arp, /* /proc/net/arp */
375 lxpr_read_net_dev, /* /proc/net/dev */
376 lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */
377 lxpr_read_net_igmp, /* /proc/net/igmp */
378 lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */
379 lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */
380 lxpr_read_net_mcfilter, /* /proc/net/mcfilter */
381 lxpr_read_net_netstat, /* /proc/net/netstat */
382 lxpr_read_net_raw, /* /proc/net/raw */
383 lxpr_read_net_route, /* /proc/net/route */
384 lxpr_read_net_rpc, /* /proc/net/rpc */
385 lxpr_read_net_rt_cache, /* /proc/net/rt_cache */
386 lxpr_read_net_sockstat, /* /proc/net/sockstat */
387 lxpr_read_net_snmp, /* /proc/net/snmp */
388 lxpr_read_net_stat, /* /proc/net/stat */
389 lxpr_read_net_tcp, /* /proc/net/tcp */
390 lxpr_read_net_udp, /* /proc/net/udp */
391 lxpr_read_net_unix, /* /proc/net/unix */
392 lxpr_read_partitions, /* /proc/partitions */
393 lxpr_read_invalid, /* /proc/self */
394 lxpr_read_stat, /* /proc/stat */
395 lxpr_read_uptime, /* /proc/uptime */
396 lxpr_read_version, /* /proc/version */
397 };
398
399 /*
400 * Array of lookup functions, indexed by lx /proc file type.
401 */
402 static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
403 lxpr_lookup_procdir, /* /proc */
404 lxpr_lookup_piddir, /* /proc/<pid> */
405 lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
406 lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
407 lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
408 lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
409 lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
410 lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
411 lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
412 lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
413 lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
414 lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
415 lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
416 lxpr_lookup_fddir, /* /proc/<pid>/fd */
417 lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
418 lxpr_lookup_not_a_dir, /* /proc/cmdline */
419 lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
420 lxpr_lookup_not_a_dir, /* /proc/devices */
421 lxpr_lookup_not_a_dir, /* /proc/dma */
422 lxpr_lookup_not_a_dir, /* /proc/filesystems */
423 lxpr_lookup_not_a_dir, /* /proc/interrupts */
424 lxpr_lookup_not_a_dir, /* /proc/ioports */
425 lxpr_lookup_not_a_dir, /* /proc/kcore */
426 lxpr_lookup_not_a_dir, /* /proc/kmsg */
427 lxpr_lookup_not_a_dir, /* /proc/loadavg */
428 lxpr_lookup_not_a_dir, /* /proc/meminfo */
429 lxpr_lookup_not_a_dir, /* /proc/mounts */
430 lxpr_lookup_netdir, /* /proc/net */
431 lxpr_lookup_not_a_dir, /* /proc/net/arp */
432 lxpr_lookup_not_a_dir, /* /proc/net/dev */
433 lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
434 lxpr_lookup_not_a_dir, /* /proc/net/igmp */
435 lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
436 lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
437 lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
438 lxpr_lookup_not_a_dir, /* /proc/net/netstat */
439 lxpr_lookup_not_a_dir, /* /proc/net/raw */
440 lxpr_lookup_not_a_dir, /* /proc/net/route */
441 lxpr_lookup_not_a_dir, /* /proc/net/rpc */
442 lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
443 lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
444 lxpr_lookup_not_a_dir, /* /proc/net/snmp */
445 lxpr_lookup_not_a_dir, /* /proc/net/stat */
446 lxpr_lookup_not_a_dir, /* /proc/net/tcp */
447 lxpr_lookup_not_a_dir, /* /proc/net/udp */
448 lxpr_lookup_not_a_dir, /* /proc/net/unix */
449 lxpr_lookup_not_a_dir, /* /proc/partitions */
450 lxpr_lookup_not_a_dir, /* /proc/self */
451 lxpr_lookup_not_a_dir, /* /proc/stat */
452 lxpr_lookup_not_a_dir, /* /proc/uptime */
453 lxpr_lookup_not_a_dir, /* /proc/version */
454 };
455
456 /*
457 * Array of readdir functions, indexed by /proc file type.
458 */
459 static int (*lxpr_readdir_function[LXPR_NFILES])() = {
460 lxpr_readdir_procdir, /* /proc */
461 lxpr_readdir_piddir, /* /proc/<pid> */
462 lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */
463 lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */
464 lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */
465 lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */
466 lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */
467 lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */
468 lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */
469 lxpr_readdir_not_a_dir, /* /proc/<pid>/root */
470 lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */
471 lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */
472 lxpr_readdir_not_a_dir, /* /proc/<pid>/status */
473 lxpr_readdir_fddir, /* /proc/<pid>/fd */
474 lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */
475 lxpr_readdir_not_a_dir, /* /proc/cmdline */
476 lxpr_readdir_not_a_dir, /* /proc/cpuinfo */
477 lxpr_readdir_not_a_dir, /* /proc/devices */
478 lxpr_readdir_not_a_dir, /* /proc/dma */
479 lxpr_readdir_not_a_dir, /* /proc/filesystems */
480 lxpr_readdir_not_a_dir, /* /proc/interrupts */
481 lxpr_readdir_not_a_dir, /* /proc/ioports */
482 lxpr_readdir_not_a_dir, /* /proc/kcore */
483 lxpr_readdir_not_a_dir, /* /proc/kmsg */
484 lxpr_readdir_not_a_dir, /* /proc/loadavg */
485 lxpr_readdir_not_a_dir, /* /proc/meminfo */
486 lxpr_readdir_not_a_dir, /* /proc/mounts */
487 lxpr_readdir_netdir, /* /proc/net */
488 lxpr_readdir_not_a_dir, /* /proc/net/arp */
489 lxpr_readdir_not_a_dir, /* /proc/net/dev */
490 lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */
491 lxpr_readdir_not_a_dir, /* /proc/net/igmp */
492 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
493 lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
494 lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
495 lxpr_readdir_not_a_dir, /* /proc/net/netstat */
496 lxpr_readdir_not_a_dir, /* /proc/net/raw */
497 lxpr_readdir_not_a_dir, /* /proc/net/route */
498 lxpr_readdir_not_a_dir, /* /proc/net/rpc */
499 lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */
500 lxpr_readdir_not_a_dir, /* /proc/net/sockstat */
501 lxpr_readdir_not_a_dir, /* /proc/net/snmp */
502 lxpr_readdir_not_a_dir, /* /proc/net/stat */
503 lxpr_readdir_not_a_dir, /* /proc/net/tcp */
504 lxpr_readdir_not_a_dir, /* /proc/net/udp */
505 lxpr_readdir_not_a_dir, /* /proc/net/unix */
506 lxpr_readdir_not_a_dir, /* /proc/partitions */
507 lxpr_readdir_not_a_dir, /* /proc/self */
508 lxpr_readdir_not_a_dir, /* /proc/stat */
509 lxpr_readdir_not_a_dir, /* /proc/uptime */
510 lxpr_readdir_not_a_dir, /* /proc/version */
511 };
512
513
514 /*
515 * lxpr_read(): Vnode operation for VOP_READ()
516 *
 * As the format of all the files that can be read in the lx procfs is
 * human-readable text rather than binary structures, there do not have to
 * be different read variants depending on whether the reading process is a
 * 32-bit or 64-bit model (at least in general; the difference is certainly
 * unlikely to be enough to justify having different routines for 32-bit
 * and 64-bit reads).
522 */
523 /* ARGSUSED */
524 static int
525 lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
526 caller_context_t *ct)
527 {
528 lxpr_node_t *lxpnp = VTOLXP(vp);
529 lxpr_nodetype_t type = lxpnp->lxpr_type;
530 lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
531 int error;
532
533 ASSERT(type < LXPR_NFILES);
534
535 lxpr_read_function[type](lxpnp, uiobuf);
536
537 error = lxpr_uiobuf_flush(uiobuf);
538 lxpr_uiobuf_free(uiobuf);
539
540 return (error);
541 }
542
543
544 /*
545 * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
546 *
547 * Various special case reads:
548 * - trying to read a directory
549 * - invalid file (used to mean a file that should be implemented,
550 * but isn't yet)
551 * - empty file
 * - waiting to read a file that will never have anything to read
553 */
554 /* ARGSUSED */
555 static void
556 lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
557 {
558 lxpr_uiobuf_seterr(uiobuf, EISDIR);
559 }
560
561 /* ARGSUSED */
562 static void
563 lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
564 {
565 lxpr_uiobuf_seterr(uiobuf, EINVAL);
566 }
567
568 /* ARGSUSED */
569 static void
570 lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
571 {
572 }
573
574 /*
575 * lxpr_read_pid_cmdline():
576 *
 * This is not precisely compatible with Linux:
 *
 * The Linux cmdline returns argv with the arguments correctly separated
 * by '\0', but we cannot do that without copying the real argv from the
 * correct process context.  That is too difficult to attempt, so we
 * pretend that the entire cmdline is just argv[0].  This is good enough
 * for ps to display correctly, but might cause some other things not to
 * work correctly.
586 */
587 static void
588 lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
589 {
590 proc_t *p;
591
592 ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
593
594 p = lxpr_lock(lxpnp->lxpr_pid);
595 if (p == NULL) {
596 lxpr_uiobuf_seterr(uiobuf, EINVAL);
597 return;
598 }
599
600 if (PTOU(p)->u_argv != 0) {
601 char *buff = PTOU(p)->u_psargs;
602 int len = strlen(buff);
603 lxpr_unlock(p);
604 lxpr_uiobuf_write(uiobuf, buff, len+1);
605 } else {
606 lxpr_unlock(p);
607 }
608 }
609
610
611 /*
612 * lxpr_read_pid_maps(): memory map file
613 */
614 static void
615 lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
616 {
617 proc_t *p;
618 struct as *as;
619 struct seg *seg;
620 char *buf;
621 int buflen = MAXPATHLEN;
622 struct print_data {
623 caddr_t saddr;
624 caddr_t eaddr;
625 int type;
626 char prot[5];
627 uint32_t offset;
628 vnode_t *vp;
629 struct print_data *next;
630 } *print_head = NULL;
631 struct print_data **print_tail = &print_head;
632 struct print_data *pbuf;
633
634 ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
635
636 p = lxpr_lock(lxpnp->lxpr_pid);
637 if (p == NULL) {
638 lxpr_uiobuf_seterr(uiobuf, EINVAL);
639 return;
640 }
641
642 as = p->p_as;
643
644 if (as == &kas) {
645 lxpr_unlock(p);
646 return;
647 }
648
649 mutex_exit(&p->p_lock);
650
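	/*
	 * Gather the per-segment data into a local list while holding the
	 * address space lock; printing is deferred to a second pass since
	 * vnodetopath() and VOP_GETATTR() below can block or take other
	 * locks (the same gather-then-print approach as lxpr_read_mounts()).
	 */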
651 /* Iterate over all segments in the address space */
652 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
653 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
654 vnode_t *vp;
655 uint_t protbits;
656
657 pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
658
659 pbuf->saddr = seg->s_base;
660 pbuf->eaddr = seg->s_base+seg->s_size;
661 pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
662
663 /*
664 * Cheat and only use the protection bits of the first page
665 * in the segment
666 */
667 (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
668 (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
669
670 if (protbits & PROT_READ) pbuf->prot[0] = 'r';
671 if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
672 if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
673 if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
674 else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
675
676 if (seg->s_ops == &segvn_ops &&
677 SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
678 vp != NULL && vp->v_type == VREG) {
679 VN_HOLD(vp);
680 pbuf->vp = vp;
681 } else {
682 pbuf->vp = NULL;
683 }
684
685 pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr);
686
687 pbuf->next = NULL;
688 *print_tail = pbuf;
689 print_tail = &pbuf->next;
690 }
691 AS_LOCK_EXIT(as, &as->a_lock);
692 mutex_enter(&p->p_lock);
693 lxpr_unlock(p);
694
695 buf = kmem_alloc(buflen, KM_SLEEP);
696
697 /* print the data we've extracted */
698 pbuf = print_head;
699 while (pbuf != NULL) {
700 struct print_data *pbuf_next;
701 vattr_t vattr;
702
703 int maj = 0;
704 int min = 0;
705 int inode = 0;
706
707 *buf = '\0';
708 if (pbuf->vp != NULL) {
709 vattr.va_mask = AT_FSID | AT_NODEID;
710 if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
711 NULL) == 0) {
712 maj = getmajor(vattr.va_fsid);
713 min = getminor(vattr.va_fsid);
714 inode = vattr.va_nodeid;
715 }
716 (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
717 VN_RELE(pbuf->vp);
718 }
719
720 if (*buf != '\0') {
721 lxpr_uiobuf_printf(uiobuf,
722 "%08x-%08x %s %08x %02d:%03d %d %s\n",
723 pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
724 maj, min, inode, buf);
725 } else {
726 lxpr_uiobuf_printf(uiobuf,
727 "%08x-%08x %s %08x %02d:%03d %d\n",
728 pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
729 maj, min, inode);
730 }
731
732 pbuf_next = pbuf->next;
733 kmem_free(pbuf, sizeof (*pbuf));
734 pbuf = pbuf_next;
735 }
736
737 kmem_free(buf, buflen);
738 }
739
740 /*
741 * lxpr_read_pid_statm(): memory status file
742 */
743 static void
744 lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
745 {
746 proc_t *p;
747 struct as *as;
748 size_t vsize;
749 size_t rss;
750
751 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
752
753 p = lxpr_lock(lxpnp->lxpr_pid);
754 if (p == NULL) {
755 lxpr_uiobuf_seterr(uiobuf, EINVAL);
756 return;
757 }
758
759 as = p->p_as;
760
761 mutex_exit(&p->p_lock);
762
763 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
764 vsize = btopr(as->a_resvsize);
765 rss = rm_asrss(as);
766 AS_LOCK_EXIT(as, &as->a_lock);
767
768 mutex_enter(&p->p_lock);
769 lxpr_unlock(p);
770
771 lxpr_uiobuf_printf(uiobuf,
772 "%lu %lu %lu %lu %lu %lu %lu\n",
773 vsize, rss, 0l, rss, 0l, 0l, 0l);
774 }
775
776 /*
777 * lxpr_read_pid_status(): status file
778 */
779 static void
780 lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
781 {
782 proc_t *p;
783 kthread_t *t;
784 user_t *up;
785 cred_t *cr;
786 const gid_t *groups;
787 int ngroups;
788 struct as *as;
789 char *status;
790 pid_t pid, ppid;
791 size_t vsize;
792 size_t rss;
793 k_sigset_t current, ignore, handle;
794 int i, lx_sig;
795
796 ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
797
798 p = lxpr_lock(lxpnp->lxpr_pid);
799 if (p == NULL) {
800 lxpr_uiobuf_seterr(uiobuf, EINVAL);
801 return;
802 }
803
804 pid = p->p_pid;
805
806 /*
807 * Convert pid to the Linux default of 1 if we're the zone's init
808 * process
809 */
810 if (pid == curproc->p_zone->zone_proc_initpid) {
811 pid = 1;
812 ppid = 0; /* parent pid for init is 0 */
813 } else {
814 /*
815 * Make sure not to reference parent PIDs that reside outside
816 * the zone
817 */
818 ppid = ((p->p_flag & SZONETOP)
819 ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
820
821 /*
822 * Convert ppid to the Linux default of 1 if our parent is the
823 * zone's init process
824 */
825 if (ppid == curproc->p_zone->zone_proc_initpid)
826 ppid = 1;
827 }
828
829 t = prchoose(p);
830 if (t != NULL) {
831 switch (t->t_state) {
832 case TS_SLEEP:
833 status = "S (sleeping)";
834 break;
835 case TS_RUN:
836 case TS_ONPROC:
837 status = "R (running)";
838 break;
839 case TS_ZOMB:
840 status = "Z (zombie)";
841 break;
842 case TS_STOPPED:
843 status = "T (stopped)";
844 break;
845 default:
846 status = "! (unknown)";
847 break;
848 }
849 thread_unlock(t);
850 } else {
851 /*
		 * There is a hole in the exit code where a proc can have
		 * no threads but has not yet been flagged SZOMB.  We will
		 * assume we are about to become a zombie.
855 */
856 status = "Z (zombie)";
857 }
858
859 up = PTOU(p);
860 mutex_enter(&p->p_crlock);
861 crhold(cr = p->p_cred);
862 mutex_exit(&p->p_crlock);
863
864 lxpr_uiobuf_printf(uiobuf,
865 "Name:\t%s\n"
866 "State:\t%s\n"
867 "Tgid:\t%d\n"
868 "Pid:\t%d\n"
869 "PPid:\t%d\n"
870 "TracerPid:\t%d\n"
871 "Uid:\t%u\t%u\t%u\t%u\n"
872 "Gid:\t%u\t%u\t%u\t%u\n"
873 "FDSize:\t%d\n"
874 "Groups:\t",
875 up->u_comm,
876 status,
877 pid, /* thread group id - same as pid until we map lwps to procs */
878 pid,
879 ppid,
880 0,
881 crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
882 crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
883 p->p_fno_ctl);
884
885 ngroups = crgetngroups(cr);
886 groups = crgetgroups(cr);
887 for (i = 0; i < ngroups; i++) {
888 lxpr_uiobuf_printf(uiobuf,
889 "%u ",
890 groups[i]);
891 }
892 crfree(cr);
893
894 as = p->p_as;
895 if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
896 mutex_exit(&p->p_lock);
897 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
898 vsize = as->a_resvsize;
899 rss = rm_asrss(as);
900 AS_LOCK_EXIT(as, &as->a_lock);
901 mutex_enter(&p->p_lock);
902
903 lxpr_uiobuf_printf(uiobuf,
904 "\n"
905 "VmSize:\t%8lu kB\n"
906 "VmLck:\t%8lu kB\n"
907 "VmRSS:\t%8lu kB\n"
908 "VmData:\t%8lu kB\n"
909 "VmStk:\t%8lu kB\n"
910 "VmExe:\t%8lu kB\n"
911 "VmLib:\t%8lu kB",
912 btok(vsize),
913 0l,
914 ptok(rss),
915 0l,
916 btok(p->p_stksize),
917 ptok(rss),
918 0l);
919 }
920
	sigemptyset(&current);
922 sigemptyset(&ignore);
923 sigemptyset(&handle);
924
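	/*
	 * Translate each Solaris signal into its Linux equivalent via
	 * stol_signo[] and build Linux-style pending, ignored and caught
	 * signal sets.
	 */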
925 for (i = 1; i < NSIG; i++) {
926 lx_sig = stol_signo[i];
927
928 if ((lx_sig > 0) && (lx_sig < LX_NSIG)) {
929 if (sigismember(&p->p_sig, i))
				sigaddset(&current, lx_sig);
931
932 if (up->u_signal[i - 1] == SIG_IGN)
933 sigaddset(&ignore, lx_sig);
934 else if (up->u_signal[i - 1] != SIG_DFL)
935 sigaddset(&handle, lx_sig);
936 }
937 }
938
939 lxpr_uiobuf_printf(uiobuf,
940 "\n"
941 "SigPnd:\t%08x%08x\n"
942 "SigBlk:\t%08x%08x\n"
943 "SigIgn:\t%08x%08x\n"
944 "SigCgt:\t%08x%08x\n"
945 "CapInh:\t%016x\n"
946 "CapPrm:\t%016x\n"
947 "CapEff:\t%016x\n",
948 current.__sigbits[1], current.__sigbits[0],
949 0, 0, /* signals blocked on per thread basis */
950 ignore.__sigbits[1], ignore.__sigbits[0],
951 handle.__sigbits[1], handle.__sigbits[0],
952 /* Can't do anything with linux capabilities */
953 0,
954 0,
955 0);
956
957 lxpr_unlock(p);
958 }
959
960
961 /*
962 * lxpr_read_pid_stat(): pid stat file
963 */
964 static void
965 lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
966 {
967 proc_t *p;
968 kthread_t *t;
969 struct as *as;
970 char stat;
971 pid_t pid, ppid, pgpid, spid;
972 gid_t psgid;
973 dev_t psdev;
974 size_t rss, vsize;
975 int nice, pri;
976 caddr_t wchan;
977 processorid_t cpu;
978
979 ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
980
981 p = lxpr_lock(lxpnp->lxpr_pid);
982 if (p == NULL) {
983 lxpr_uiobuf_seterr(uiobuf, EINVAL);
984 return;
985 }
986
987 pid = p->p_pid;
988
989 /*
990 * Set Linux defaults if we're the zone's init process
991 */
992 if (pid == curproc->p_zone->zone_proc_initpid) {
993 pid = 1; /* PID for init */
994 ppid = 0; /* parent PID for init is 0 */
995 pgpid = 0; /* process group for init is 0 */
996 psgid = (gid_t)-1; /* credential GID for init is -1 */
997 spid = 0; /* session id for init is 0 */
998 psdev = 0; /* session device for init is 0 */
999 } else {
1000 /*
1001 * Make sure not to reference parent PIDs that reside outside
1002 * the zone
1003 */
1004 ppid = ((p->p_flag & SZONETOP)
1005 ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
1006
1007 /*
1008 * Convert ppid to the Linux default of 1 if our parent is the
1009 * zone's init process
1010 */
1011 if (ppid == curproc->p_zone->zone_proc_initpid)
1012 ppid = 1;
1013
1014 pgpid = p->p_pgrp;
1015
1016 mutex_enter(&p->p_splock);
1017 mutex_enter(&p->p_sessp->s_lock);
1018 spid = p->p_sessp->s_sid;
1019 /* XXBRAND psdev = DEV_TO_LXDEV(p->p_sessp->s_dev, VCHR); */
1020 psdev = p->p_sessp->s_dev;
1021 if (p->p_sessp->s_cred)
1022 psgid = crgetgid(p->p_sessp->s_cred);
1023 else
1024 psgid = crgetgid(p->p_cred);
1025
1026 mutex_exit(&p->p_sessp->s_lock);
1027 mutex_exit(&p->p_splock);
1028 }
1029
1030 t = prchoose(p);
1031 if (t != NULL) {
1032 switch (t->t_state) {
1033 case TS_SLEEP:
1034 stat = 'S'; break;
1035 case TS_RUN:
1036 case TS_ONPROC:
1037 stat = 'R'; break;
1038 case TS_ZOMB:
1039 stat = 'Z'; break;
1040 case TS_STOPPED:
1041 stat = 'T'; break;
1042 default:
1043 stat = '!'; break;
1044 }
1045
1046 if (CL_DONICE(t, NULL, 0, &nice) != 0)
1047 nice = 0;
1048
1049 pri = v.v_maxsyspri - t->t_pri;
1050 wchan = t->t_wchan;
1051 cpu = t->t_cpu->cpu_seqid;
1052 thread_unlock(t);
1053 } else {
1054 /* Only zombies have no threads */
1055 stat = 'Z';
1056 nice = 0;
1057 pri = 0;
1058 wchan = 0;
1059 cpu = 0;
1060 }
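	/*
	 * Briefly drop p_lock so we can take the address space lock to
	 * compute the process size and resident set size.
	 */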
1061 as = p->p_as;
1062 mutex_exit(&p->p_lock);
1063 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1064 vsize = as->a_resvsize;
1065 rss = rm_asrss(as);
1066 AS_LOCK_EXIT(as, &as->a_lock);
1067 mutex_enter(&p->p_lock);
1068
1069 lxpr_uiobuf_printf(uiobuf,
1070 "%d (%s) %c %d %d %d %d %d "
1071 "%lu %lu %lu %lu %lu "
1072 "%lu %lu %ld %ld "
1073 "%d %d "
1074 "0 "
1075 "%ld %lu "
1076 "%lu %ld %llu "
1077 "%lu %lu %u "
1078 "%lu %lu "
1079 "%lu %lu %lu %lu "
1080 "%lu "
1081 "%lu %lu "
1082 "%d "
1083 "%d"
1084 "\n",
1085 pid,
1086 PTOU(p)->u_comm,
1087 stat,
1088 ppid, pgpid,
1089 spid, psdev, psgid,
1090 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
1091 p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
1092 pri, nice,
	    0l, PTOU(p)->u_ticks, /* ticks till next SIGALRM, start time */
1094 vsize, rss, p->p_vmem_ctl,
1095 0l, 0l, USRSTACK, /* startcode, endcode, startstack */
1096 0l, 0l, /* kstkesp, kstkeip */
1097 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
1098 wchan,
1099 0l, 0l, /* nswap, cnswap */
1100 0, /* exit_signal */
1101 cpu);
1102
1103 lxpr_unlock(p);
1104 }
1105
1106 /* ARGSUSED */
1107 static void
1108 lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1109 {
1110 }
1111
1112 /* ARGSUSED */
1113 static void
1114 lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1115 {
1116 lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
1117 " | Transmit\n");
1118 lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
1119 " frame compressed multicast|bytes packets errs drop fifo"
1120 " colls carrier compressed\n");
1121
1122 /*
1123 * XXX: data about each interface should go here, but we'll wait to
1124 * see if anybody wants to use it.
1125 */
1126 }
1127
1128 /* ARGSUSED */
1129 static void
1130 lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1131 {
1132 }
1133
1134 /* ARGSUSED */
1135 static void
1136 lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1137 {
1138 }
1139
1140 /* ARGSUSED */
1141 static void
1142 lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1143 {
1144 }
1145
1146 /* ARGSUSED */
1147 static void
1148 lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1149 {
1150 }
1151
1152 /* ARGSUSED */
1153 static void
1154 lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1155 {
1156 }
1157
1158 /* ARGSUSED */
1159 static void
1160 lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1161 {
1162 }
1163
1164 /* ARGSUSED */
1165 static void
1166 lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1167 {
1168 }
1169
1170 /* ARGSUSED */
1171 static void
1172 lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1173 {
1174 }
1175
1176 /* ARGSUSED */
1177 static void
1178 lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1179 {
1180 }
1181
1182 /* ARGSUSED */
1183 static void
1184 lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1185 {
1186 }
1187
1188 /* ARGSUSED */
1189 static void
1190 lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1191 {
1192 }
1193
1194 /* ARGSUSED */
1195 static void
1196 lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1197 {
1198 }
1199
1200 /* ARGSUSED */
1201 static void
1202 lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1203 {
1204 }
1205
1206 /* ARGSUSED */
1207 static void
1208 lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1209 {
1210 }
1211
1212 /* ARGSUSED */
1213 static void
1214 lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1215 {
1216 }
1217
1218 /* ARGSUSED */
1219 static void
1220 lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1221 {
1222 }
1223
1224 /*
1225 * lxpr_read_kmsg(): read the contents of the kernel message queue. We
1226 * translate this into the reception of console messages for this lx zone; each
1227 * read copies out a single zone console message, or blocks until the next one
1228 * is produced.
1229 */
1230
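/*
 * Linux /proc/kmsg lines begin with a "<priority>" tag; we tag every
 * message with priority 0.
 */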
1231 #define LX_KMSG_PRI "<0>"
1232
1233 static void
1234 lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf)
1235 {
1236 ldi_handle_t lh = lxpnp->lxpr_cons_ldih;
1237 mblk_t *mp;
1238
1239 if (ldi_getmsg(lh, &mp, NULL) == 0) {
1240 /*
1241 * lx procfs doesn't like successive reads to the same file
1242 * descriptor unless we do an explicit rewind each time.
1243 */
1244 lxpr_uiobuf_seek(uiobuf, 0);
1245
1246 lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
1247 mp->b_cont->b_rptr);
1248
1249 freemsg(mp);
1250 }
1251 }
1252
1253 /*
1254 * lxpr_read_loadavg(): read the contents of the "loadavg" file.
1255 *
1256 * Just enough for uptime to work
1257 */
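/*
 * Example output (matching the printf format in lxpr_read_loadavg()):
 *	0.11 0.17 0.18 1/94 0
 * i.e. 1-, 5- and 15-minute load averages, runnable/total lwps, and a
 * final field (the most recently created pid on Linux) that we always
 * report as 0.
 */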
1258 extern int nthread;
1259
1260 static void
1261 lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1262 {
1263 ulong_t avenrun1;
1264 ulong_t avenrun5;
1265 ulong_t avenrun15;
1266 ulong_t avenrun1_cs;
1267 ulong_t avenrun5_cs;
1268 ulong_t avenrun15_cs;
1269 int loadavg[3];
1270 int *loadbuf;
1271 cpupart_t *cp;
1272
1273 uint_t nrunnable = 0;
1274 rctl_qty_t nlwps;
1275
1276 ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
1277
1278 mutex_enter(&cpu_lock);
1279
1280 /*
1281 * Need to add up values over all CPU partitions. If pools are active,
1282 * only report the values of the zone's partition, which by definition
1283 * includes the current CPU.
1284 */
1285 if (pool_pset_enabled()) {
1286 psetid_t psetid = zone_pset_get(curproc->p_zone);
1287
1288 ASSERT(curproc->p_zone != &zone0);
1289 cp = CPU->cpu_part;
1290
1291 nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
1292 (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
1293 loadbuf = &loadavg[0];
1294
1295 /*
1296 * We'll report the total number of lwps in the zone for the
1297 * "nproc" parameter of /proc/loadavg; good enough for lx.
1298 */
1299 nlwps = curproc->p_zone->zone_nlwps;
1300 } else {
1301 cp = cp_list_head;
1302 do {
1303 nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
1304 } while ((cp = cp->cp_next) != cp_list_head);
1305
1306 loadbuf = &avenrun[0];
1307
1308 /*
1309 * This will report kernel threads as well as user lwps, but it
1310 * should be good enough for lx consumers.
1311 */
1312 nlwps = nthread;
1313 }
1314
1315 mutex_exit(&cpu_lock);
1316
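	/*
	 * The load averages are fixed-point values with FSHIFT fractional
	 * bits; split each into an integer part and a two-decimal-digit
	 * fractional part.
	 */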
1317 avenrun1 = loadbuf[0] >> FSHIFT;
1318 avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
1319 avenrun5 = loadbuf[1] >> FSHIFT;
1320 avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
1321 avenrun15 = loadbuf[2] >> FSHIFT;
1322 avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
1323
1324 lxpr_uiobuf_printf(uiobuf,
1325 "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
1326 avenrun1, avenrun1_cs,
1327 avenrun5, avenrun5_cs,
1328 avenrun15, avenrun15_cs,
1329 nrunnable, nlwps, 0);
1330 }
1331
1332 /*
1333 * lxpr_read_meminfo(): read the contents of the "meminfo" file.
1334 */
1335 static void
1336 lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1337 {
1338 long total_mem = physmem * PAGESIZE;
1339 long free_mem = freemem * PAGESIZE;
1340 long total_swap = k_anoninfo.ani_max * PAGESIZE;
1341 long used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
1342
1343 ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
1344
1345 lxpr_uiobuf_printf(uiobuf,
1346 " total: used: free: shared: buffers: cached:\n"
1347 "Mem: %8lu %8lu %8lu %8u %8u %8u\n"
1348 "Swap: %8lu %8lu %8lu\n"
1349 "MemTotal: %8lu kB\n"
1350 "MemFree: %8lu kB\n"
1351 "MemShared: %8u kB\n"
1352 "Buffers: %8u kB\n"
1353 "Cached: %8u kB\n"
1354 "SwapCached:%8u kB\n"
1355 "Active: %8u kB\n"
1356 "Inactive: %8u kB\n"
1357 "HighTotal: %8u kB\n"
1358 "HighFree: %8u kB\n"
1359 "LowTotal: %8u kB\n"
1360 "LowFree: %8u kB\n"
1361 "SwapTotal: %8lu kB\n"
1362 "SwapFree: %8lu kB\n",
1363 total_mem, total_mem - free_mem, free_mem, 0, 0, 0,
1364 total_swap, used_swap, total_swap - used_swap,
1365 btok(total_mem), /* MemTotal */
1366 btok(free_mem), /* MemFree */
1367 0, /* MemShared */
1368 0, /* Buffers */
1369 0, /* Cached */
1370 0, /* SwapCached */
1371 0, /* Active */
1372 0, /* Inactive */
1373 0, /* HighTotal */
1374 0, /* HighFree */
1375 btok(total_mem), /* LowTotal */
1376 btok(free_mem), /* LowFree */
1377 btok(total_swap), /* SwapTotal */
1378 btok(total_swap - used_swap)); /* SwapFree */
1379 }
1380
1381 /*
1382 * lxpr_read_mounts():
1383 */
1384 /* ARGSUSED */
1385 static void
1386 lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1387 {
1388 struct vfs *vfsp;
1389 struct vfs *vfslist;
1390 zone_t *zone = LXPTOZ(lxpnp);
1391 struct print_data {
1392 refstr_t *vfs_mntpt;
1393 refstr_t *vfs_resource;
1394 uint_t vfs_flag;
1395 int vfs_fstype;
1396 struct print_data *next;
1397 } *print_head = NULL;
1398 struct print_data **print_tail = &print_head;
1399 struct print_data *printp;
1400
1401 vfs_list_read_lock();
1402
1403 if (zone == global_zone) {
1404 vfsp = vfslist = rootvfs;
1405 } else {
1406 vfsp = vfslist = zone->zone_vfslist;
1407 /*
1408 * If the zone has a root entry, it will be the first in
1409 * the list. If it doesn't, we conjure one up.
1410 */
1411 if (vfslist == NULL ||
1412 strcmp(refstr_value(vfsp->vfs_mntpt),
1413 zone->zone_rootpath) != 0) {
1414 struct vfs *tvfsp;
1415 /*
1416 * The root of the zone is not a mount point. The vfs
1417 * we want to report is that of the zone's root vnode.
1418 */
1419 tvfsp = zone->zone_rootvp->v_vfsp;
1420
1421 lxpr_uiobuf_printf(uiobuf,
1422 "/ / %s %s 0 0\n",
1423 vfssw[tvfsp->vfs_fstype].vsw_name,
1424 tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1425
1426 }
1427 if (vfslist == NULL) {
1428 vfs_list_unlock();
1429 return;
1430 }
1431 }
1432
1433 /*
	 * Later on we have to do a lookupname(), which can end up causing
	 * another vfs_list_read_lock() to be called, which can lead to a
	 * deadlock.  To avoid this, we extract the data we need into a local
	 * list; we can then walk that list without holding
	 * vfs_list_read_lock().  We keep the list in the same order as the
	 * vfs list.
1439 */
1440 do {
1441 /* Skip mounts we shouldn't show */
1442 if (vfsp->vfs_flag & VFS_NOMNTTAB) {
1443 goto nextfs;
1444 }
1445
1446 printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
1447 refstr_hold(vfsp->vfs_mntpt);
1448 printp->vfs_mntpt = vfsp->vfs_mntpt;
1449 refstr_hold(vfsp->vfs_resource);
1450 printp->vfs_resource = vfsp->vfs_resource;
1451 printp->vfs_flag = vfsp->vfs_flag;
1452 printp->vfs_fstype = vfsp->vfs_fstype;
1453 printp->next = NULL;
1454
1455 *print_tail = printp;
1456 print_tail = &printp->next;
1457
1458 nextfs:
1459 vfsp = (zone == global_zone) ?
1460 vfsp->vfs_next : vfsp->vfs_zone_next;
1461
1462 } while (vfsp != vfslist);
1463
1464 vfs_list_unlock();
1465
1466 /*
	 * Now we can run through what we've extracted without holding
	 * vfs_list_read_lock().
1469 */
1470 printp = print_head;
1471 while (printp != NULL) {
1472 struct print_data *printp_next;
1473 const char *resource;
1474 char *mntpt;
1475 struct vnode *vp;
1476 int error;
1477
1478 mntpt = (char *)refstr_value(printp->vfs_mntpt);
1479 resource = refstr_value(printp->vfs_resource);
1480
1481 if (mntpt != NULL && mntpt[0] != '\0')
1482 mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
1483 else
1484 mntpt = "-";
1485
1486 error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
1487
1488 if (error != 0)
1489 goto nextp;
1490
1491 if (!(vp->v_flag & VROOT)) {
1492 VN_RELE(vp);
1493 goto nextp;
1494 }
1495 VN_RELE(vp);
1496
1497 if (resource != NULL && resource[0] != '\0') {
1498 if (resource[0] == '/') {
1499 resource = ZONE_PATH_VISIBLE(resource, zone) ?
1500 ZONE_PATH_TRANSLATE(resource, zone) :
1501 mntpt;
1502 }
1503 } else {
1504 resource = "-";
1505 }
1506
1507 lxpr_uiobuf_printf(uiobuf,
1508 "%s %s %s %s 0 0\n",
1509 resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
1510 printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
1511
1512 nextp:
1513 printp_next = printp->next;
1514 refstr_rele(printp->vfs_mntpt);
1515 refstr_rele(printp->vfs_resource);
1516 kmem_free(printp, sizeof (*printp));
1517 printp = printp_next;
1518
1519 }
1520 }
1521
1522 /*
1523 * lxpr_read_partitions():
1524 *
1525 * We don't support partitions in a local zone because it requires access to
1526 * physical devices. But we need to fake up enough of the file to show that we
1527 * have no partitions.
1528 */
1529 /* ARGSUSED */
1530 static void
1531 lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1532 {
1533 lxpr_uiobuf_printf(uiobuf,
1534 "major minor #blocks name rio rmerge rsect ruse "
1535 "wio wmerge wsect wuse running use aveq\n\n");
1536 }
1537
1538 /*
1539 * lxpr_read_version(): read the contents of the "version" file.
1540 */
1541 /* ARGSUSED */
1542 static void
1543 lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1544 {
1545 char *vers;
1546 if (lx_get_zone_kern_version(LXPTOZ(lxpnp)) <= LX_KERN_2_4)
1547 vers = LX_UNAME_RELEASE_2_4;
1548 else
1549 vers = LX_UNAME_RELEASE_2_6;
1550
1551 lxpr_uiobuf_printf(uiobuf,
1552 "%s version %s (%s version %d.%d.%d) "
1553 "#%s SMP %s\n",
1554 LX_UNAME_SYSNAME, vers,
1555 #if defined(__GNUC__)
1556 "gcc",
1557 __GNUC__,
1558 __GNUC_MINOR__,
1559 __GNUC_PATCHLEVEL__,
1560 #else
1561 "Sun C",
1562 __SUNPRO_C / 0x100,
1563 (__SUNPRO_C & 0xff) / 0x10,
1564 __SUNPRO_C & 0xf,
1565 #endif
1566 LX_UNAME_VERSION,
1567 "00:00:00 00/00/00");
1568 }
1569
1570
1571 /*
1572 * lxpr_read_stat(): read the contents of the "stat" file.
1573 *
1574 */
1575 /* ARGSUSED */
1576
1577 static void
1578 lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1579 {
1580 cpu_t *cp, *cpstart;
1581 int pools_enabled;
1582 ulong_t idle_cum = 0;
1583 ulong_t sys_cum = 0;
1584 ulong_t user_cum = 0;
1585 ulong_t irq_cum = 0;
1586 uint_t cpu_nrunnable_cum = 0;
1587 uint_t w_io_cum = 0;
1588
1589 ulong_t pgpgin_cum = 0;
1590 ulong_t pgpgout_cum = 0;
1591 ulong_t pgswapout_cum = 0;
1592 ulong_t pgswapin_cum = 0;
1593 ulong_t intr_cum = 0;
1594 ulong_t pswitch_cum = 0;
1595 ulong_t forks_cum = 0;
1596 hrtime_t msnsecs[NCMSTATES];
1597 int lx_kern_version = lx_get_zone_kern_version(LXPTOZ(lxpnp));
1598 /* temporary variable since scalehrtime modifies data in place */
1599 hrtime_t tmptime;
1600
1601 ASSERT(lxpnp->lxpr_type == LXPR_STAT);
1602
1603 mutex_enter(&cpu_lock);
1604 pools_enabled = pool_pset_enabled();
1605
1606 /* Calculate cumulative stats */
1607 cp = cpstart = CPU;
1608 do {
1609 int i;
1610
1611 /*
1612 * Don't count CPUs that aren't even in the system
1613 * or aren't up yet.
1614 */
1615 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
1616 continue;
1617 }
1618
1619 get_cpu_mstate(cp, msnsecs);
1620
1621 idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
1622 sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
1623 user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
1624
1625 pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
1626 pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
1627 pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
1628 pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
1629
1630 if (lx_kern_version >= LX_KERN_2_6) {
1631 cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
1632 w_io_cum += CPU_STATS(cp, sys.iowait);
1633 for (i = 0; i < NCMSTATES; i++) {
1634 tmptime = cp->cpu_intracct[i];
1635 scalehrtime(&tmptime);
1636 irq_cum += NSEC_TO_TICK(tmptime);
1637 }
1638 }
1639
1640 for (i = 0; i < PIL_MAX; i++)
1641 intr_cum += CPU_STATS(cp, sys.intr[i]);
1642
1643 pswitch_cum += CPU_STATS(cp, sys.pswitch);
1644 forks_cum += CPU_STATS(cp, sys.sysfork);
1645 forks_cum += CPU_STATS(cp, sys.sysvfork);
1646
1647 if (pools_enabled)
1648 cp = cp->cpu_next_part;
1649 else
1650 cp = cp->cpu_next;
1651 } while (cp != cpstart);
1652
1653 if (lx_kern_version >= LX_KERN_2_6) {
1654 lxpr_uiobuf_printf(uiobuf,
1655 "cpu %ld %ld %ld %ld %ld %ld %ld\n",
1656 user_cum, 0, sys_cum, idle_cum, 0, irq_cum, 0);
1657 } else {
1658 lxpr_uiobuf_printf(uiobuf,
1659 "cpu %ld %ld %ld %ld\n",
1660 user_cum, 0, sys_cum, idle_cum);
1661 }
1662
1663 /* Do per processor stats */
1664 do {
1665 int i;
1666
1667 ulong_t idle_ticks;
1668 ulong_t sys_ticks;
1669 ulong_t user_ticks;
1670 ulong_t irq_ticks = 0;
1671
1672 /*
1673 * Don't count CPUs that aren't even in the system
1674 * or aren't up yet.
1675 */
1676 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
1677 continue;
1678 }
1679
1680 get_cpu_mstate(cp, msnsecs);
1681
1682 idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
1683 sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
1684 user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
1685
1686 if (lx_kern_version >= LX_KERN_2_6) {
1687 for (i = 0; i < NCMSTATES; i++) {
1688 tmptime = cp->cpu_intracct[i];
1689 scalehrtime(&tmptime);
1690 irq_ticks += NSEC_TO_TICK(tmptime);
1691 }
1692
1693 lxpr_uiobuf_printf(uiobuf,
1694 "cpu%d %ld %ld %ld %ld %ld %ld %ld\n",
1695 cp->cpu_id, user_ticks, 0, sys_ticks, idle_ticks,
1696 0, irq_ticks, 0);
1697 } else {
1698 lxpr_uiobuf_printf(uiobuf,
1699 "cpu%d %ld %ld %ld %ld\n",
1700 cp->cpu_id,
1701 user_ticks, 0, sys_ticks, idle_ticks);
1702 }
1703
1704 if (pools_enabled)
1705 cp = cp->cpu_next_part;
1706 else
1707 cp = cp->cpu_next;
1708 } while (cp != cpstart);
1709
1710 mutex_exit(&cpu_lock);
1711
1712 if (lx_kern_version >= LX_KERN_2_6) {
1713 lxpr_uiobuf_printf(uiobuf,
1714 "page %lu %lu\n"
1715 "swap %lu %lu\n"
1716 "intr %lu\n"
1717 "ctxt %lu\n"
1718 "btime %lu\n"
1719 "processes %lu\n"
1720 "procs_running %lu\n"
1721 "procs_blocked %lu\n",
1722 pgpgin_cum, pgpgout_cum,
1723 pgswapin_cum, pgswapout_cum,
1724 intr_cum,
1725 pswitch_cum,
1726 boot_time,
1727 forks_cum,
1728 cpu_nrunnable_cum,
1729 w_io_cum);
1730 } else {
1731 lxpr_uiobuf_printf(uiobuf,
1732 "page %lu %lu\n"
1733 "swap %lu %lu\n"
1734 "intr %lu\n"
1735 "ctxt %lu\n"
1736 "btime %lu\n"
1737 "processes %lu\n",
1738 pgpgin_cum, pgpgout_cum,
1739 pgswapin_cum, pgswapout_cum,
1740 intr_cum,
1741 pswitch_cum,
1742 boot_time,
1743 forks_cum);
1744 }
1745 }
1746
1747
1748 /*
1749 * lxpr_read_uptime(): read the contents of the "uptime" file.
1750 *
 * The format is: "%.2lf %.2lf", uptime_secs, idle_secs.
 * Use fixed-point arithmetic to get two decimal places.
1753 */
1754 /* ARGSUSED */
1755 static void
1756 lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1757 {
1758 cpu_t *cp, *cpstart;
1759 int pools_enabled;
1760 ulong_t idle_cum = 0;
1761 ulong_t cpu_count = 0;
1762 ulong_t idle_s;
1763 ulong_t idle_cs;
1764 ulong_t up_s;
1765 ulong_t up_cs;
1766 hrtime_t birthtime;
1767 hrtime_t centi_sec = 10000000; /* 10^7 */
1768
1769 ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
1770
1771 /* Calculate cumulative stats */
1772 mutex_enter(&cpu_lock);
1773 pools_enabled = pool_pset_enabled();
1774
1775 cp = cpstart = CPU;
1776 do {
1777 /*
1778 * Don't count CPUs that aren't even in the system
1779 * or aren't up yet.
1780 */
1781 if ((cp->cpu_flags & CPU_EXISTS) == 0) {
1782 continue;
1783 }
1784
1785 idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
1786 idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
1787 cpu_count += 1;
1788
1789 if (pools_enabled)
1790 cp = cp->cpu_next_part;
1791 else
1792 cp = cp->cpu_next;
1793 } while (cp != cpstart);
1794 mutex_exit(&cpu_lock);
1795
	/* Get the zone's zsched process startup time */
1797 birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
1798 up_cs = (gethrtime() - birthtime) / centi_sec;
1799 up_s = up_cs / 100;
1800 up_cs %= 100;
1801
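	/*
	 * Average the idle tick count over the CPUs and convert it to
	 * seconds and centiseconds (hz ticks per second).
	 */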
1802 ASSERT(cpu_count > 0);
1803 idle_cum /= cpu_count;
1804 idle_s = idle_cum / hz;
1805 idle_cs = idle_cum % hz;
1806 idle_cs *= 100;
1807 idle_cs /= hz;
1808
1809 lxpr_uiobuf_printf(uiobuf,
1810 "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
1811 }
1812
1813 static const char *amd_x_edx[] = {
1814 NULL, NULL, NULL, NULL,
1815 NULL, NULL, NULL, NULL,
1816 NULL, NULL, NULL, "syscall",
1817 NULL, NULL, NULL, NULL,
1818 NULL, NULL, NULL, "mp",
1819 "nx", NULL, "mmxext", NULL,
1820 NULL, NULL, NULL, NULL,
1821 NULL, "lm", "3dnowext", "3dnow"
1822 };
1823
1824 static const char *amd_x_ecx[] = {
1825 "lahf_lm", NULL, "svm", NULL,
1826 "altmovcr8"
1827 };
1828
1829 static const char *tm_x_edx[] = {
1830 "recovery", "longrun", NULL, "lrti"
1831 };
1832
1833 /*
1834 * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
1835 */
1836 static const char *intc_x_edx[] = {
1837 NULL, NULL, NULL, NULL,
1838 NULL, NULL, NULL, NULL,
1839 NULL, NULL, NULL, "syscall",
1840 NULL, NULL, NULL, NULL,
1841 NULL, NULL, NULL, NULL,
1842 "nx", NULL, NULL, NULL,
1843 NULL, NULL, NULL, NULL,
1844 NULL, "lm", NULL, NULL
1845 };
1846
1847 static const char *intc_edx[] = {
1848 "fpu", "vme", "de", "pse",
1849 "tsc", "msr", "pae", "mce",
1850 "cx8", "apic", NULL, "sep",
1851 "mtrr", "pge", "mca", "cmov",
1852 "pat", "pse36", "pn", "clflush",
1853 NULL, "dts", "acpi", "mmx",
1854 "fxsr", "sse", "sse2", "ss",
1855 "ht", "tm", "ia64", "pbe"
1856 };
1857
1858 /*
1859 * "sse3" on linux is called "pni" (Prescott New Instructions).
1860 */
1861 static const char *intc_ecx[] = {
1862 "pni", NULL, NULL, "monitor",
1863 "ds_cpl", NULL, NULL, "est",
1864 "tm2", NULL, "cid", NULL,
1865 NULL, "cx16", "xtpr"
1866 };
1867
1868 static void
1869 lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
1870 {
1871 int i;
1872 uint32_t bits;
1873 cpu_t *cp, *cpstart;
1874 int pools_enabled;
1875 const char **fp;
1876 char brandstr[CPU_IDSTRLEN];
1877 struct cpuid_regs cpr;
1878 int maxeax;
1879 int std_ecx, std_edx, ext_ecx, ext_edx;
1880
1881 ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
1882
1883 mutex_enter(&cpu_lock);
1884 pools_enabled = pool_pset_enabled();
1885
1886 cp = cpstart = CPU;
1887 do {
1888 /*
1889 * This returns the maximum eax value for standard cpuid
1890 * functions in eax.
1891 */
1892 cpr.cp_eax = 0;
1893 (void) cpuid_insn(cp, &cpr);
1894 maxeax = cpr.cp_eax;
1895
1896 /*
1897 * Get standard x86 feature flags.
1898 */
1899 cpr.cp_eax = 1;
1900 (void) cpuid_insn(cp, &cpr);
1901 std_ecx = cpr.cp_ecx;
1902 std_edx = cpr.cp_edx;
1903
1904 /*
1905 * Now get extended feature flags.
1906 */
1907 cpr.cp_eax = 0x80000001;
1908 (void) cpuid_insn(cp, &cpr);
1909 ext_ecx = cpr.cp_ecx;
1910 ext_edx = cpr.cp_edx;
1911
1912 (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
1913
1914 lxpr_uiobuf_printf(uiobuf,
1915 "processor\t: %d\n"
1916 "vendor_id\t: %s\n"
1917 "cpu family\t: %d\n"
1918 "model\t\t: %d\n"
1919 "model name\t: %s\n"
1920 "stepping\t: %d\n"
1921 "cpu MHz\t\t: %u.%03u\n",
1922 cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
1923 cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
1924 (uint32_t)(cpu_freq_hz / 1000000),
1925 ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
1926
1927 lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
1928 getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
1929
		/*
		 * 'siblings' is used for HT-style threads.
		 *
		 * if (x86_feature & X86_HTT) {
		 *	lxpr_uiobuf_printf(uiobuf,
		 *	    "physical id\t: %lu\n"
		 *	    "siblings\t: %u\n",
		 *	    pg_plat_hw_instance_id(cp, PGHW_CHIP),
		 *	    cpuid_get_ncpu_per_chip(cp));
		 * }
		 */
1941 /*
1942 * Since we're relatively picky about running on older hardware,
1943 * we can be somewhat cavalier about the answers to these ones.
1944 *
1945 * In fact, given the hardware we support, we just say:
1946 *
1947 * fdiv_bug : no (if we're on a 64-bit kernel)
1948 * hlt_bug : no
1949 * f00f_bug : no
1950 * coma_bug : no
		 * wp : yes (write protect in supervisor mode)
1952 */
1953 lxpr_uiobuf_printf(uiobuf,
1954 "fdiv_bug\t: %s\n"
1955 "hlt_bug \t: no\n"
1956 "f00f_bug\t: no\n"
1957 "coma_bug\t: no\n"
1958 "fpu\t\t: %s\n"
1959 "fpu_exception\t: %s\n"
1960 "cpuid level\t: %d\n"
1961 "flags\t\t:",
1962 #if defined(__i386)
1963 fpu_pentium_fdivbug ? "yes" : "no",
1964 #else
1965 "no",
1966 #endif /* __i386 */
1967 fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
1968 maxeax);
1969
1970 for (bits = std_edx, fp = intc_edx, i = 0;
1971 i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
1972 if ((bits & (1 << i)) != 0 && *fp)
1973 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
1974
1975 /*
1976 * name additional features where appropriate
1977 */
1978 switch (x86_vendor) {
1979 case X86_VENDOR_Intel:
1980 for (bits = ext_edx, fp = intc_x_edx, i = 0;
1981 i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
1982 fp++, i++)
1983 if ((bits & (1 << i)) != 0 && *fp)
1984 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
1985 break;
1986
1987 case X86_VENDOR_AMD:
1988 for (bits = ext_edx, fp = amd_x_edx, i = 0;
1989 i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
1990 fp++, i++)
1991 if ((bits & (1 << i)) != 0 && *fp)
1992 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
1993
1994 for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
1995 i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
1996 fp++, i++)
1997 if ((bits & (1 << i)) != 0 && *fp)
1998 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
1999 break;
2000
2001 case X86_VENDOR_TM:
2002 for (bits = ext_edx, fp = tm_x_edx, i = 0;
2003 i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
2004 fp++, i++)
2005 if ((bits & (1 << i)) != 0 && *fp)
2006 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
2007 break;
2008 default:
2009 break;
2010 }
2011
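		/* Standard %ecx feature flags (SSE3 and later) */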
2012 for (bits = std_ecx, fp = intc_ecx, i = 0;
2013 i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
2014 if ((bits & (1 << i)) != 0 && *fp)
2015 lxpr_uiobuf_printf(uiobuf, " %s", *fp);
2016
2017 lxpr_uiobuf_printf(uiobuf, "\n\n");
2018
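		/* With pools enabled, only walk the CPUs in this partition */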
2019 if (pools_enabled)
2020 cp = cp->cpu_next_part;
2021 else
2022 cp = cp->cpu_next;
2023 } while (cp != cpstart);
2024
2025 mutex_exit(&cpu_lock);
2026 }
2027
2028 /* ARGSUSED */
2029 static void
2030 lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
2031 {
2032 ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
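	/* A read(2) of an fd entry is not supported; report EFAULT */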
2033 lxpr_uiobuf_seterr(uiobuf, EFAULT);
2034 }
2035
2036
2037
2038 /*
2039 * lxpr_getattr(): Vnode operation for VOP_GETATTR()
2040 */
2041 static int
2042 lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2043 caller_context_t *ct)
2044 {
2045	lxpr_node_t *lxpnp = VTOLXP(vp);
2046 lxpr_nodetype_t type = lxpnp->lxpr_type;
2047 extern uint_t nproc;
2048 int error;
2049
2050 /*
2051	 * If ATTR_REAL is set, return the attributes of the underlying
2052	 * vnode, but keep fd files looking like symlinks by preserving
2053	 * their mode.
2054 */
2055 if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
2056 vnode_t *rvp = lxpnp->lxpr_realvp;
2057
2058 /*
2059		 * Withhold attribute information from all but the owner or root.
2060 */
2061 if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
2062 return (error);
2063 }
2064
2065 /*
2066		 * Now get its attributes.
2067 */
2068 if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
2069 return (error);
2070 }
2071
2072 /*
2073 * if it's a file in lx /proc/pid/fd/xx then set its
2074 * mode and keep it looking like a symlink
2075 */
2076 if (type == LXPR_PID_FD_FD) {
2077 vap->va_mode = lxpnp->lxpr_mode;
2078 vap->va_type = vp->v_type;
2079 vap->va_size = 0;
2080 vap->va_nlink = 1;
2081 }
2082 return (0);
2083 }
2084
2085 /* Default attributes, that may be overridden below */
2086 bzero(vap, sizeof (*vap));
2087 vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
2088 vap->va_nlink = 1;
2089 vap->va_type = vp->v_type;
2090 vap->va_mode = lxpnp->lxpr_mode;
2091 vap->va_fsid = vp->v_vfsp->vfs_dev;
2092 vap->va_blksize = DEV_BSIZE;
2093 vap->va_uid = lxpnp->lxpr_uid;
2094 vap->va_gid = lxpnp->lxpr_gid;
2095 vap->va_nodeid = lxpnp->lxpr_ino;
2096
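	/* Directories advertise their size in LXPR_SDSIZE units */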
2097 switch (type) {
2098 case LXPR_PROCDIR:
2099 vap->va_nlink = nproc + 2 + PROCDIRFILES;
2100 vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
2101 break;
2102 case LXPR_PIDDIR:
2103 vap->va_nlink = PIDDIRFILES;
2104 vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
2105 break;
2106 case LXPR_SELF:
2107 vap->va_uid = crgetruid(curproc->p_cred);
2108 vap->va_gid = crgetrgid(curproc->p_cred);
2109 break;
2110 default:
2111 break;
2112 }
2113
2114 vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
2115 return (0);
2116 }
2117
2118
2119 /*
2120 * lxpr_access(): Vnode operation for VOP_ACCESS()
2121 */
2122 static int
2123 lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
2124 {
2125 lxpr_node_t *lxpnp = VTOLXP(vp);
2126 int shift = 0;
2127 proc_t *tp;
2128
2129 /* lx /proc is a read only file system */
2130 if (mode & VWRITE)
2131 return (EROFS);
2132
2133 /*
2134 * If this is a restricted file, check access permissions.
2135 */
2136 switch (lxpnp->lxpr_type) {
2137 case LXPR_PIDDIR:
2138 return (0);
2139 case LXPR_PID_CURDIR:
2140 case LXPR_PID_ENV:
2141 case LXPR_PID_EXE:
2142 case LXPR_PID_MAPS:
2143 case LXPR_PID_MEM:
2144 case LXPR_PID_ROOTDIR:
2145 case LXPR_PID_FDDIR:
2146 case LXPR_PID_FD_FD:
2147 if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
2148 return (ENOENT);
2149 if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
2150 priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
2151 lxpr_unlock(tp);
2152 return (EACCES);
2153 }
2154 lxpr_unlock(tp);
2155 default:
2156 break;
2157 }
2158
2159 if (lxpnp->lxpr_realvp != NULL) {
2160 /*
2161 * For these we use the underlying vnode's accessibility.
2162 */
2163 return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
2164 }
2165
2166 /* If user is root allow access regardless of permission bits */
2167 if (secpolicy_proc_access(cr) == 0)
2168 return (0);
2169
2170 /*
2171 * Access check is based on only
2172 * one of owner, group, public.
2173 * If not owner, then check group.
2174 * If not a member of the group, then
2175 * check public access.
2176 */
2177 if (crgetuid(cr) != lxpnp->lxpr_uid) {
2178 shift += 3;
2179 if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
2180 shift += 3;
2181 }
2182
2183 mode &= ~(lxpnp->lxpr_mode << shift);
2184
2185 if (mode == 0)
2186 return (0);
2187
2188 return (EACCES);
2189 }
2190
2191
2192
2193
2194 /* ARGSUSED */
2195 static vnode_t *
2196 lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
2197 {
2198 return (NULL);
2199 }
2200
2201
2202 /*
2203 * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
2204 */
2205 /* ARGSUSED */
2206 static int
2207 lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
2208 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
2209 int *direntflags, pathname_t *realpnp)
2210 {
2211 lxpr_node_t *lxpnp = VTOLXP(dp);
2212 lxpr_nodetype_t type = lxpnp->lxpr_type;
2213 int error;
2214
2215 ASSERT(dp->v_type == VDIR);
2216 ASSERT(type < LXPR_NFILES);
2217
2218 /*
2219 * we should never get here because the lookup
2220 * is done on the realvp for these nodes
2221 */
2222 ASSERT(type != LXPR_PID_FD_FD &&
2223 type != LXPR_PID_CURDIR &&
2224 type != LXPR_PID_ROOTDIR);
2225
2226 /*
2227 * restrict lookup permission to owner or root
2228 */
2229 if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
2230 return (error);
2231 }
2232
2233 /*
2234 * Just return the parent vnode
2235	 * if that's where we are trying to go
2236 */
2237 if (strcmp(comp, "..") == 0) {
2238 VN_HOLD(lxpnp->lxpr_parent);
2239 *vpp = lxpnp->lxpr_parent;
2240 return (0);
2241 }
2242
2243 /*
2244 * Special handling for directory searches
2245 * Note: null component name is synonym for
2246 * current directory being searched.
2247 */
2248 if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
2249 VN_HOLD(dp);
2250 *vpp = dp;
2251 return (0);
2252 }
2253
2254 *vpp = (lxpr_lookup_function[type](dp, comp));
2255 return ((*vpp == NULL) ? ENOENT : 0);
2256 }
2257
2258 /*
2259 * Do a sequential search on the given directory table
2260 */
2261 static vnode_t *
2262 lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
2263 lxpr_dirent_t *dirtab, int dirtablen)
2264 {
2265 lxpr_node_t *lxpnp;
2266 int count;
2267
2268 for (count = 0; count < dirtablen; count++) {
2269 if (strcmp(dirtab[count].d_name, comp) == 0) {
2270 lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
2271 dp = LXPTOV(lxpnp);
2272 ASSERT(dp != NULL);
2273 return (dp);
2274 }
2275 }
2276 return (NULL);
2277 }
2278
2279
2280 static vnode_t *
2281 lxpr_lookup_piddir(vnode_t *dp, char *comp)
2282 {
2283 proc_t *p;
2284
2285 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
2286
2287 p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
2288 if (p == NULL)
2289 return (NULL);
2290
2291 dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
2292
2293 lxpr_unlock(p);
2294
2295 return (dp);
2296 }
2297
2298
2299 /*
2300 * Lookup one of the process's open files.
2301 */
2302 static vnode_t *
2303 lxpr_lookup_fddir(vnode_t *dp, char *comp)
2304 {
2305 lxpr_node_t *dlxpnp = VTOLXP(dp);
2306 lxpr_node_t *lxpnp;
2307 vnode_t *vp = NULL;
2308 proc_t *p;
2309 file_t *fp;
2310 uint_t fd;
2311 int c;
2312 uf_entry_t *ufp;
2313 uf_info_t *fip;
2314
2315 ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);
2316
2317 /*
2318 * convert the string rendition of the filename
2319 * to a file descriptor
2320 */
2321 fd = 0;
2322 while ((c = *comp++) != '\0') {
2323 int ofd;
2324 if (c < '0' || c > '9')
2325 return (NULL);
2326
2327 ofd = fd;
2328 fd = 10*fd + c - '0';
2329 /* integer overflow */
2330 if (fd / 10 != ofd)
2331 return (NULL);
2332 }
2333
2334 /*
2335 * get the proc to work with and lock it
2336 */
2337 p = lxpr_lock(dlxpnp->lxpr_pid);
2338	if (p == NULL)
2339 return (NULL);
2340
2341 /*
2342 * If the process is a zombie or system process
2343 * it can't have any open files.
2344 */
2345 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
2346 lxpr_unlock(p);
2347 return (NULL);
2348 }
2349
2350 /*
2351 * get us a fresh node/vnode
2352 */
2353 lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);
2354
2355 /*
2356 * get open file info
2357 */
2358 fip = (&(p)->p_user.u_finfo);
2359 mutex_enter(&fip->fi_lock);
2360
2361 /*
2362 * got the fd data so now done with this proc
2363 */
2364 lxpr_unlock(p);
2365
2366 if (fd < fip->fi_nfiles) {
2367 UF_ENTER(ufp, fip, fd);
2368 /*
2369		 * Ensure the fd is still valid; it may have
2370		 * been closed between the readdir and
2371		 * the lookup.
2372 */
2373 if (fip->fi_list[fd].uf_file == NULL) {
2374 mutex_exit(&fip->fi_lock);
2375 UF_EXIT(ufp);
2376 lxpr_freenode(lxpnp);
2377 return (NULL);
2378 }
2379
2380 if ((fp = ufp->uf_file) != NULL)
2381 vp = fp->f_vnode;
2382 UF_EXIT(ufp);
2383 }
2384 mutex_exit(&fip->fi_lock);
2385
2386 if (vp == NULL) {
2387 lxpr_freenode(lxpnp);
2388 return (NULL);
2389 } else {
2390 /*
2391 * Fill in the lxpr_node so future references will
2392 * be able to find the underlying vnode.
2393 * The vnode is held on the realvp.
2394 */
2395 lxpnp->lxpr_realvp = vp;
2396 VN_HOLD(lxpnp->lxpr_realvp);
2397 }
2398
2399 dp = LXPTOV(lxpnp);
2400 ASSERT(dp != NULL);
2401
2402 return (dp);
2403 }
2404
2405
2406 static vnode_t *
2407 lxpr_lookup_netdir(vnode_t *dp, char *comp)
2408 {
2409 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
2410
2411 dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
2412
2413 return (dp);
2414 }
2415
2416
2417 static vnode_t *
2418 lxpr_lookup_procdir(vnode_t *dp, char *comp)
2419 {
2420 ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
2421
2422 /*
2423 * We know all the names of files & dirs in our
2424 * file system structure except those that are pid names.
2425 * These change as pids are created/deleted etc.
2426	 * So just look for a leading digit to see whether we are
2427	 * doing a pid lookup.
2428 *
2429 * Don't need to check for "self" as it is implemented as a symlink
2430 */
2431 if (*comp >= '0' && *comp <= '9') {
2432 pid_t pid = 0;
2433 lxpr_node_t *lxpnp = NULL;
2434 proc_t *p;
2435 int c;
2436
2437 while ((c = *comp++) != '\0')
2438 pid = 10*pid + c - '0';
2439
2440 /*
2441 * Can't continue if the process is still loading
2442 * or it doesn't really exist yet (or maybe it just died!)
2443 */
2444 p = lxpr_lock(pid);
2445 if (p == NULL)
2446 return (NULL);
2447
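		/* Hide processes the caller is not permitted to examine */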
2448 if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
2449 lxpr_unlock(p);
2450 return (NULL);
2451 }
2452
2453 /*
2454 * allocate and fill in a new lx /proc node
2455 */
2456 lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
2457
2458 lxpr_unlock(p);
2459
2460 dp = LXPTOV(lxpnp);
2461 ASSERT(dp != NULL);
2462
2463 return (dp);
2464
2465 }
2466
2467 /* Lookup fixed names */
2468 return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
2469 }
2470
2471
2472
2473
2474 /*
2475 * lxpr_readdir(): Vnode operation for VOP_READDIR()
2476 */
2477 /* ARGSUSED */
2478 static int
2479 lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
2480 caller_context_t *ct, int flags)
2481 {
2482 lxpr_node_t *lxpnp = VTOLXP(dp);
2483 lxpr_nodetype_t type = lxpnp->lxpr_type;
2484 ssize_t uresid;
2485 off_t uoffset;
2486 int error;
2487
2488 ASSERT(dp->v_type == VDIR);
2489 ASSERT(type < LXPR_NFILES);
2490
2491 /*
2492 * we should never get here because the readdir
2493 * is done on the realvp for these nodes
2494 */
2495 ASSERT(type != LXPR_PID_FD_FD &&
2496 type != LXPR_PID_CURDIR &&
2497 type != LXPR_PID_ROOTDIR);
2498
2499 /*
2500 * restrict readdir permission to owner or root
2501 */
2502 if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
2503 return (error);
2504
2505 uoffset = uiop->uio_offset;
2506 uresid = uiop->uio_resid;
2507
2508 /* can't do negative reads */
2509 if (uoffset < 0 || uresid <= 0)
2510 return (EINVAL);
2511
2512 /* can't read directory entries that don't exist! */
2513 if (uoffset % LXPR_SDSIZE)
2514 return (ENOENT);
2515
2516 return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
2517 }
2518
2519
2520 /* ARGSUSED */
2521 static int
2522 lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2523 {
2524 return (ENOTDIR);
2525 }
2526
2527 /*
2528 * This has the common logic for returning directory entries
2529 */
2530 static int
2531 lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
2532 lxpr_dirent_t *dirtab, int dirtablen)
2533 {
2534 /* bp holds one dirent64 structure */
2535 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
2536 dirent64_t *dirent = (dirent64_t *)bp;
2537 ssize_t oresid; /* save a copy for testing later */
2538 ssize_t uresid;
2539
2540 oresid = uiop->uio_resid;
2541
2542 /* clear out the dirent buffer */
2543 bzero(bp, sizeof (bp));
2544
2545 /*
2546 * Satisfy user request
2547 */
2548 while ((uresid = uiop->uio_resid) > 0) {
2549 int dirindex;
2550 off_t uoffset;
2551 int reclen;
2552 int error;
2553
2554 uoffset = uiop->uio_offset;
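		/* Offsets 0 and 1 are "." and ".."; dirtab entries follow */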
2555 dirindex = (uoffset / LXPR_SDSIZE) - 2;
2556
2557 if (uoffset == 0) {
2558
2559 dirent->d_ino = lxpnp->lxpr_ino;
2560 dirent->d_name[0] = '.';
2561 dirent->d_name[1] = '\0';
2562 reclen = DIRENT64_RECLEN(1);
2563
2564 } else if (uoffset == LXPR_SDSIZE) {
2565
2566 dirent->d_ino = lxpr_parentinode(lxpnp);
2567 dirent->d_name[0] = '.';
2568 dirent->d_name[1] = '.';
2569 dirent->d_name[2] = '\0';
2570 reclen = DIRENT64_RECLEN(2);
2571
2572 } else if (dirindex < dirtablen) {
2573 int slen = strlen(dirtab[dirindex].d_name);
2574
2575 dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
2576 lxpnp->lxpr_pid, 0);
2577
2578 ASSERT(slen < LXPNSIZ);
2579 (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
2580 reclen = DIRENT64_RECLEN(slen);
2581
2582 } else {
2583 /* Run out of table entries */
2584 if (eofp) {
2585 *eofp = 1;
2586 }
2587 return (0);
2588 }
2589
2590 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
2591 dirent->d_reclen = (ushort_t)reclen;
2592
2593 /*
2594 * if the size of the data to transfer is greater
2595		 * than that requested then we can't do it in this transfer.
2596 */
2597 if (reclen > uresid) {
2598 /*
2599 * Error if no entries have been returned yet.
2600 */
2601 if (uresid == oresid) {
2602 return (EINVAL);
2603 }
2604 break;
2605 }
2606
2607 /*
2608 * uiomove() updates both uiop->uio_resid and
2609 * uiop->uio_offset by the same amount. But we want
2610 * uiop->uio_offset to change in increments
2611 * of LXPR_SDSIZE, which is different from the number of bytes
2612 * being returned to the user.
2613 * So we set uiop->uio_offset separately, ignoring what
2614 * uiomove() does.
2615 */
2616 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))) {
2617 return (error);
2618 }
2619
2620 uiop->uio_offset = uoffset + LXPR_SDSIZE;
2621 }
2622
2623 /* Have run out of space, but could have just done last table entry */
2624 if (eofp) {
2625 *eofp =
2626 (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
2627 }
2628 return (0);
2629 }
2630
2631
2632 static int
2633 lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2634 {
2635 /* bp holds one dirent64 structure */
2636 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
2637 dirent64_t *dirent = (dirent64_t *)bp;
2638 ssize_t oresid; /* save a copy for testing later */
2639 ssize_t uresid;
2640 off_t uoffset;
2641 zoneid_t zoneid;
2642 pid_t pid;
2643 int error;
2644 int ceof;
2645
2646 ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
2647
2648 oresid = uiop->uio_resid;
2649 zoneid = LXPTOZ(lxpnp)->zone_id;
2650
2651 /*
2652 * We return directory entries in the order:
2653 * "." and ".." then the unique lx procfs files, then the
2654 * directories corresponding to the running processes.
2655 *
2656 * This is a good order because it allows us to more easily
2657	 * keep track of where we are between calls to getdents().
2658 * If the number of processes changes between calls then we
2659 * can't lose track of where we are in the lx procfs files.
2660 */
2661
2662 /* Do the fixed entries */
2663 error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
2664 PROCDIRFILES);
2665
2666 /* Finished if we got an error or if we couldn't do all the table */
2667 if (error != 0 || ceof == 0)
2668 return (error);
2669
2670 /* clear out the dirent buffer */
2671 bzero(bp, sizeof (bp));
2672
2673 /* Do the process entries */
2674 while ((uresid = uiop->uio_resid) > 0) {
2675 proc_t *p;
2676 int len;
2677 int reclen;
2678 int i;
2679
2680 uoffset = uiop->uio_offset;
2681
2682 /*
2683 * Stop when entire proc table has been examined.
2684 */
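		/* pid slots follow ".", "..", and the fixed entries */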
2685 i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
2686 if (i >= v.v_proc) {
2687 /* Run out of table entries */
2688 if (eofp) {
2689 *eofp = 1;
2690 }
2691 return (0);
2692 }
2693 mutex_enter(&pidlock);
2694
2695 /*
2696 * Skip indices for which there is no pid_entry, PIDs for
2697 * which there is no corresponding process, a PID of 0,
2698 * and anything the security policy doesn't allow
2699 * us to look at.
2700 */
2701 if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
2702 p->p_pid == 0 ||
2703 secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
2704 mutex_exit(&pidlock);
2705 goto next;
2706 }
2707 mutex_exit(&pidlock);
2708
2709 /*
2710 * Convert pid to the Linux default of 1 if we're the zone's
2711 * init process, otherwise use the value from the proc
2712 * structure
2713 */
2714 pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ?
2715 p->p_pid : 1);
2716
2717 /*
2718 * If this /proc was mounted in the global zone, view
2719 * all procs; otherwise, only view zone member procs.
2720 */
2721 if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
2722 goto next;
2723 }
2724
2725 ASSERT(p->p_stat != 0);
2726
2727 dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
2728 len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
2729 ASSERT(len < LXPNSIZ);
2730 reclen = DIRENT64_RECLEN(len);
2731
2732 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
2733 dirent->d_reclen = (ushort_t)reclen;
2734
2735 /*
2736 * if the size of the data to transfer is greater
2737		 * than that requested then we can't do it in this transfer.
2738 */
2739 if (reclen > uresid) {
2740 /*
2741 * Error if no entries have been returned yet.
2742 */
2743 if (uresid == oresid)
2744 return (EINVAL);
2745 break;
2746 }
2747
2748 /*
2749 * uiomove() updates both uiop->uio_resid and
2750 * uiop->uio_offset by the same amount. But we want
2751 * uiop->uio_offset to change in increments
2752 * of LXPR_SDSIZE, which is different from the number of bytes
2753 * being returned to the user.
2754		 * So we set uiop->uio_offset separately, at the `next:'
2755		 * label below, ignoring what uiomove() does.
2756 */
2757 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)))
2758 return (error);
2759
2760 next:
2761 uiop->uio_offset = uoffset + LXPR_SDSIZE;
2762 }
2763
2764 if (eofp)
2765 *eofp =
2766 (uiop->uio_offset >=
2767 ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
2768
2769 return (0);
2770 }
2771
2772
2773 static int
2774 lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2775 {
2776 proc_t *p;
2777
2778 ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
2779
2780 /* can't read its contents if it died */
2781 mutex_enter(&pidlock);
2782
2783 p = prfind((lxpnp->lxpr_pid == 1) ?
2784 curproc->p_zone->zone_proc_initpid : lxpnp->lxpr_pid);
2785
2786 if (p == NULL || p->p_stat == SIDL) {
2787 mutex_exit(&pidlock);
2788 return (ENOENT);
2789 }
2790 mutex_exit(&pidlock);
2791
2792 return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
2793 }
2794
2795
2796 static int
2797 lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2798 {
2799 ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
2800 return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
2801 }
2802
2803
2804 static int
2805 lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
2806 {
2807 /* bp holds one dirent64 structure */
2808 longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
2809 dirent64_t *dirent = (dirent64_t *)bp;
2810 ssize_t oresid; /* save a copy for testing later */
2811 ssize_t uresid;
2812 off_t uoffset;
2813 int error;
2814 int ceof;
2815 proc_t *p;
2816 int fddirsize;
2817 uf_info_t *fip;
2818
2819
2820 ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
2821
2822 oresid = uiop->uio_resid;
2823
2824 /* can't read its contents if it died */
2825 p = lxpr_lock(lxpnp->lxpr_pid);
2826 if (p == NULL)
2827 return (ENOENT);
2828
2829 /* Get open file info */
2830 fip = (&(p)->p_user.u_finfo);
2831
2832 if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
2833 fddirsize = 0;
2834 else
2835 fddirsize = fip->fi_nfiles;
2836
2837 mutex_enter(&fip->fi_lock);
2838 lxpr_unlock(p);
2839
2840 /* Do the fixed entries (in this case just "." & "..") */
2841 error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
2842
2843 /* Finished if we got an error or if we couldn't do all the table */
2844 if (error != 0 || ceof == 0)
2845		goto out;
2846
2847 /* clear out the dirent buffer */
2848 bzero(bp, sizeof (bp));
2849
2850 /*
2851 * Loop until user's request is satisfied or until
2852 * all file descriptors have been examined.
2853 */
2854 for (; (uresid = uiop->uio_resid) > 0;
2855 uiop->uio_offset = uoffset + LXPR_SDSIZE) {
2856 int reclen;
2857 int fd;
2858 int len;
2859
2860 uoffset = uiop->uio_offset;
2861
2862 /*
2863 * Stop at the end of the fd list
2864 */
2865 fd = (uoffset / LXPR_SDSIZE) - 2;
2866 if (fd >= fddirsize) {
2867 if (eofp) {
2868 *eofp = 1;
2869 }
2870 goto out;
2871 }
2872
2873 if (fip->fi_list[fd].uf_file == NULL)
2874 continue;
2875
2876 dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
2877 len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
2878 ASSERT(len < LXPNSIZ);
2879 reclen = DIRENT64_RECLEN(len);
2880
2881 dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
2882 dirent->d_reclen = (ushort_t)reclen;
2883
2884 if (reclen > uresid) {
2885 /*
2886 * Error if no entries have been returned yet.
2887 */
2888 if (uresid == oresid)
2889 error = EINVAL;
2890 goto out;
2891 }
2892
2893 if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)))
2894 goto out;
2895 }
2896
2897 if (eofp)
2898 *eofp =
2899 (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
2900
2901 out:
2902 mutex_exit(&fip->fi_lock);
2903 return (error);
2904 }
2905
2906
2907 /*
2908 * lxpr_readlink(): Vnode operation for VOP_READLINK()
2909 */
2910 /* ARGSUSED */
2911 static int
2912 lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
2913 {
2914 char bp[MAXPATHLEN + 1];
2915 size_t buflen = sizeof (bp);
2916 lxpr_node_t *lxpnp = VTOLXP(vp);
2917 vnode_t *rvp = lxpnp->lxpr_realvp;
2918 pid_t pid;
2919 int error = 0;
2920
2921 /* must be a symbolic link file */
2922 if (vp->v_type != VLNK)
2923 return (EINVAL);
2924
2925 /* Try to produce a symlink name for anything that has a realvp */
2926 if (rvp != NULL) {
2927 if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
2928 return (error);
2929 if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0)
2930 return (error);
2931 } else {
2932 switch (lxpnp->lxpr_type) {
2933 case LXPR_SELF:
2934 /*
2935 * Don't need to check result as every possible int
2936 * will fit within MAXPATHLEN bytes
2937 */
2938
2939 /*
2940 * Convert pid to the Linux default of 1 if we're the
2941 * zone's init process
2942 */
2943 pid = ((curproc->p_pid !=
2944 curproc->p_zone->zone_proc_initpid)
2945 ? curproc->p_pid : 1);
2946
2947 (void) snprintf(bp, buflen, "%d", pid);
2948 break;
2949 case LXPR_PID_CURDIR:
2950 case LXPR_PID_ROOTDIR:
2951 case LXPR_PID_EXE:
2952 return (EACCES);
2953 default:
2954 /*
2955			 * Return an error so that nothing mistakes the
2956			 * symlink for an empty one (and hence ".")
2957 */
2958 return (EINVAL);
2959 }
2960 }
2961
2962 /* copy the link data to user space */
2963 return (uiomove(bp, strlen(bp), UIO_READ, uiop));
2964 }
2965
2966
2967 /*
2968 * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
2969 * Vnode is no longer referenced, deallocate the file
2970 * and all its resources.
2971 */
2972 /* ARGSUSED */
2973 static void
2974 lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
2975 {
2976 lxpr_freenode(VTOLXP(vp));
2977 }
2978
2979
2980 /*
2981 * lxpr_sync(): Vnode operation for VOP_SYNC()
2982 */
2983 static int
2984 lxpr_sync()
2985 {
2986 /*
2987 * nothing to sync but this
2988 * function must never fail
2989 */
2990 return (0);
2991 }
2992
2993
2994 /*
2995 * lxpr_cmp(): Vnode operation for VOP_CMP()
2996 */
2997 static int
2998 lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
2999 {
3000 vnode_t *rvp;
3001
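	/*
	 * Chase each vnode down to its underlying real vnode (if any)
	 * so that lx /proc nodes shadowing the same file compare equal.
	 */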
3002 while (vn_matchops(vp1, lxpr_vnodeops) &&
3003 (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL)
3004 vp1 = rvp;
3005 while (vn_matchops(vp2, lxpr_vnodeops) &&
3006 (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL)
3007 vp2 = rvp;
3008 if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
3009 return (vp1 == vp2);
3010 return (VOP_CMP(vp1, vp2, ct));
3011 }
3012
3013
3014 /*
3015 * lxpr_realvp(): Vnode operation for VOP_REALVP()
3016 */
3017 static int
3018 lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
3019 {
3020 vnode_t *rvp;
3021
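	/*
	 * If this node shadows a real vnode, hand back that vnode
	 * (chasing any further VOP_REALVP() indirection); otherwise
	 * the lx /proc vnode is itself the real vnode.
	 */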
3022 if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
3023 vp = rvp;
3024 if (VOP_REALVP(vp, &rvp, ct) == 0)
3025 vp = rvp;
3026 }
3027
3028 *vpp = vp;
3029 return (0);
3030 }