1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25 * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
26 */
27
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 #include <sys/types.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/cmn_err.h>
35 #include <sys/cred.h>
36 #include <sys/priv.h>
37 #include <sys/debug.h>
38 #include <sys/errno.h>
39 #include <sys/inline.h>
40 #include <sys/kmem.h>
41 #include <sys/mman.h>
42 #include <sys/proc.h>
43 #include <sys/brand.h>
44 #include <sys/sobject.h>
45 #include <sys/sysmacros.h>
46 #include <sys/systm.h>
47 #include <sys/uio.h>
48 #include <sys/var.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/session.h>
52 #include <sys/pcb.h>
53 #include <sys/signal.h>
54 #include <sys/user.h>
55 #include <sys/disp.h>
56 #include <sys/class.h>
57 #include <sys/ts.h>
58 #include <sys/bitmap.h>
59 #include <sys/poll.h>
60 #include <sys/shm_impl.h>
61 #include <sys/fault.h>
62 #include <sys/syscall.h>
63 #include <sys/procfs.h>
64 #include <sys/processor.h>
65 #include <sys/cpuvar.h>
66 #include <sys/copyops.h>
67 #include <sys/time.h>
68 #include <sys/msacct.h>
69 #include <sys/flock_impl.h>
70 #include <sys/stropts.h>
71 #include <sys/strsubr.h>
72 #include <sys/pathname.h>
73 #include <sys/mode.h>
74 #include <sys/socketvar.h>
75 #include <sys/autoconf.h>
76 #include <sys/dtrace.h>
77 #include <sys/timod.h>
78 #include <netinet/udp.h>
79 #include <netinet/tcp.h>
80 #include <inet/cc.h>
81 #include <vm/as.h>
82 #include <vm/rm.h>
83 #include <vm/seg.h>
84 #include <vm/seg_vn.h>
85 #include <vm/seg_dev.h>
86 #include <vm/seg_spt.h>
87 #include <vm/page.h>
88 #include <sys/vmparam.h>
89 #include <sys/swap.h>
90 #include <fs/proc/prdata.h>
91 #include <sys/task.h>
92 #include <sys/project.h>
93 #include <sys/contract_impl.h>
94 #include <sys/contract/process.h>
95 #include <sys/contract/process_impl.h>
96 #include <sys/schedctl.h>
97 #include <sys/pool.h>
98 #include <sys/zone.h>
99 #include <sys/atomic.h>
100 #include <sys/sdt.h>
101
/*
 * NOTE(review): MAX_ITERS_SPIN is not referenced in the portion of this
 * file visible here; presumably it bounds a retry/spin loop elsewhere in
 * the file -- confirm before changing it.
 */
#define	MAX_ITERS_SPIN	5

/*
 * Page vector: two parallel arrays (protections and in-core flags), each
 * describing pg_npages pages, the first of which is page number pg_pnbase
 * within its segment.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

/*
 * Prototypes for file-local watched-page helpers; their definitions are
 * outside the portion of the file visible here.
 */
static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
118
/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * The caller must hold p->p_lock.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 * Returns NULL, with no thread lock held, if the process's
 * thread list is empty and there is no agent lwp.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	/*
	 * Walk the circular thread list exactly once, remembering the
	 * first lwp seen in each category (t_istop has its own
	 * replacement rule, see below).
	 */
	do {		/* for each lwp in the process */
		/*
		 * Virtually stopped lwps are classified as requested
		 * stops without taking the thread lock.
		 */
		if (VSTOPPED(t)) {	/* virtually stopped */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			/*
			 * A sleeper whose wait channel is p_holdlwps (and
			 * whose t_wchan0 is NULL) is counted as "holding"
			 * rather than as an ordinary sleeper.
			 */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				/*
				 * Note that a DTrace-stopped lwp seen after
				 * t_dtrace is already set falls through to
				 * the t_req test.
				 */
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * Pick the best candidate.  Order of preference, highest first:
	 * on-processor, runnable, sleeping, job-control stop, job-control
	 * stop with directed stop, event-of-interest stop, DTrace stop,
	 * requested stop, sleeping in hold, suspended.  If no category
	 * was filled (e.g. only TS_ZOMB lwps were seen), fall back to
	 * the head of the thread list.
	 */
	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	/*
	 * Each thread lock taken during the scan was released again, so
	 * take the chosen thread's lock now to honor the return contract.
	 */
	if (t != NULL)
		thread_lock(t);
	return (t);
}
265
266 /*
267 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
268 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
269 * on the /proc file descriptor. Called from stop() when a traced
270 * process stops on an event of interest. Also called from exit()
271 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
272 */
273 void
274 prnotify(struct vnode *vp)
275 {
276 prcommon_t *pcp = VTOP(vp)->pr_common;
277
278 mutex_enter(&pcp->prc_mutex);
279 cv_broadcast(&pcp->prc_wait);
280 mutex_exit(&pcp->prc_mutex);
281 if (pcp->prc_flags & PRC_POLL) {
282 /*
283 * We call pollwakeup() with POLLHUP to ensure that
284 * the pollers are awakened even if they are polling
285 * for nothing (i.e., waiting for the process to exit).
286 * This enables the use of the PRC_POLL flag for optimization
287 * (we can turn off PRC_POLL only if we know no pollers remain).
288 */
289 pcp->prc_flags &= ~PRC_POLL;
290 pollwakeup(&pcp->prc_pollhead, POLLHUP);
291 }
292 }
293
294 /* called immediately below, in prfree() */
295 static void
296 prfreenotify(vnode_t *vp)
297 {
298 prnode_t *pnp;
299 prcommon_t *pcp;
300
301 while (vp != NULL) {
302 pnp = VTOP(vp);
303 pcp = pnp->pr_common;
304 ASSERT(pcp->prc_thread == NULL);
305 pcp->prc_proc = NULL;
306 /*
307 * We can't call prnotify() here because we are holding
308 * pidlock. We assert that there is no need to.
309 */
310 mutex_enter(&pcp->prc_mutex);
311 cv_broadcast(&pcp->prc_wait);
312 mutex_exit(&pcp->prc_mutex);
313 ASSERT(!(pcp->prc_flags & PRC_POLL));
314
315 vp = pnp->pr_next;
316 pnp->pr_next = NULL;
317 }
318 }
319
/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 *
 * The caller must hold pidlock.  pr_pidlock and p->p_lock are acquired
 * and released here.
 */
void
prfree(proc_t *p)
{
	/* Captured once; indexes the per-slot condition variable below. */
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * Do not sleep while holding pr_pidlock.  After waking,
		 * p_lock is dropped as well so that both locks can be
		 * retaken in the pr_pidlock -> p_lock order given above
		 * before P_PR_LOCK is tested again.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	/* Disconnect every /proc vnode on both per-process chains. */
	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}
365
366 /*
367 * Called from a hook in exit() when a traced process is becoming a zombie.
368 */
369 void
370 prexit(proc_t *p)
371 {
372 ASSERT(MUTEX_HELD(&p->p_lock));
373
374 if (pr_watch_active(p)) {
375 pr_free_watchpoints(p);
376 watch_disable(curthread);
377 }
378 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */
379 if (p->p_trace) {
380 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
381 prnotify(p->p_trace);
382 }
383 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */
384 }
385
386 /*
387 * Called when a thread calls lwp_exit().
388 */
389 void
390 prlwpexit(kthread_t *t)
391 {
392 vnode_t *vp;
393 prnode_t *pnp;
394 prcommon_t *pcp;
395 proc_t *p = ttoproc(t);
396 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
397
398 ASSERT(t == curthread);
399 ASSERT(MUTEX_HELD(&p->p_lock));
400
401 /*
402 * The process must be blocked against /proc to do this safely.
403 * The lwp must not disappear while the process is marked P_PR_LOCK.
404 * It is the caller's responsibility to have called prbarrier(p).
405 */
406 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
407
408 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
409 pnp = VTOP(vp);
410 pcp = pnp->pr_common;
411 if (pcp->prc_thread == t) {
412 pcp->prc_thread = NULL;
413 pcp->prc_flags |= PRC_DESTROY;
414 }
415 }
416
417 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
418 pnp = VTOP(vp);
419 pcp = pnp->pr_common;
420 pcp->prc_thread = NULL;
421 pcp->prc_flags |= PRC_DESTROY;
422 prnotify(vp);
423 }
424
425 if (p->p_trace)
426 prnotify(p->p_trace);
427 }
428
429 /*
430 * Called when a zombie thread is joined or when a
431 * detached lwp exits. Called from lwp_hash_out().
432 */
433 void
434 prlwpfree(proc_t *p, lwpent_t *lep)
435 {
436 vnode_t *vp;
437 prnode_t *pnp;
438 prcommon_t *pcp;
439
440 ASSERT(MUTEX_HELD(&p->p_lock));
441
442 /*
443 * The process must be blocked against /proc to do this safely.
444 * The lwp must not disappear while the process is marked P_PR_LOCK.
445 * It is the caller's responsibility to have called prbarrier(p).
446 */
447 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
448
449 vp = lep->le_trace;
450 lep->le_trace = NULL;
451 while (vp) {
452 prnotify(vp);
453 pnp = VTOP(vp);
454 pcp = pnp->pr_common;
455 ASSERT(pcp->prc_thread == NULL &&
456 (pcp->prc_flags & PRC_DESTROY));
457 pcp->prc_tslot = -1;
458 vp = pnp->pr_next;
459 pnp->pr_next = NULL;
460 }
461
462 if (p->p_trace)
463 prnotify(p->p_trace);
464 }
465
466 /*
467 * Called from a hook in exec() when a thread starts exec().
468 */
469 void
470 prexecstart(void)
471 {
472 proc_t *p = ttoproc(curthread);
473 klwp_t *lwp = ttolwp(curthread);
474
475 /*
476 * The P_PR_EXEC flag blocks /proc operations for
477 * the duration of the exec().
478 * We can't start exec() while the process is
479 * locked by /proc, so we call prbarrier().
480 * lwp_nostop keeps the process from being stopped
481 * via job control for the duration of the exec().
482 */
483
484 ASSERT(MUTEX_HELD(&p->p_lock));
485 prbarrier(p);
486 lwp->lwp_nostop++;
487 p->p_proc_flag |= P_PR_EXEC;
488 }
489
490 /*
491 * Called from a hook in exec() when a thread finishes exec().
492 * The thread may or may not have succeeded. Some other thread
493 * may have beat it to the punch.
494 */
495 void
496 prexecend(void)
497 {
498 proc_t *p = ttoproc(curthread);
499 klwp_t *lwp = ttolwp(curthread);
500 vnode_t *vp;
501 prnode_t *pnp;
502 prcommon_t *pcp;
503 model_t model = p->p_model;
504 id_t tid = curthread->t_tid;
505 int tslot = curthread->t_dslot;
506
507 ASSERT(MUTEX_HELD(&p->p_lock));
508
509 lwp->lwp_nostop--;
510 if (p->p_flag & SEXITLWPS) {
511 /*
512 * We are on our way to exiting because some
513 * other thread beat us in the race to exec().
514 * Don't clear the P_PR_EXEC flag in this case.
515 */
516 return;
517 }
518
519 /*
520 * Wake up anyone waiting in /proc for the process to complete exec().
521 */
522 p->p_proc_flag &= ~P_PR_EXEC;
523 if ((vp = p->p_trace) != NULL) {
524 pcp = VTOP(vp)->pr_common;
525 mutex_enter(&pcp->prc_mutex);
526 cv_broadcast(&pcp->prc_wait);
527 mutex_exit(&pcp->prc_mutex);
528 for (; vp != NULL; vp = pnp->pr_next) {
529 pnp = VTOP(vp);
530 pnp->pr_common->prc_datamodel = model;
531 }
532 }
533 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
534 /*
535 * We dealt with the process common above.
536 */
537 ASSERT(p->p_trace != NULL);
538 pcp = VTOP(vp)->pr_common;
539 mutex_enter(&pcp->prc_mutex);
540 cv_broadcast(&pcp->prc_wait);
541 mutex_exit(&pcp->prc_mutex);
542 for (; vp != NULL; vp = pnp->pr_next) {
543 pnp = VTOP(vp);
544 pcp = pnp->pr_common;
545 pcp->prc_datamodel = model;
546 pcp->prc_tid = tid;
547 pcp->prc_tslot = tslot;
548 }
549 }
550 }
551
552 /*
553 * Called from a hook in relvm() just before freeing the address space.
554 * We free all the watched areas now.
555 */
556 void
557 prrelvm(void)
558 {
559 proc_t *p = ttoproc(curthread);
560
561 mutex_enter(&p->p_lock);
562 prbarrier(p); /* block all other /proc operations */
563 if (pr_watch_active(p)) {
564 pr_free_watchpoints(p);
565 watch_disable(curthread);
566 }
567 mutex_exit(&p->p_lock);
568 pr_free_watched_pages(p);
569 }
570
/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 *
 * 'up' is the user structure whose syscall entry/exit trace masks and
 * u_systrap are cleared when tracing is turned off below.  The function
 * operates on the current thread's process and takes and drops p_lock
 * itself.
 */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;	/* prc_writers of the process common, if any */

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	/* Now the trace vnodes of the (single) current lwp. */
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);		/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}
682
/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourself kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 *
 * On a NULL return only pr_pidlock is held; the caller is responsible
 * for dropping it.
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	/* No process common, or the process is already gone. */
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		/*
		 * Sleep without pr_pidlock.  Afterwards drop p_lock,
		 * retake pr_pidlock first, and re-check that the
		 * process still exists before touching p again.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
	return (p);
}
722
/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'pnp' is the /proc node through which the process is being accessed.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * On success p->p_lock is held and the process is marked P_PR_LOCK; on
 * every error return both have been released again.
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	/* pr_p_lock() returns with pr_pidlock held; it is not needed here. */
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		/*
		 * prc_mutex is taken before the process is unlocked;
		 * presumably so that the broadcast issued by prexecend()
		 * (under p_lock and prc_mutex) cannot be missed.
		 */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
808
809 /*
810 * Undo prlock() and pr_p_lock().
811 * p->p_lock is still held; pr_pidlock is no longer held.
812 *
813 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
814 * if any, waiting for the flag to be dropped; it retains p->p_lock.
815 *
816 * prunlock() calls prunmark() and then drops p->p_lock.
817 */
818 void
819 prunmark(proc_t *p)
820 {
821 ASSERT(p->p_proc_flag & P_PR_LOCK);
822 ASSERT(MUTEX_HELD(&p->p_lock));
823
824 cv_signal(&pr_pid_cv[p->p_slot]);
825 p->p_proc_flag &= ~P_PR_LOCK;
826 THREAD_KPRI_RELEASE();
827 }
828
829 void
830 prunlock(prnode_t *pnp)
831 {
832 prcommon_t *pcp = pnp->pr_common;
833 proc_t *p = pcp->prc_proc;
834
835 /*
836 * If we (or someone) gave it a SIGKILL, and it is not
837 * already a zombie, set it running unconditionally.
838 */
839 if ((p->p_flag & SKILLED) &&
840 !(p->p_flag & SEXITING) &&
841 !(pcp->prc_flags & PRC_DESTROY) &&
842 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
843 (void) pr_setrun(pnp, 0);
844 prunmark(p);
845 mutex_exit(&p->p_lock);
846 }
847
848 /*
849 * Called while holding p->p_lock to delay until the process is unlocked.
850 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
851 * The process cannot become locked again until p->p_lock is dropped.
852 */
853 void
854 prbarrier(proc_t *p)
855 {
856 ASSERT(MUTEX_HELD(&p->p_lock));
857
858 if (p->p_proc_flag & P_PR_LOCK) {
859 /* The process is locked; delay until not locked */
860 uint_t slot = p->p_slot;
861
862 while (p->p_proc_flag & P_PR_LOCK)
863 cv_wait(&pr_pid_cv[slot], &p->p_lock);
864 cv_signal(&pr_pid_cv[slot]);
865 }
866 }
867
/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 *
 * Fills *sp with the process-wide status of p, followed by the status
 * of a representative lwp chosen by prchoose().  'zp' is passed through
 * to prgetlwpstatus().  The caller must hold p->p_lock, and the process
 * must have at least one lwp (asserted below).
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/* Only the choice of lwp is needed; release its thread lock at once. */
	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	/*
	 * NOTE(review): this relies on pr_lwp being the final member of
	 * pstatus_t -- confirm against the structure definition.
	 */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	/* Own user/system times come from microstate accounting ... */
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	/* ... while the p_cutime/p_cstime values are converted from ticks. */
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	/* pr_dmodel is assigned only for the two data models listed. */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
934
935 #ifdef _SYSCALL32_IMPL
/*
 * Return lwp status in the 32-bit (lwpstatus32_t) layout.
 *
 * Fills *sp with the status of thread t.  'zp' is the zone used to
 * decide whether signal-sender details in pr_info must be rewritten
 * (see below).  The caller must hold p->p_lock; note that the lock is
 * dropped and reacquired near the end of this function.
 */
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	/* Build the PR_* flag word from thread and process state. */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	/* A virtually stopped lwp is always reported as a requested stop. */
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	/*
	 * pr_info comes from lwp_siginfo for a fault stop, otherwise from
	 * the current signal's queued info, if there is one.
	 */
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	/*
	 * If lwp_siginfo is user-originated, zp is a non-global zone, and
	 * the recorded zone id differs from zp's, replace the sender's
	 * identity with zp's zsched pid, uid 0, ctid -1 and zp's zone id.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	/*
	 * *sp was zeroed above and at most sizeof - 1 bytes are copied,
	 * so pr_clname always remains NUL-terminated.
	 */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	/* Reported system time is LMS_SYSTEM plus LMS_TRAP. */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	/* System call number and arguments, if the lwp is in a system call. */
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	/* Registers are fetched only for a stopped lwp or for ourselves. */
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	/* Return values: stopped on syscall exit, or PR_VFORKP is set. */
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For execve, overwrite the first three arguments:
		 * sysarg[0] becomes the AT_SUN_EXECNAME aux vector
		 * value if one is found (otherwise 0), and sysarg[1]
		 * and sysarg[2] come from u_argv and u_envp.
		 */
		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	/* Retake p_lock so the caller's locking state is unchanged. */
	mutex_enter(&p->p_lock);
}
1094
/*
 * Return process/lwp status in the 32-bit (pstatus32_t) layout.
 *
 * Fills *sp with the process-wide status of p, followed by the status
 * of a representative lwp chosen by prchoose().  'zp' is passed through
 * to prgetlwpstatus32().  The caller must hold p->p_lock; note that
 * prgetlwpstatus32() drops and reacquires that lock, which is why all
 * process-level fields are filled in before it is called.
 */
void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/* Only the choice of lwp is needed; release its thread lock at once. */
	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	/*
	 * NOTE(review): this relies on pr_lwp being the final member of
	 * pstatus32_t -- confirm against the structure definition.
	 */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	/* Addresses and sizes are narrowed to 32 bits for this layout. */
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid = p->p_pgrp;
	sp->pr_sid = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	/* Own user/system times come from microstate accounting ... */
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	/* ... while the p_cutime/p_cstime values are converted from ticks. */
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	/* pr_dmodel is assigned only for the two data models listed. */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
1157 #endif /* _SYSCALL32_IMPL */
1158
1159 /*
1160 * Return lwp status.
1161 */
1162 void
1163 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1164 {
1165 proc_t *p = ttoproc(t);
1166 klwp_t *lwp = ttolwp(t);
1167 struct mstate *ms = &lwp->lwp_mstate;
1168 hrtime_t usr, sys;
1169 int flags;
1170 ulong_t instr;
1171
1172 ASSERT(MUTEX_HELD(&p->p_lock));
1173
1174 bzero(sp, sizeof (*sp));
1175 flags = 0L;
1176 if (t->t_state == TS_STOPPED) {
1177 flags |= PR_STOPPED;
1178 if ((t->t_schedflag & TS_PSTART) == 0)
1179 flags |= PR_ISTOP;
1180 } else if (VSTOPPED(t)) {
1181 flags |= PR_STOPPED|PR_ISTOP;
1182 }
1183 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1184 flags |= PR_DSTOP;
1185 if (lwp->lwp_asleep)
1186 flags |= PR_ASLEEP;
1187 if (t == p->p_agenttp)
1188 flags |= PR_AGENT;
1189 if (!(t->t_proc_flag & TP_TWAIT))
1190 flags |= PR_DETACH;
1191 if (t->t_proc_flag & TP_DAEMON)
1192 flags |= PR_DAEMON;
1193 if (p->p_proc_flag & P_PR_FORK)
1194 flags |= PR_FORK;
1195 if (p->p_proc_flag & P_PR_RUNLCL)
1196 flags |= PR_RLC;
1197 if (p->p_proc_flag & P_PR_KILLCL)
1198 flags |= PR_KLC;
1199 if (p->p_proc_flag & P_PR_ASYNC)
1200 flags |= PR_ASYNC;
1201 if (p->p_proc_flag & P_PR_BPTADJ)
1202 flags |= PR_BPTADJ;
1203 if (p->p_proc_flag & P_PR_PTRACE)
1204 flags |= PR_PTRACE;
1205 if (p->p_flag & SMSACCT)
1206 flags |= PR_MSACCT;
1207 if (p->p_flag & SMSFORK)
1208 flags |= PR_MSFORK;
1209 if (p->p_flag & SVFWAIT)
1210 flags |= PR_VFORKP;
1211 if (p->p_pgidp->pid_pgorphaned)
1212 flags |= PR_ORPHAN;
1213 if (p->p_pidflag & CLDNOSIGCHLD)
1214 flags |= PR_NOSIGCHLD;
1215 if (p->p_pidflag & CLDWAITPID)
1216 flags |= PR_WAITPID;
1217 sp->pr_flags = flags;
1218 if (VSTOPPED(t)) {
1219 sp->pr_why = PR_REQUESTED;
1220 sp->pr_what = 0;
1221 } else {
1222 sp->pr_why = t->t_whystop;
1223 sp->pr_what = t->t_whatstop;
1224 }
1225 sp->pr_lwpid = t->t_tid;
1226 sp->pr_cursig = lwp->lwp_cursig;
1227 prassignset(&sp->pr_lwppend, &t->t_sig);
1228 schedctl_finish_sigblock(t);
1229 prassignset(&sp->pr_lwphold, &t->t_hold);
1230 if (t->t_whystop == PR_FAULTED)
1231 bcopy(&lwp->lwp_siginfo,
1232 &sp->pr_info, sizeof (k_siginfo_t));
1233 else if (lwp->lwp_curinfo)
1234 bcopy(&lwp->lwp_curinfo->sq_info,
1235 &sp->pr_info, sizeof (k_siginfo_t));
1236 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1237 sp->pr_info.si_zoneid != zp->zone_id) {
1238 sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1239 sp->pr_info.si_uid = 0;
1240 sp->pr_info.si_ctid = -1;
1241 sp->pr_info.si_zoneid = zp->zone_id;
1242 }
1243 sp->pr_altstack = lwp->lwp_sigaltstack;
1244 prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1245 sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1246 sp->pr_ustack = lwp->lwp_ustack;
1247 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1248 sizeof (sp->pr_clname) - 1);
1249 if (flags & PR_STOPPED)
1250 hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1251 usr = ms->ms_acct[LMS_USER];
1252 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1253 scalehrtime(&usr);
1254 scalehrtime(&sys);
1255 hrt2ts(usr, &sp->pr_utime);
1256 hrt2ts(sys, &sp->pr_stime);
1257
1258 /*
1259 * Fetch the current instruction, if not a system process.
1260 * We don't attempt this unless the lwp is stopped.
1261 */
1262 if ((p->p_flag & SSYS) || p->p_as == &kas)
1263 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1264 else if (!(flags & PR_STOPPED))
1265 sp->pr_flags |= PR_PCINVAL;
1266 else if (!prfetchinstr(lwp, &instr))
1267 sp->pr_flags |= PR_PCINVAL;
1268 else
1269 sp->pr_instr = instr;
1270
1271 /*
1272 * Drop p_lock while touching the lwp's stack.
1273 */
1274 mutex_exit(&p->p_lock);
1275 if (prisstep(lwp))
1276 sp->pr_flags |= PR_STEP;
1277 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1278 int i;
1279
1280 sp->pr_syscall = get_syscall_args(lwp,
1281 (long *)sp->pr_sysarg, &i);
1282 sp->pr_nsysarg = (ushort_t)i;
1283 }
1284 if ((flags & PR_STOPPED) || t == curthread)
1285 prgetprregs(lwp, sp->pr_reg);
1286 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1287 (flags & PR_VFORKP)) {
1288 user_t *up;
1289 auxv_t *auxp;
1290 int i;
1291
1292 sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1293 if (sp->pr_errno == 0)
1294 sp->pr_errpriv = PRIV_NONE;
1295 else
1296 sp->pr_errpriv = lwp->lwp_badpriv;
1297
1298 if (t->t_sysnum == SYS_execve) {
1299 up = PTOU(p);
1300 sp->pr_sysarg[0] = 0;
1301 sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1302 sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1303 for (i = 0, auxp = up->u_auxv;
1304 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1305 i++, auxp++) {
1306 if (auxp->a_type == AT_SUN_EXECNAME) {
1307 sp->pr_sysarg[0] =
1308 (uintptr_t)auxp->a_un.a_ptr;
1309 break;
1310 }
1311 }
1312 }
1313 }
1314 if (prhasfp())
1315 prgetprfpregs(lwp, &sp->pr_fpreg);
1316 mutex_enter(&p->p_lock);
1317 }
1318
1319 /*
1320 * Get the sigaction structure for the specified signal. The u-block
1321 * must already have been mapped in by the caller.
1322 */
1323 void
1324 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1325 {
1326 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1327
1328 bzero(sp, sizeof (*sp));
1329
1330 if (sig != 0 && (unsigned)sig < nsig) {
1331 sp->sa_handler = up->u_signal[sig-1];
1332 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1333 if (sigismember(&up->u_sigonstack, sig))
1334 sp->sa_flags |= SA_ONSTACK;
1335 if (sigismember(&up->u_sigresethand, sig))
1336 sp->sa_flags |= SA_RESETHAND;
1337 if (sigismember(&up->u_sigrestart, sig))
1338 sp->sa_flags |= SA_RESTART;
1339 if (sigismember(&p->p_siginfo, sig))
1340 sp->sa_flags |= SA_SIGINFO;
1341 if (sigismember(&up->u_signodefer, sig))
1342 sp->sa_flags |= SA_NODEFER;
1343 if (sig == SIGCLD) {
1344 if (p->p_flag & SNOWAIT)
1345 sp->sa_flags |= SA_NOCLDWAIT;
1346 if ((p->p_flag & SJCTL) == 0)
1347 sp->sa_flags |= SA_NOCLDSTOP;
1348 }
1349 }
1350 }
1351
1352 #ifdef _SYSCALL32_IMPL
1353 void
1354 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1355 {
1356 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1357
1358 bzero(sp, sizeof (*sp));
1359
1360 if (sig != 0 && (unsigned)sig < nsig) {
1361 sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1362 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1363 if (sigismember(&up->u_sigonstack, sig))
1364 sp->sa_flags |= SA_ONSTACK;
1365 if (sigismember(&up->u_sigresethand, sig))
1366 sp->sa_flags |= SA_RESETHAND;
1367 if (sigismember(&up->u_sigrestart, sig))
1368 sp->sa_flags |= SA_RESTART;
1369 if (sigismember(&p->p_siginfo, sig))
1370 sp->sa_flags |= SA_SIGINFO;
1371 if (sigismember(&up->u_signodefer, sig))
1372 sp->sa_flags |= SA_NODEFER;
1373 if (sig == SIGCLD) {
1374 if (p->p_flag & SNOWAIT)
1375 sp->sa_flags |= SA_NOCLDWAIT;
1376 if ((p->p_flag & SJCTL) == 0)
1377 sp->sa_flags |= SA_NOCLDSTOP;
1378 }
1379 }
1380 }
1381 #endif /* _SYSCALL32_IMPL */
1382
1383 /*
1384 * Count the number of segments in this process's address space.
1385 */
1386 int
1387 prnsegs(struct as *as, int reserved)
1388 {
1389 int n = 0;
1390 struct seg *seg;
1391
1392 ASSERT(as != &kas && AS_WRITE_HELD(as));
1393
1394 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1395 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1396 caddr_t saddr, naddr;
1397 void *tmp = NULL;
1398
1399 if ((seg->s_flags & S_HOLE) != 0) {
1400 continue;
1401 }
1402
1403 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1404 (void) pr_getprot(seg, reserved, &tmp,
1405 &saddr, &naddr, eaddr);
1406 if (saddr != naddr)
1407 n++;
1408 }
1409
1410 ASSERT(tmp == NULL);
1411 }
1412
1413 return (n);
1414 }
1415
1416 /*
1417 * Convert uint32_t to decimal string w/o leading zeros.
1418 * Add trailing null characters if 'len' is greater than string length.
1419 * Return the string length.
1420 */
1421 int
1422 pr_u32tos(uint32_t n, char *s, int len)
1423 {
1424 char cbuf[11]; /* 32-bit unsigned integer fits in 10 digits */
1425 char *cp = cbuf;
1426 char *end = s + len;
1427
1428 do {
1429 *cp++ = (char)(n % 10 + '0');
1430 n /= 10;
1431 } while (n);
1432
1433 len = (int)(cp - cbuf);
1434
1435 do {
1436 *s++ = *--cp;
1437 } while (cp > cbuf);
1438
1439 while (s < end) /* optional pad */
1440 *s++ = '\0';
1441
1442 return (len);
1443 }
1444
1445 /*
1446 * Convert uint64_t to decimal string w/o leading zeros.
1447 * Return the string length.
1448 */
1449 static int
1450 pr_u64tos(uint64_t n, char *s)
1451 {
1452 char cbuf[21]; /* 64-bit unsigned integer fits in 20 digits */
1453 char *cp = cbuf;
1454 int len;
1455
1456 do {
1457 *cp++ = (char)(n % 10 + '0');
1458 n /= 10;
1459 } while (n);
1460
1461 len = (int)(cp - cbuf);
1462
1463 do {
1464 *s++ = *--cp;
1465 } while (cp > cbuf);
1466
1467 return (len);
1468 }
1469
1470 file_t *
1471 pr_getf(proc_t *p, uint_t fd, short *flag)
1472 {
1473 uf_entry_t *ufp;
1474 uf_info_t *fip;
1475 file_t *fp;
1476
1477 ASSERT(MUTEX_HELD(&p->p_lock));
1478
1479 fip = P_FINFO(p);
1480
1481 if (fd >= fip->fi_nfiles)
1482 return (NULL);
1483
1484 mutex_exit(&p->p_lock);
1485 mutex_enter(&fip->fi_lock);
1486 UF_ENTER(ufp, fip, fd);
1487 if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
1488 if (flag != NULL)
1489 *flag = ufp->uf_flag;
1490 ufp->uf_refcnt++;
1491 } else {
1492 fp = NULL;
1493 }
1494 UF_EXIT(ufp);
1495 mutex_exit(&fip->fi_lock);
1496 mutex_enter(&p->p_lock);
1497
1498 return (fp);
1499 }
1500
1501 void
1502 pr_releasef(proc_t *p, uint_t fd)
1503 {
1504 uf_entry_t *ufp;
1505 uf_info_t *fip;
1506
1507 ASSERT(MUTEX_HELD(&p->p_lock));
1508
1509 fip = P_FINFO(p);
1510
1511 mutex_exit(&p->p_lock);
1512 mutex_enter(&fip->fi_lock);
1513 UF_ENTER(ufp, fip, fd);
1514 ASSERT(ufp->uf_refcnt > 0);
1515 ufp->uf_refcnt--;
1516 UF_EXIT(ufp);
1517 mutex_exit(&fip->fi_lock);
1518 mutex_enter(&p->p_lock);
1519 }
1520
1521 void
1522 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1523 {
1524 char *s = name;
1525 struct vfs *vfsp;
1526 struct vfssw *vfsswp;
1527
1528 if ((vfsp = vp->v_vfsp) != NULL &&
1529 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1530 *vfsswp->vsw_name) {
1531 (void) strcpy(s, vfsswp->vsw_name);
1532 s += strlen(s);
1533 *s++ = '.';
1534 }
1535 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1536 *s++ = '.';
1537 s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1538 *s++ = '.';
1539 s += pr_u64tos(vattr->va_nodeid, s);
1540 *s++ = '\0';
1541 }
1542
1543 struct seg *
1544 break_seg(proc_t *p)
1545 {
1546 caddr_t addr = p->p_brkbase;
1547 struct seg *seg;
1548 struct vnode *vp;
1549
1550 if (p->p_brksize != 0)
1551 addr += p->p_brksize - 1;
1552 seg = as_segat(p->p_as, addr);
1553 if (seg != NULL && seg->s_ops == &segvn_ops &&
1554 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1555 return (seg);
1556 return (NULL);
1557 }
1558
1559 /*
1560 * Implementation of service functions to handle procfs generic chained
1561 * copyout buffers.
1562 */
1563 typedef struct pr_iobuf_list {
1564 list_node_t piol_link; /* buffer linkage */
1565 size_t piol_size; /* total size (header + data) */
1566 size_t piol_usedsize; /* amount to copy out from this buf */
1567 } piol_t;
1568
1569 #define MAPSIZE (64 * 1024)
1570 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1]))
1571
1572 void
1573 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1574 {
1575 piol_t *iol;
1576 size_t initial_size = MIN(1, n) * itemsize;
1577
1578 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1579
1580 ASSERT(list_head(iolhead) == NULL);
1581 ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1582 ASSERT(initial_size > 0);
1583
1584 /*
1585 * Someone creating chained copyout buffers may ask for less than
1586 * MAPSIZE if the amount of data to be buffered is known to be
1587 * smaller than that.
1588 * But in order to prevent involuntary self-denial of service,
1589 * the requested input size is clamped at MAPSIZE.
1590 */
1591 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1592 iol = kmem_alloc(initial_size, KM_SLEEP);
1593 list_insert_head(iolhead, iol);
1594 iol->piol_usedsize = 0;
1595 iol->piol_size = initial_size;
1596 }
1597
1598 void *
1599 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1600 {
1601 piol_t *iol;
1602 char *new;
1603
1604 ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1605 ASSERT(list_head(iolhead) != NULL);
1606
1607 iol = (piol_t *)list_tail(iolhead);
1608
1609 if (iol->piol_size <
1610 iol->piol_usedsize + sizeof (*iol) + itemsize) {
1611 /*
1612 * Out of space in the current buffer. Allocate more.
1613 */
1614 piol_t *newiol;
1615
1616 newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1617 newiol->piol_size = MAPSIZE;
1618 newiol->piol_usedsize = 0;
1619
1620 list_insert_after(iolhead, iol, newiol);
1621 iol = list_next(iolhead, iol);
1622 ASSERT(iol == newiol);
1623 }
1624 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1625 iol->piol_usedsize += itemsize;
1626 bzero(new, itemsize);
1627 return (new);
1628 }
1629
1630 void
1631 pr_iol_freelist(list_t *iolhead)
1632 {
1633 piol_t *iol;
1634
1635 while ((iol = list_head(iolhead)) != NULL) {
1636 list_remove(iolhead, iol);
1637 kmem_free(iol, iol->piol_size);
1638 }
1639 list_destroy(iolhead);
1640 }
1641
1642 int
1643 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1644 {
1645 int error = errin;
1646 piol_t *iol;
1647
1648 while ((iol = list_head(iolhead)) != NULL) {
1649 list_remove(iolhead, iol);
1650 if (!error) {
1651 if (copyout(PIOL_DATABUF(iol), *tgt,
1652 iol->piol_usedsize))
1653 error = EFAULT;
1654 *tgt += iol->piol_usedsize;
1655 }
1656 kmem_free(iol, iol->piol_size);
1657 }
1658 list_destroy(iolhead);
1659
1660 return (error);
1661 }
1662
1663 int
1664 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1665 {
1666 offset_t off = uiop->uio_offset;
1667 char *base;
1668 size_t size;
1669 piol_t *iol;
1670 int error = errin;
1671
1672 while ((iol = list_head(iolhead)) != NULL) {
1673 list_remove(iolhead, iol);
1674 base = PIOL_DATABUF(iol);
1675 size = iol->piol_usedsize;
1676 if (off <= size && error == 0 && uiop->uio_resid > 0)
1677 error = uiomove(base + off, size - off,
1678 UIO_READ, uiop);
1679 off = MAX(0, off - (offset_t)size);
1680 kmem_free(iol, iol->piol_size);
1681 }
1682 list_destroy(iolhead);
1683
1684 return (error);
1685 }
1686
1687 /*
1688 * Return an array of structures with memory map information.
1689 * We allocate here; the caller must deallocate.
1690 */
1691 int
1692 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1693 {
1694 struct as *as = p->p_as;
1695 prmap_t *mp;
1696 struct seg *seg;
1697 struct seg *brkseg, *stkseg;
1698 struct vnode *vp;
1699 struct vattr vattr;
1700 uint_t prot;
1701
1702 ASSERT(as != &kas && AS_WRITE_HELD(as));
1703
1704 /*
1705 * Request an initial buffer size that doesn't waste memory
1706 * if the address space has only a small number of segments.
1707 */
1708 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1709
1710 if ((seg = AS_SEGFIRST(as)) == NULL)
1711 return (0);
1712
1713 brkseg = break_seg(p);
1714 stkseg = as_segat(as, prgetstackbase(p));
1715
1716 do {
1717 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1718 caddr_t saddr, naddr;
1719 void *tmp = NULL;
1720
1721 if ((seg->s_flags & S_HOLE) != 0) {
1722 continue;
1723 }
1724
1725 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1726 prot = pr_getprot(seg, reserved, &tmp,
1727 &saddr, &naddr, eaddr);
1728 if (saddr == naddr)
1729 continue;
1730
1731 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1732
1733 mp->pr_vaddr = (uintptr_t)saddr;
1734 mp->pr_size = naddr - saddr;
1735 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1736 mp->pr_mflags = 0;
1737 if (prot & PROT_READ)
1738 mp->pr_mflags |= MA_READ;
1739 if (prot & PROT_WRITE)
1740 mp->pr_mflags |= MA_WRITE;
1741 if (prot & PROT_EXEC)
1742 mp->pr_mflags |= MA_EXEC;
1743 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1744 mp->pr_mflags |= MA_SHARED;
1745 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1746 mp->pr_mflags |= MA_NORESERVE;
1747 if (seg->s_ops == &segspt_shmops ||
1748 (seg->s_ops == &segvn_ops &&
1749 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1750 mp->pr_mflags |= MA_ANON;
1751 if (seg == brkseg)
1752 mp->pr_mflags |= MA_BREAK;
1753 else if (seg == stkseg) {
1754 mp->pr_mflags |= MA_STACK;
1755 if (reserved) {
1756 size_t maxstack =
1757 ((size_t)p->p_stk_ctl +
1758 PAGEOFFSET) & PAGEMASK;
1759 mp->pr_vaddr =
1760 (uintptr_t)prgetstackbase(p) +
1761 p->p_stksize - maxstack;
1762 mp->pr_size = (uintptr_t)naddr -
1763 mp->pr_vaddr;
1764 }
1765 }
1766 if (seg->s_ops == &segspt_shmops)
1767 mp->pr_mflags |= MA_ISM | MA_SHM;
1768 mp->pr_pagesize = PAGESIZE;
1769
1770 /*
1771 * Manufacture a filename for the "object" directory.
1772 */
1773 vattr.va_mask = AT_FSID|AT_NODEID;
1774 if (seg->s_ops == &segvn_ops &&
1775 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1776 vp != NULL && vp->v_type == VREG &&
1777 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1778 if (vp == p->p_exec)
1779 (void) strcpy(mp->pr_mapname, "a.out");
1780 else
1781 pr_object_name(mp->pr_mapname,
1782 vp, &vattr);
1783 }
1784
1785 /*
1786 * Get the SysV shared memory id, if any.
1787 */
1788 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1789 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1790 SHMID_NONE) {
1791 if (mp->pr_shmid == SHMID_FREE)
1792 mp->pr_shmid = -1;
1793
1794 mp->pr_mflags |= MA_SHM;
1795 } else {
1796 mp->pr_shmid = -1;
1797 }
1798 }
1799 ASSERT(tmp == NULL);
1800 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1801
1802 return (0);
1803 }
1804
1805 #ifdef _SYSCALL32_IMPL
1806 int
1807 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1808 {
1809 struct as *as = p->p_as;
1810 prmap32_t *mp;
1811 struct seg *seg;
1812 struct seg *brkseg, *stkseg;
1813 struct vnode *vp;
1814 struct vattr vattr;
1815 uint_t prot;
1816
1817 ASSERT(as != &kas && AS_WRITE_HELD(as));
1818
1819 /*
1820 * Request an initial buffer size that doesn't waste memory
1821 * if the address space has only a small number of segments.
1822 */
1823 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1824
1825 if ((seg = AS_SEGFIRST(as)) == NULL)
1826 return (0);
1827
1828 brkseg = break_seg(p);
1829 stkseg = as_segat(as, prgetstackbase(p));
1830
1831 do {
1832 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1833 caddr_t saddr, naddr;
1834 void *tmp = NULL;
1835
1836 if ((seg->s_flags & S_HOLE) != 0) {
1837 continue;
1838 }
1839
1840 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1841 prot = pr_getprot(seg, reserved, &tmp,
1842 &saddr, &naddr, eaddr);
1843 if (saddr == naddr)
1844 continue;
1845
1846 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1847
1848 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1849 mp->pr_size = (size32_t)(naddr - saddr);
1850 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1851 mp->pr_mflags = 0;
1852 if (prot & PROT_READ)
1853 mp->pr_mflags |= MA_READ;
1854 if (prot & PROT_WRITE)
1855 mp->pr_mflags |= MA_WRITE;
1856 if (prot & PROT_EXEC)
1857 mp->pr_mflags |= MA_EXEC;
1858 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1859 mp->pr_mflags |= MA_SHARED;
1860 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1861 mp->pr_mflags |= MA_NORESERVE;
1862 if (seg->s_ops == &segspt_shmops ||
1863 (seg->s_ops == &segvn_ops &&
1864 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1865 mp->pr_mflags |= MA_ANON;
1866 if (seg == brkseg)
1867 mp->pr_mflags |= MA_BREAK;
1868 else if (seg == stkseg) {
1869 mp->pr_mflags |= MA_STACK;
1870 if (reserved) {
1871 size_t maxstack =
1872 ((size_t)p->p_stk_ctl +
1873 PAGEOFFSET) & PAGEMASK;
1874 uintptr_t vaddr =
1875 (uintptr_t)prgetstackbase(p) +
1876 p->p_stksize - maxstack;
1877 mp->pr_vaddr = (caddr32_t)vaddr;
1878 mp->pr_size = (size32_t)
1879 ((uintptr_t)naddr - vaddr);
1880 }
1881 }
1882 if (seg->s_ops == &segspt_shmops)
1883 mp->pr_mflags |= MA_ISM | MA_SHM;
1884 mp->pr_pagesize = PAGESIZE;
1885
1886 /*
1887 * Manufacture a filename for the "object" directory.
1888 */
1889 vattr.va_mask = AT_FSID|AT_NODEID;
1890 if (seg->s_ops == &segvn_ops &&
1891 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1892 vp != NULL && vp->v_type == VREG &&
1893 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1894 if (vp == p->p_exec)
1895 (void) strcpy(mp->pr_mapname, "a.out");
1896 else
1897 pr_object_name(mp->pr_mapname,
1898 vp, &vattr);
1899 }
1900
1901 /*
1902 * Get the SysV shared memory id, if any.
1903 */
1904 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1905 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1906 SHMID_NONE) {
1907 if (mp->pr_shmid == SHMID_FREE)
1908 mp->pr_shmid = -1;
1909
1910 mp->pr_mflags |= MA_SHM;
1911 } else {
1912 mp->pr_shmid = -1;
1913 }
1914 }
1915 ASSERT(tmp == NULL);
1916 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1917
1918 return (0);
1919 }
1920 #endif /* _SYSCALL32_IMPL */
1921
1922 /*
1923 * Return the size of the /proc page data file.
1924 */
1925 size_t
1926 prpdsize(struct as *as)
1927 {
1928 struct seg *seg;
1929 size_t size;
1930
1931 ASSERT(as != &kas && AS_WRITE_HELD(as));
1932
1933 if ((seg = AS_SEGFIRST(as)) == NULL)
1934 return (0);
1935
1936 size = sizeof (prpageheader_t);
1937 do {
1938 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1939 caddr_t saddr, naddr;
1940 void *tmp = NULL;
1941 size_t npage;
1942
1943 if ((seg->s_flags & S_HOLE) != 0) {
1944 continue;
1945 }
1946
1947 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1948 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1949 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1950 size += sizeof (prasmap_t) + round8(npage);
1951 }
1952 ASSERT(tmp == NULL);
1953 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1954
1955 return (size);
1956 }
1957
1958 #ifdef _SYSCALL32_IMPL
1959 size_t
1960 prpdsize32(struct as *as)
1961 {
1962 struct seg *seg;
1963 size_t size;
1964
1965 ASSERT(as != &kas && AS_WRITE_HELD(as));
1966
1967 if ((seg = AS_SEGFIRST(as)) == NULL)
1968 return (0);
1969
1970 size = sizeof (prpageheader32_t);
1971 do {
1972 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1973 caddr_t saddr, naddr;
1974 void *tmp = NULL;
1975 size_t npage;
1976
1977 if ((seg->s_flags & S_HOLE) != 0) {
1978 continue;
1979 }
1980
1981 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1982 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1983 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1984 size += sizeof (prasmap32_t) + round8(npage);
1985 }
1986 ASSERT(tmp == NULL);
1987 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1988
1989 return (size);
1990 }
1991 #endif /* _SYSCALL32_IMPL */
1992
1993 /*
1994 * Read page data information.
1995 */
1996 int
1997 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1998 {
1999 struct as *as = p->p_as;
2000 caddr_t buf;
2001 size_t size;
2002 prpageheader_t *php;
2003 prasmap_t *pmp;
2004 struct seg *seg;
2005 int error;
2006
2007 again:
2008 AS_LOCK_ENTER(as, RW_WRITER);
2009
2010 if ((seg = AS_SEGFIRST(as)) == NULL) {
2011 AS_LOCK_EXIT(as);
2012 return (0);
2013 }
2014 size = prpdsize(as);
2015 if (uiop->uio_resid < size) {
2016 AS_LOCK_EXIT(as);
2017 return (E2BIG);
2018 }
2019
2020 buf = kmem_zalloc(size, KM_SLEEP);
2021 php = (prpageheader_t *)buf;
2022 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
2023
2024 hrt2ts(gethrtime(), &php->pr_tstamp);
2025 php->pr_nmap = 0;
2026 php->pr_npage = 0;
2027 do {
2028 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2029 caddr_t saddr, naddr;
2030 void *tmp = NULL;
2031
2032 if ((seg->s_flags & S_HOLE) != 0) {
2033 continue;
2034 }
2035
2036 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2037 struct vnode *vp;
2038 struct vattr vattr;
2039 size_t len;
2040 size_t npage;
2041 uint_t prot;
2042 uintptr_t next;
2043
2044 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2045 if ((len = (size_t)(naddr - saddr)) == 0)
2046 continue;
2047 npage = len / PAGESIZE;
2048 next = (uintptr_t)(pmp + 1) + round8(npage);
2049 /*
2050 * It's possible that the address space can change
2051 * subtlely even though we're holding as->a_lock
2052 * due to the nondeterminism of page_exists() in
2053 * the presence of asychronously flushed pages or
2054 * mapped files whose sizes are changing.
2055 * page_exists() may be called indirectly from
2056 * pr_getprot() by a SEGOP_INCORE() routine.
2057 * If this happens we need to make sure we don't
2058 * overrun the buffer whose size we computed based
2059 * on the initial iteration through the segments.
2060 * Once we've detected an overflow, we need to clean
2061 * up the temporary memory allocated in pr_getprot()
2062 * and retry. If there's a pending signal, we return
2063 * EINTR so that this thread can be dislodged if
2064 * a latent bug causes us to spin indefinitely.
2065 */
2066 if (next > (uintptr_t)buf + size) {
2067 pr_getprot_done(&tmp);
2068 AS_LOCK_EXIT(as);
2069
2070 kmem_free(buf, size);
2071
2072 if (ISSIG(curthread, JUSTLOOKING))
2073 return (EINTR);
2074
2075 goto again;
2076 }
2077
2078 php->pr_nmap++;
2079 php->pr_npage += npage;
2080 pmp->pr_vaddr = (uintptr_t)saddr;
2081 pmp->pr_npage = npage;
2082 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2083 pmp->pr_mflags = 0;
2084 if (prot & PROT_READ)
2085 pmp->pr_mflags |= MA_READ;
2086 if (prot & PROT_WRITE)
2087 pmp->pr_mflags |= MA_WRITE;
2088 if (prot & PROT_EXEC)
2089 pmp->pr_mflags |= MA_EXEC;
2090 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2091 pmp->pr_mflags |= MA_SHARED;
2092 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2093 pmp->pr_mflags |= MA_NORESERVE;
2094 if (seg->s_ops == &segspt_shmops ||
2095 (seg->s_ops == &segvn_ops &&
2096 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2097 pmp->pr_mflags |= MA_ANON;
2098 if (seg->s_ops == &segspt_shmops)
2099 pmp->pr_mflags |= MA_ISM | MA_SHM;
2100 pmp->pr_pagesize = PAGESIZE;
2101 /*
2102 * Manufacture a filename for the "object" directory.
2103 */
2104 vattr.va_mask = AT_FSID|AT_NODEID;
2105 if (seg->s_ops == &segvn_ops &&
2106 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2107 vp != NULL && vp->v_type == VREG &&
2108 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2109 if (vp == p->p_exec)
2110 (void) strcpy(pmp->pr_mapname, "a.out");
2111 else
2112 pr_object_name(pmp->pr_mapname,
2113 vp, &vattr);
2114 }
2115
2116 /*
2117 * Get the SysV shared memory id, if any.
2118 */
2119 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2120 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2121 SHMID_NONE) {
2122 if (pmp->pr_shmid == SHMID_FREE)
2123 pmp->pr_shmid = -1;
2124
2125 pmp->pr_mflags |= MA_SHM;
2126 } else {
2127 pmp->pr_shmid = -1;
2128 }
2129
2130 hat_getstat(as, saddr, len, hatid,
2131 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2132 pmp = (prasmap_t *)next;
2133 }
2134 ASSERT(tmp == NULL);
2135 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2136
2137 AS_LOCK_EXIT(as);
2138
2139 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2140 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2141 kmem_free(buf, size);
2142
2143 return (error);
2144 }
2145
2146 #ifdef _SYSCALL32_IMPL
2147 int
2148 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2149 {
2150 struct as *as = p->p_as;
2151 caddr_t buf;
2152 size_t size;
2153 prpageheader32_t *php;
2154 prasmap32_t *pmp;
2155 struct seg *seg;
2156 int error;
2157
2158 again:
2159 AS_LOCK_ENTER(as, RW_WRITER);
2160
2161 if ((seg = AS_SEGFIRST(as)) == NULL) {
2162 AS_LOCK_EXIT(as);
2163 return (0);
2164 }
2165 size = prpdsize32(as);
2166 if (uiop->uio_resid < size) {
2167 AS_LOCK_EXIT(as);
2168 return (E2BIG);
2169 }
2170
2171 buf = kmem_zalloc(size, KM_SLEEP);
2172 php = (prpageheader32_t *)buf;
2173 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2174
2175 hrt2ts32(gethrtime(), &php->pr_tstamp);
2176 php->pr_nmap = 0;
2177 php->pr_npage = 0;
2178 do {
2179 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2180 caddr_t saddr, naddr;
2181 void *tmp = NULL;
2182
2183 if ((seg->s_flags & S_HOLE) != 0) {
2184 continue;
2185 }
2186
2187 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2188 struct vnode *vp;
2189 struct vattr vattr;
2190 size_t len;
2191 size_t npage;
2192 uint_t prot;
2193 uintptr_t next;
2194
2195 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2196 if ((len = (size_t)(naddr - saddr)) == 0)
2197 continue;
2198 npage = len / PAGESIZE;
2199 next = (uintptr_t)(pmp + 1) + round8(npage);
2200 /*
2201 * It's possible that the address space can change
2202 * subtlely even though we're holding as->a_lock
2203 * due to the nondeterminism of page_exists() in
2204 * the presence of asychronously flushed pages or
2205 * mapped files whose sizes are changing.
2206 * page_exists() may be called indirectly from
2207 * pr_getprot() by a SEGOP_INCORE() routine.
2208 * If this happens we need to make sure we don't
2209 * overrun the buffer whose size we computed based
2210 * on the initial iteration through the segments.
2211 * Once we've detected an overflow, we need to clean
2212 * up the temporary memory allocated in pr_getprot()
2213 * and retry. If there's a pending signal, we return
2214 * EINTR so that this thread can be dislodged if
2215 * a latent bug causes us to spin indefinitely.
2216 */
2217 if (next > (uintptr_t)buf + size) {
2218 pr_getprot_done(&tmp);
2219 AS_LOCK_EXIT(as);
2220
2221 kmem_free(buf, size);
2222
2223 if (ISSIG(curthread, JUSTLOOKING))
2224 return (EINTR);
2225
2226 goto again;
2227 }
2228
2229 php->pr_nmap++;
2230 php->pr_npage += npage;
2231 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2232 pmp->pr_npage = (size32_t)npage;
2233 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2234 pmp->pr_mflags = 0;
2235 if (prot & PROT_READ)
2236 pmp->pr_mflags |= MA_READ;
2237 if (prot & PROT_WRITE)
2238 pmp->pr_mflags |= MA_WRITE;
2239 if (prot & PROT_EXEC)
2240 pmp->pr_mflags |= MA_EXEC;
2241 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2242 pmp->pr_mflags |= MA_SHARED;
2243 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2244 pmp->pr_mflags |= MA_NORESERVE;
2245 if (seg->s_ops == &segspt_shmops ||
2246 (seg->s_ops == &segvn_ops &&
2247 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2248 pmp->pr_mflags |= MA_ANON;
2249 if (seg->s_ops == &segspt_shmops)
2250 pmp->pr_mflags |= MA_ISM | MA_SHM;
2251 pmp->pr_pagesize = PAGESIZE;
2252 /*
2253 * Manufacture a filename for the "object" directory.
2254 */
2255 vattr.va_mask = AT_FSID|AT_NODEID;
2256 if (seg->s_ops == &segvn_ops &&
2257 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2258 vp != NULL && vp->v_type == VREG &&
2259 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2260 if (vp == p->p_exec)
2261 (void) strcpy(pmp->pr_mapname, "a.out");
2262 else
2263 pr_object_name(pmp->pr_mapname,
2264 vp, &vattr);
2265 }
2266
2267 /*
2268 * Get the SysV shared memory id, if any.
2269 */
2270 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2271 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2272 SHMID_NONE) {
2273 if (pmp->pr_shmid == SHMID_FREE)
2274 pmp->pr_shmid = -1;
2275
2276 pmp->pr_mflags |= MA_SHM;
2277 } else {
2278 pmp->pr_shmid = -1;
2279 }
2280
2281 hat_getstat(as, saddr, len, hatid,
2282 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2283 pmp = (prasmap32_t *)next;
2284 }
2285 ASSERT(tmp == NULL);
2286 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2287
2288 AS_LOCK_EXIT(as);
2289
2290 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2291 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2292 kmem_free(buf, size);
2293
2294 return (error);
2295 }
2296 #endif /* _SYSCALL32_IMPL */
2297
2298 ushort_t
2299 prgetpctcpu(uint64_t pct)
2300 {
2301 /*
2302 * The value returned will be relevant in the zone of the examiner,
2303 * which may not be the same as the zone which performed the procfs
2304 * mount.
2305 */
2306 int nonline = zone_ncpus_online_get(curproc->p_zone);
2307
2308 /*
2309 * Prorate over online cpus so we don't exceed 100%
2310 */
2311 if (nonline > 1)
2312 pct /= nonline;
2313 pct >>= 16; /* convert to 16-bit scaled integer */
2314 if (pct > 0x8000) /* might happen, due to rounding */
2315 pct = 0x8000;
2316 return ((ushort_t)pct);
2317 }
2318
/*
 * Return information used by ps(1).
 *
 * Fills in *psp with a snapshot of process p.  The caller must hold
 * p->p_lock (asserted below).  Note that for a process with a user address
 * space, p->p_lock is dropped and re-acquired while the address space lock
 * is held to compute the size fields, so the lock is not held continuously
 * across this call.
 *
 * If prchoose() finds no thread, the process is reported as a zombie and
 * only the process-wide fields plus a minimal pr_lwp are filled in.
 */
void
prgetpsinfo(proc_t *p, psinfo_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * With no thread, zero the entire structure.  Otherwise leave the
	 * pr_lwp member alone; prgetlwpsinfo() below initialises it.
	 * NOTE(review): the size arithmetic presumes pr_lwp is the final
	 * member of psinfo_t -- confirm against the structure definition.
	 */
	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;

	/* Read all four ids under p_crlock so they come from one cred. */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	/* A contract id of 0 is reported to the consumer as -1. */
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	/* pr_time is the sum of the aggregated user and system times. */
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts((hrutime + hrstime), &psp->pr_time);
	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* No thread was chosen: report the process as a zombie. */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
		psp->pr_start = up->u_start;
		/*
		 * Both copies are bounded to leave at least the last byte of
		 * the (already zeroed) destination untouched, so the results
		 * stay NUL-terminated.
		 */
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = up->u_argv;
		psp->pr_envp = up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time = gethrtime_unscaled();

			/* Sum over every thread on the circular p_tlist. */
			t = p->p_tlist;
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			/* System processes report no size information. */
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/*
			 * p_lock is released before the address space lock
			 * is taken and re-acquired afterwards, so the caller
			 * gets it back held as on entry.
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			/* Sizes are converted from pages to Kbytes. */
			psp->pr_size = btopr(as->a_resvsize) *
			    (PAGESIZE / 1024);
			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}
}
2453
2454 static size_t
2455 prfdinfomisc(list_t *data, uint_t type, void *val, off_t vlen)
2456 {
2457 pr_misc_header_t *misc;
2458 size_t len;
2459
2460 len = sizeof (*misc) + vlen;
2461
2462 if (data != NULL) {
2463 misc = pr_iol_newbuf(data, len);
2464 misc->pr_misc_type = type;
2465 misc->pr_misc_size = len;
2466 misc++;
2467 bcopy((char *)val, (char *)misc, vlen);
2468 }
2469
2470 return (len);
2471 }
2472
2473 /*
2474 * There's no elegant way to determine if a character device
2475 * supports TLI, so just check a hardcoded list of known TLI
2476 * devices.
2477 */
2478
2479 static boolean_t
2480 pristli(vnode_t *vp)
2481 {
2482 static const char *tlidevs[] = {
2483 "udp", "udp6", "tcp", "tcp6", NULL
2484 };
2485 struct devnames *dnp;
2486 major_t major;
2487 uint_t i;
2488
2489 ASSERT(vp != NULL);
2490 ASSERT(vp->v_type == VCHR);
2491
2492 if (vp->v_rdev == 0)
2493 return (B_FALSE);
2494
2495 major = getmajor(vp->v_rdev);
2496 if (major == DDI_MAJOR_T_NONE || major > devcnt)
2497 return (B_FALSE);
2498
2499 dnp = &devnamesp[major];
2500
2501 for (i = 0; tlidevs[i] != NULL; i++) {
2502 if (strcmp(dnp->dn_name, tlidevs[i]) == 0)
2503 return (B_TRUE);
2504 }
2505
2506 return (B_FALSE);
2507 }
2508
2509 static size_t
2510 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
2511 {
2512 char pathname[MAXPATHLEN];
2513 vnode_t *vrootp;
2514 size_t sz = 0;
2515
2516 mutex_enter(&p->p_lock);
2517 if ((vrootp = PTOU(p)->u_rdir) == NULL)
2518 vrootp = rootdir;
2519 VN_HOLD(vrootp);
2520 mutex_exit(&p->p_lock);
2521
2522 if (vnodetopath(vrootp, vp, pathname, sizeof (pathname), cred) == 0) {
2523 sz += prfdinfomisc(data, PR_PATHNAME,
2524 pathname, strlen(pathname));
2525 }
2526 VN_RELE(vrootp);
2527
2528 return (sz);
2529 }
2530
2531 static size_t
2532 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred)
2533 {
2534 strcmd_t strcmd;
2535 int32_t rval;
2536 size_t sz = 0;
2537
2538 strcmd.sc_cmd = TI_GETMYNAME;
2539 strcmd.sc_timeout = 1;
2540 strcmd.sc_len = STRCMDBUFSIZE;
2541
2542 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2543 &rval, NULL) == 0 && strcmd.sc_len > 0) {
2544 sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf,
2545 strcmd.sc_len);
2546 }
2547
2548 strcmd.sc_cmd = TI_GETPEERNAME;
2549 strcmd.sc_timeout = 1;
2550 strcmd.sc_len = STRCMDBUFSIZE;
2551
2552 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2553 &rval, NULL) == 0 && strcmd.sc_len > 0) {
2554 sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf,
2555 strcmd.sc_len);
2556 }
2557
2558 return (sz);
2559 }
2560
2561 static size_t
2562 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
2563 {
2564 sonode_t *so;
2565 socklen_t vlen;
2566 size_t sz = 0;
2567 uint_t i;
2568
2569 if (vp->v_stream) {
2570 so = VTOSO(vp->v_stream->sd_vnode);
2571
2572 if (so->so_version == SOV_STREAM)
2573 so = NULL;
2574 } else {
2575 so = VTOSO(vp);
2576 }
2577
2578 if (so == NULL)
2579 return (0);
2580
2581 DTRACE_PROBE1(sonode, sonode_t *, so);
2582
2583 /* prmisc - PR_SOCKETNAME */
2584
2585 struct sockaddr_storage buf;
2586 struct sockaddr *name = (struct sockaddr *)&buf;
2587
2588 vlen = sizeof (buf);
2589 if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0)
2590 sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen);
2591
2592 /* prmisc - PR_PEERSOCKNAME */
2593
2594 vlen = sizeof (buf);
2595 if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0)
2596 sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen);
2597
2598 /* prmisc - PR_SOCKOPTS_BOOL_OPTS */
2599
2600 static struct boolopt {
2601 int level;
2602 int opt;
2603 int bopt;
2604 } boolopts[] = {
2605 { SOL_SOCKET, SO_DEBUG, PR_SO_DEBUG },
2606 { SOL_SOCKET, SO_REUSEADDR, PR_SO_REUSEADDR },
2607 #ifdef SO_REUSEPORT
2608 /* SmartOS and OmniOS have SO_REUSEPORT */
2609 { SOL_SOCKET, SO_REUSEPORT, PR_SO_REUSEPORT },
2610 #endif
2611 { SOL_SOCKET, SO_KEEPALIVE, PR_SO_KEEPALIVE },
2612 { SOL_SOCKET, SO_DONTROUTE, PR_SO_DONTROUTE },
2613 { SOL_SOCKET, SO_BROADCAST, PR_SO_BROADCAST },
2614 { SOL_SOCKET, SO_OOBINLINE, PR_SO_OOBINLINE },
2615 { SOL_SOCKET, SO_DGRAM_ERRIND, PR_SO_DGRAM_ERRIND },
2616 { SOL_SOCKET, SO_ALLZONES, PR_SO_ALLZONES },
2617 { SOL_SOCKET, SO_MAC_EXEMPT, PR_SO_MAC_EXEMPT },
2618 { SOL_SOCKET, SO_MAC_IMPLICIT, PR_SO_MAC_IMPLICIT },
2619 { SOL_SOCKET, SO_EXCLBIND, PR_SO_EXCLBIND },
2620 { SOL_SOCKET, SO_VRRP, PR_SO_VRRP },
2621 { IPPROTO_UDP, UDP_NAT_T_ENDPOINT,
2622 PR_UDP_NAT_T_ENDPOINT }
2623 };
2624 prsockopts_bool_opts_t opts;
2625 int val;
2626
2627 if (data != NULL) {
2628 opts.prsock_bool_opts = 0;
2629
2630 for (i = 0; i < sizeof (boolopts) / sizeof (boolopts[0]);
2631 i++) {
2632 vlen = sizeof (val);
2633 if (SOP_GETSOCKOPT(so, boolopts[i].level,
2634 boolopts[i].opt, &val, &vlen, 0, cred) == 0 &&
2635 val != 0) {
2636 opts.prsock_bool_opts |= boolopts[i].bopt;
2637 }
2638 }
2639 }
2640
2641 sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts));
2642
2643 /* prmisc - PR_SOCKOPT_LINGER */
2644
2645 struct linger l;
2646
2647 vlen = sizeof (l);
2648 if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen,
2649 0, cred) == 0 && vlen > 0) {
2650 sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen);
2651 }
2652
2653 /* prmisc - PR_SOCKOPT_* int types */
2654
2655 static struct sopt {
2656 int level;
2657 int opt;
2658 int bopt;
2659 } sopts[] = {
2660 { SOL_SOCKET, SO_TYPE, PR_SOCKOPT_TYPE },
2661 { SOL_SOCKET, SO_SNDBUF, PR_SOCKOPT_SNDBUF },
2662 { SOL_SOCKET, SO_RCVBUF, PR_SOCKOPT_RCVBUF }
2663 };
2664
2665 for (i = 0; i < sizeof (sopts) / sizeof (sopts[0]); i++) {
2666 vlen = sizeof (val);
2667 if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt,
2668 &val, &vlen, 0, cred) == 0 && vlen > 0) {
2669 sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen);
2670 }
2671 }
2672
2673 /* prmisc - PR_SOCKOPT_IP_NEXTHOP */
2674
2675 in_addr_t nexthop_val;
2676
2677 vlen = sizeof (nexthop_val);
2678 if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP,
2679 &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) {
2680 sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP,
2681 &nexthop_val, vlen);
2682 }
2683
2684 /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */
2685
2686 struct sockaddr_in6 nexthop6_val;
2687
2688 vlen = sizeof (nexthop6_val);
2689 if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP,
2690 &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) {
2691 sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP,
2692 &nexthop6_val, vlen);
2693 }
2694
2695 /* prmisc - PR_SOCKOPT_TCP_CONGESTION */
2696
2697 char cong[CC_ALGO_NAME_MAX];
2698
2699 vlen = sizeof (cong);
2700 if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION,
2701 &cong, &vlen, 0, cred) == 0 && vlen > 0) {
2702 sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen);
2703 }
2704
2705 /* prmisc - PR_SOCKFILTERS_PRIV */
2706
2707 struct fil_info fi;
2708
2709 vlen = sizeof (fi);
2710 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2711 &fi, &vlen, 0, cred) == 0 && vlen != 0) {
2712 pr_misc_header_t *misc;
2713 size_t len;
2714
2715 /*
2716 * We limit the number of returned filters to 32.
2717 * This is the maximum number that pfiles will print
2718 * anyway.
2719 */
2720 vlen = MIN(32, fi.fi_pos + 1);
2721 vlen *= sizeof (fi);
2722
2723 len = sizeof (*misc) + vlen;
2724 sz += len;
2725
2726 if (data != NULL) {
2727 misc = pr_iol_newbuf(data, len);
2728 misc->pr_misc_type = PR_SOCKFILTERS_PRIV;
2729 misc->pr_misc_size = len;
2730 misc++;
2731 len = vlen;
2732 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2733 misc, &vlen, 0, cred) == 0) {
2734 /*
2735 * In case the number of filters has reduced
2736 * since the first call, explicitly zero out
2737 * any unpopulated space.
2738 */
2739 if (vlen < len)
2740 bzero(misc + vlen, len - vlen);
2741 } else {
2742 /* Something went wrong, zero out the result */
2743 bzero(misc, vlen);
2744 }
2745 }
2746 }
2747
2748 return (sz);
2749 }
2750
2751 u_offset_t
2752 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
2753 {
2754 u_offset_t sz;
2755
2756 /*
2757 * All fdinfo files will be at least this big -
2758 * sizeof fdinfo struct + zero length trailer
2759 */
2760 sz = offsetof(prfdinfov2_t, pr_misc) + sizeof (pr_misc_header_t);
2761
2762 /* Pathname */
2763 if (vp->v_type != VSOCK && vp->v_type != VDOOR)
2764 sz += prfdinfopath(p, vp, NULL, cred);
2765
2766 /* Socket options */
2767 if (vp->v_type == VSOCK)
2768 sz += prfdinfosockopt(vp, NULL, cred);
2769
2770 /* TLI/XTI sockets */
2771 if (vp->v_type == VCHR && pristli(vp) && vp->v_stream != NULL)
2772 sz += prfdinfotlisockopt(vp, NULL, cred);
2773
2774 return (sz);
2775 }
2776
/*
 * Populate the fixed part of an fdinfo structure for the file vp and
 * append the variable-length misc records for it to the list 'data'.
 *
 * The fixed part covers file attributes, any conflicting record lock and,
 * for FIFOs, sockets and stream character devices, the peer process.
 * The misc records are produced by the prfdinfo*() helpers and are
 * followed by a terminating, zero-filled header.
 *
 * Returns ENOENT if vp is NULL and 0 otherwise; failures of the individual
 * queries are not reported and simply leave the affected fields at their
 * defaults.
 */
int
prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfov2_t *fdinfo, cred_t *cred,
    list_t *data)
{
	vattr_t vattr;
	int error;

	if (vp == NULL)
		return (ENOENT);

	/*
	 * Initialise defaults for values that do not default to zero.
	 */
	fdinfo->pr_uid = (uid_t)-1;
	fdinfo->pr_gid = (gid_t)-1;
	fdinfo->pr_size = -1;
	fdinfo->pr_locktype = F_UNLCK;
	fdinfo->pr_lockpid = -1;
	fdinfo->pr_locksysid = -1;
	fdinfo->pr_peerpid = -1;

	/* Offset */

	/* For every other file type pr_offset is left as the caller set it. */
	switch (vp->v_type) {
	case VFIFO:
	case VDOOR:
	case VSOCK:
		/* Do not provide an offset for these file types */
		fdinfo->pr_offset = -1;
		break;
	}

	/* Attributes */
	vattr.va_mask = AT_STAT;
	if (VOP_GETATTR(vp, &vattr, 0, cred, NULL) == 0) {
		fdinfo->pr_major = getmajor(vattr.va_fsid);
		fdinfo->pr_minor = getminor(vattr.va_fsid);
		fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
		fdinfo->pr_rminor = getminor(vattr.va_rdev);
		fdinfo->pr_ino = (ino64_t)vattr.va_nodeid;
		fdinfo->pr_size = (off64_t)vattr.va_size;
		fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
		fdinfo->pr_uid = vattr.va_uid;
		fdinfo->pr_gid = vattr.va_gid;
		if (vp->v_type == VSOCK)
			fdinfo->pr_fileflags |= sock_getfasync(vp);
	}

	/* locks */

	flock64_t bf;

	/*
	 * Issue F_GETLK for a write lock with a zeroed flock structure.  If
	 * the call leaves l_type as anything other than F_UNLCK, record the
	 * lock it describes.
	 */
	bzero(&bf, sizeof (bf));
	bf.l_type = F_WRLCK;

	if (VOP_FRLOCK(vp, F_GETLK, &bf,
	    (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL,
	    cred, NULL) == 0 && bf.l_type != F_UNLCK) {
		fdinfo->pr_locktype = bf.l_type;
		fdinfo->pr_lockpid = bf.l_pid;
		fdinfo->pr_locksysid = bf.l_sysid;
	}

	/* peer cred */

	k_peercred_t kpc;

	/*
	 * kpc is only examined below when error is 0, i.e. when one of the
	 * two queries in this switch reported success.
	 */
	switch (vp->v_type) {
	case VFIFO:
	case VSOCK: {
		int32_t rval;

		error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc,
		    FKIOCTL, cred, &rval, NULL);
		break;
	}
	case VCHR: {
		struct strioctl strioc;
		int32_t rval;

		/* Only stream devices can be asked for a peer credential. */
		if (vp->v_stream == NULL) {
			error = ENOTSUP;
			break;
		}
		strioc.ic_cmd = _I_GETPEERCRED;
		strioc.ic_timout = INFTIM;
		strioc.ic_len = (int)sizeof (k_peercred_t);
		strioc.ic_dp = (char *)&kpc;

		error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL,
		    STR_NOSIG | K_TO_K, cred, &rval);
		break;
	}
	default:
		error = ENOTSUP;
	}

	if (error == 0 && kpc.pc_cr != NULL) {
		proc_t *peerp;

		fdinfo->pr_peerpid = kpc.pc_cpid;

		/* Only the pid is wanted; release the returned credential. */
		crfree(kpc.pc_cr);

		/*
		 * Look the peer up under pidlock and take its p_lock before
		 * pidlock is dropped, then copy out its command name.
		 */
		mutex_enter(&pidlock);
		if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) {
			user_t *up;

			mutex_enter(&peerp->p_lock);
			mutex_exit(&pidlock);

			up = PTOU(peerp);
			/* The bound keeps pr_peername's last byte untouched. */
			bcopy(up->u_comm, fdinfo->pr_peername,
			    MIN(sizeof (up->u_comm),
			    sizeof (fdinfo->pr_peername) - 1));

			mutex_exit(&peerp->p_lock);
		} else {
			mutex_exit(&pidlock);
		}
	}

	/*
	 * Don't attempt to determine the vnode path for a socket or a door
	 * as it will cause a linear scan of the dnlc table given there is no
	 * v_path associated with the vnode.
	 */
	if (vp->v_type != VSOCK && vp->v_type != VDOOR)
		(void) prfdinfopath(p, vp, data, cred);

	if (vp->v_type == VSOCK)
		(void) prfdinfosockopt(vp, data, cred);

	/* TLI/XTI stream sockets */
	if (vp->v_type == VCHR && pristli(vp) && vp->v_stream != NULL)
		(void) prfdinfotlisockopt(vp, data, cred);

	/*
	 * Add a terminating record with a zero size (pr_iol_newbuf will
	 * bzero the memory)
	 */
	(void) pr_iol_newbuf(data, sizeof (pr_misc_header_t));

	return (0);
}
2922
2923 #ifdef _SYSCALL32_IMPL
2924 void
2925 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2926 {
2927 kthread_t *t;
2928 struct cred *cred;
2929 hrtime_t hrutime, hrstime;
2930
2931 ASSERT(MUTEX_HELD(&p->p_lock));
2932
2933 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2934 bzero(psp, sizeof (*psp));
2935 else {
2936 thread_unlock(t);
2937 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2938 }
2939
2940 /*
2941 * only export SSYS and SMSACCT; everything else is off-limits to
2942 * userland apps.
2943 */
2944 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2945 psp->pr_nlwp = p->p_lwpcnt;
2946 psp->pr_nzomb = p->p_zombcnt;
2947 mutex_enter(&p->p_crlock);
2948 cred = p->p_cred;
2949 psp->pr_uid = crgetruid(cred);
2950 psp->pr_euid = crgetuid(cred);
2951 psp->pr_gid = crgetrgid(cred);
2952 psp->pr_egid = crgetgid(cred);
2953 mutex_exit(&p->p_crlock);
2954 psp->pr_pid = p->p_pid;
2955 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2956 (p->p_flag & SZONETOP)) {
2957 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2958 /*
2959 * Inside local zones, fake zsched's pid as parent pids for
2960 * processes which reference processes outside of the zone.
2961 */
2962 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2963 } else {
2964 psp->pr_ppid = p->p_ppid;
2965 }
2966 psp->pr_pgid = p->p_pgrp;
2967 psp->pr_sid = p->p_sessp->s_sid;
2968 psp->pr_taskid = p->p_task->tk_tkid;
2969 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2970 psp->pr_poolid = p->p_pool->pool_id;
2971 psp->pr_zoneid = p->p_zone->zone_id;
2972 if ((psp->pr_contract = PRCTID(p)) == 0)
2973 psp->pr_contract = -1;
2974 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
2975 switch (p->p_model) {
2976 case DATAMODEL_ILP32:
2977 psp->pr_dmodel = PR_MODEL_ILP32;
2978 break;
2979 case DATAMODEL_LP64:
2980 psp->pr_dmodel = PR_MODEL_LP64;
2981 break;
2982 }
2983 hrutime = mstate_aggr_state(p, LMS_USER);
2984 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2985 hrt2ts32(hrutime + hrstime, &psp->pr_time);
2986 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2987
2988 if (t == NULL) {
2989 extern int wstat(int, int); /* needs a header file */
2990 int wcode = p->p_wcode; /* must be atomic read */
2991
2992 if (wcode)
2993 psp->pr_wstat = wstat(wcode, p->p_wdata);
2994 psp->pr_ttydev = PRNODEV32;
2995 psp->pr_lwp.pr_state = SZOMB;
2996 psp->pr_lwp.pr_sname = 'Z';
2997 } else {
2998 user_t *up = PTOU(p);
2999 struct as *as;
3000 dev_t d;
3001 extern dev_t rwsconsdev, rconsdev, uconsdev;
3002
3003 d = cttydev(p);
3004 /*
3005 * If the controlling terminal is the real
3006 * or workstation console device, map to what the
3007 * user thinks is the console device. Handle case when
3008 * rwsconsdev or rconsdev is set to NODEV for Starfire.
3009 */
3010 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
3011 d = uconsdev;
3012 (void) cmpldev(&psp->pr_ttydev, d);
3013 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
3014 bcopy(up->u_comm, psp->pr_fname,
3015 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
3016 bcopy(up->u_psargs, psp->pr_psargs,
3017 MIN(PRARGSZ-1, PSARGSZ));
3018 psp->pr_argc = up->u_argc;
3019 psp->pr_argv = (caddr32_t)up->u_argv;
3020 psp->pr_envp = (caddr32_t)up->u_envp;
3021
3022 /* get the chosen lwp's lwpsinfo */
3023 prgetlwpsinfo32(t, &psp->pr_lwp);
3024
3025 /* compute %cpu for the process */
3026 if (p->p_lwpcnt == 1)
3027 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
3028 else {
3029 uint64_t pct = 0;
3030 hrtime_t cur_time;
3031
3032 t = p->p_tlist;
3033 cur_time = gethrtime_unscaled();
3034 do {
3035 pct += cpu_update_pct(t, cur_time);
3036 } while ((t = t->t_forw) != p->p_tlist);
3037
3038 psp->pr_pctcpu = prgetpctcpu(pct);
3039 }
3040 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
3041 psp->pr_size = 0;
3042 psp->pr_rssize = 0;
3043 } else {
3044 mutex_exit(&p->p_lock);
3045 AS_LOCK_ENTER(as, RW_READER);
3046 psp->pr_size = (size32_t)
3047 (btopr(as->a_resvsize) * (PAGESIZE / 1024));
3048 psp->pr_rssize = (size32_t)
3049 (rm_asrss(as) * (PAGESIZE / 1024));
3050 psp->pr_pctmem = rm_pctmemory(as);
3051 AS_LOCK_EXIT(as);
3052 mutex_enter(&p->p_lock);
3053 }
3054 }
3055
3056 /*
3057 * If we are looking at an LP64 process, zero out
3058 * the fields that cannot be represented in ILP32.
3059 */
3060 if (p->p_model != DATAMODEL_ILP32) {
3061 psp->pr_size = 0;
3062 psp->pr_rssize = 0;
3063 psp->pr_argv = 0;
3064 psp->pr_envp = 0;
3065 }
3066 }
3067
3068 #endif /* _SYSCALL32_IMPL */
3069
3070 void
3071 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
3072 {
3073 klwp_t *lwp = ttolwp(t);
3074 sobj_ops_t *sobj;
3075 char c, state;
3076 uint64_t pct;
3077 int retval, niceval;
3078 hrtime_t hrutime, hrstime;
3079
3080 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
3081
3082 bzero(psp, sizeof (*psp));
3083
3084 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
3085 psp->pr_lwpid = t->t_tid;
3086 psp->pr_addr = (uintptr_t)t;
3087 psp->pr_wchan = (uintptr_t)t->t_wchan;
3088
3089 /* map the thread state enum into a process state enum */
3090 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3091 switch (state) {
3092 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
3093 case TS_RUN: state = SRUN; c = 'R'; break;
3094 case TS_ONPROC: state = SONPROC; c = 'O'; break;
3095 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
3096 case TS_STOPPED: state = SSTOP; c = 'T'; break;
3097 case TS_WAIT: state = SWAIT; c = 'W'; break;
3098 default: state = 0; c = '?'; break;
3099 }
3100 psp->pr_state = state;
3101 psp->pr_sname = c;
3102 if ((sobj = t->t_sobj_ops) != NULL)
3103 psp->pr_stype = SOBJ_TYPE(sobj);
3104 retval = CL_DONICE(t, NULL, 0, &niceval);
3105 if (retval == 0) {
3106 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3107 psp->pr_nice = niceval + NZERO;
3108 }
3109 psp->pr_syscall = t->t_sysnum;
3110 psp->pr_pri = t->t_pri;
3111 psp->pr_start.tv_sec = t->t_start;
3112 psp->pr_start.tv_nsec = 0L;
3113 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3114 scalehrtime(&hrutime);
3115 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3116 lwp->lwp_mstate.ms_acct[LMS_TRAP];
3117 scalehrtime(&hrstime);
3118 hrt2ts(hrutime + hrstime, &psp->pr_time);
3119 /* compute %cpu for the lwp */
3120 pct = cpu_update_pct(t, gethrtime_unscaled());
3121 psp->pr_pctcpu = prgetpctcpu(pct);
3122 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
3123 if (psp->pr_cpu > 99)
3124 psp->pr_cpu = 99;
3125
3126 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3127 sizeof (psp->pr_clname) - 1);
3128 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
3129 psp->pr_onpro = t->t_cpu->cpu_id;
3130 psp->pr_bindpro = t->t_bind_cpu;
3131 psp->pr_bindpset = t->t_bind_pset;
3132 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3133 }
3134
3135 #ifdef _SYSCALL32_IMPL
3136 void
3137 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
3138 {
3139 proc_t *p = ttoproc(t);
3140 klwp_t *lwp = ttolwp(t);
3141 sobj_ops_t *sobj;
3142 char c, state;
3143 uint64_t pct;
3144 int retval, niceval;
3145 hrtime_t hrutime, hrstime;
3146
3147 ASSERT(MUTEX_HELD(&p->p_lock));
3148
3149 bzero(psp, sizeof (*psp));
3150
3151 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
3152 psp->pr_lwpid = t->t_tid;
3153 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
3154 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */
3155
3156 /* map the thread state enum into a process state enum */
3157 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
3158 switch (state) {
3159 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
3160 case TS_RUN: state = SRUN; c = 'R'; break;
3161 case TS_ONPROC: state = SONPROC; c = 'O'; break;
3162 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
3163 case TS_STOPPED: state = SSTOP; c = 'T'; break;
3164 case TS_WAIT: state = SWAIT; c = 'W'; break;
3165 default: state = 0; c = '?'; break;
3166 }
3167 psp->pr_state = state;
3168 psp->pr_sname = c;
3169 if ((sobj = t->t_sobj_ops) != NULL)
3170 psp->pr_stype = SOBJ_TYPE(sobj);
3171 retval = CL_DONICE(t, NULL, 0, &niceval);
3172 if (retval == 0) {
3173 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
3174 psp->pr_nice = niceval + NZERO;
3175 } else {
3176 psp->pr_oldpri = 0;
3177 psp->pr_nice = 0;
3178 }
3179 psp->pr_syscall = t->t_sysnum;
3180 psp->pr_pri = t->t_pri;
3181 psp->pr_start.tv_sec = (time32_t)t->t_start;
3182 psp->pr_start.tv_nsec = 0L;
3183 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
3184 scalehrtime(&hrutime);
3185 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
3186 lwp->lwp_mstate.ms_acct[LMS_TRAP];
3187 scalehrtime(&hrstime);
3188 hrt2ts32(hrutime + hrstime, &psp->pr_time);
3189 /* compute %cpu for the lwp */
3190 pct = cpu_update_pct(t, gethrtime_unscaled());
3191 psp->pr_pctcpu = prgetpctcpu(pct);
3192 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
3193 if (psp->pr_cpu > 99)
3194 psp->pr_cpu = 99;
3195
3196 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
3197 sizeof (psp->pr_clname) - 1);
3198 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
3199 psp->pr_onpro = t->t_cpu->cpu_id;
3200 psp->pr_bindpro = t->t_bind_cpu;
3201 psp->pr_bindpset = t->t_bind_pset;
3202 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
3203 }
3204 #endif /* _SYSCALL32_IMPL */
3205
3206 #ifdef _SYSCALL32_IMPL
3207
/*
 * Helpers for converting a native procfs structure (s) into its 32-bit
 * equivalent (d), one member at a time.  Each expands to a single
 * statement-like construct that needs a trailing semicolon at the point of
 * use and is safe as the body of an unbraced if/else.  The s and d
 * arguments are parenthesised so that any pointer expression may be
 * passed.
 */

/* Plain member-to-member assignment. */
#define	PR_COPY_FIELD(s, d, field)	((d)->field = (s)->field)

/* Assign only when the source describes an ILP32 process. */
#define	PR_COPY_FIELD_ILP32(s, d, field)				\
	do {								\
		if ((s)->pr_dmodel == PR_MODEL_ILP32)			\
			(d)->field = (s)->field;			\
	} while (0)

/* Convert a native timespec member into its 32-bit form. */
#define	PR_COPY_TIMESPEC(s, d, field)					\
	do {								\
		TIMESPEC_TO_TIMESPEC32(&(d)->field, &(s)->field);	\
	} while (0)

/* Copy an array member; the length is that of the destination array. */
#define	PR_COPY_BUF(s, d, field)					\
	bcopy((s)->field, (d)->field, sizeof ((d)->field))

/* Document that a member is deliberately not converted. */
#define	PR_IGNORE_FIELD(s, d, field)	((void)0)
3222
3223 void
3224 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
3225 {
3226 bzero(dest, sizeof (*dest));
3227
3228 PR_COPY_FIELD(src, dest, pr_flag);
3229 PR_COPY_FIELD(src, dest, pr_lwpid);
3230 PR_IGNORE_FIELD(src, dest, pr_addr);
3231 PR_IGNORE_FIELD(src, dest, pr_wchan);
3232 PR_COPY_FIELD(src, dest, pr_stype);
3233 PR_COPY_FIELD(src, dest, pr_state);
3234 PR_COPY_FIELD(src, dest, pr_sname);
3235 PR_COPY_FIELD(src, dest, pr_nice);
3236 PR_COPY_FIELD(src, dest, pr_syscall);
3237 PR_COPY_FIELD(src, dest, pr_oldpri);
3238 PR_COPY_FIELD(src, dest, pr_cpu);
3239 PR_COPY_FIELD(src, dest, pr_pri);
3240 PR_COPY_FIELD(src, dest, pr_pctcpu);
3241 PR_COPY_TIMESPEC(src, dest, pr_start);
3242 PR_COPY_BUF(src, dest, pr_clname);
3243 PR_COPY_BUF(src, dest, pr_name);
3244 PR_COPY_FIELD(src, dest, pr_onpro);
3245 PR_COPY_FIELD(src, dest, pr_bindpro);
3246 PR_COPY_FIELD(src, dest, pr_bindpset);
3247 PR_COPY_FIELD(src, dest, pr_lgrp);
3248 }
3249
3250 void
3251 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
3252 {
3253 bzero(dest, sizeof (*dest));
3254
3255 PR_COPY_FIELD(src, dest, pr_flag);
3256 PR_COPY_FIELD(src, dest, pr_nlwp);
3257 PR_COPY_FIELD(src, dest, pr_pid);
3258 PR_COPY_FIELD(src, dest, pr_ppid);
3259 PR_COPY_FIELD(src, dest, pr_pgid);
3260 PR_COPY_FIELD(src, dest, pr_sid);
3261 PR_COPY_FIELD(src, dest, pr_uid);
3262 PR_COPY_FIELD(src, dest, pr_euid);
3263 PR_COPY_FIELD(src, dest, pr_gid);
3264 PR_COPY_FIELD(src, dest, pr_egid);
3265 PR_IGNORE_FIELD(src, dest, pr_addr);
3266 PR_COPY_FIELD_ILP32(src, dest, pr_size);
3267 PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
3268 PR_COPY_FIELD(src, dest, pr_ttydev);
3269 PR_COPY_FIELD(src, dest, pr_pctcpu);
3270 PR_COPY_FIELD(src, dest, pr_pctmem);
3271 PR_COPY_TIMESPEC(src, dest, pr_start);
3272 PR_COPY_TIMESPEC(src, dest, pr_time);
3273 PR_COPY_TIMESPEC(src, dest, pr_ctime);
3274 PR_COPY_BUF(src, dest, pr_fname);
3275 PR_COPY_BUF(src, dest, pr_psargs);
3276 PR_COPY_FIELD(src, dest, pr_wstat);
3277 PR_COPY_FIELD(src, dest, pr_argc);
3278 PR_COPY_FIELD_ILP32(src, dest, pr_argv);
3279 PR_COPY_FIELD_ILP32(src, dest, pr_envp);
3280 PR_COPY_FIELD(src, dest, pr_dmodel);
3281 PR_COPY_FIELD(src, dest, pr_taskid);
3282 PR_COPY_FIELD(src, dest, pr_projid);
3283 PR_COPY_FIELD(src, dest, pr_nzomb);
3284 PR_COPY_FIELD(src, dest, pr_poolid);
3285 PR_COPY_FIELD(src, dest, pr_contract);
3286 PR_COPY_FIELD(src, dest, pr_poolid);
3287 PR_COPY_FIELD(src, dest, pr_poolid);
3288
3289 lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
3290 }
3291
3292 #undef PR_COPY_FIELD
3293 #undef PR_COPY_FIELD_ILP32
3294 #undef PR_COPY_TIMESPEC
3295 #undef PR_COPY_BUF
3296 #undef PR_IGNORE_FIELD
3297
3298 #endif /* _SYSCALL32_IMPL */
3299
3300 /*
3301 * This used to get called when microstate accounting was disabled but
3302 * microstate information was requested. Since Microstate accounting is on
3303 * regardless of the proc flags, this simply makes it appear to procfs that
3304 * microstate accounting is on. This is relatively meaningless since you
3305 * can't turn it off, but this is here for the sake of appearances.
3306 */
3307
3308 /*ARGSUSED*/
3309 void
3310 estimate_msacct(kthread_t *t, hrtime_t curtime)
3311 {
3312 proc_t *p;
3313
3314 if (t == NULL)
3315 return;
3316
3317 p = ttoproc(t);
3318 ASSERT(MUTEX_HELD(&p->p_lock));
3319
3320 /*
3321 * A system process (p0) could be referenced if the thread is
3322 * in the process of exiting. Don't turn on microstate accounting
3323 * in that case.
3324 */
3325 if (p->p_flag & SSYS)
3326 return;
3327
3328 /*
3329 * Loop through all the LWPs (kernel threads) in the process.
3330 */
3331 t = p->p_tlist;
3332 do {
3333 t->t_proc_flag |= TP_MSACCT;
3334 } while ((t = t->t_forw) != p->p_tlist);
3335
3336 p->p_flag |= SMSACCT; /* set process-wide MSACCT */
3337 }
3338
3339 /*
3340 * It's not really possible to disable microstate accounting anymore.
3341 * However, this routine simply turns off the ms accounting flags in a process
3342 * This way procfs can still pretend to turn microstate accounting on and
3343 * off for a process, but it actually doesn't do anything. This is
3344 * a neutered form of preemptive idiot-proofing.
3345 */
3346 void
3347 disable_msacct(proc_t *p)
3348 {
3349 kthread_t *t;
3350
3351 ASSERT(MUTEX_HELD(&p->p_lock));
3352
3353 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */
3354 /*
3355 * Loop through all the LWPs (kernel threads) in the process.
3356 */
3357 if ((t = p->p_tlist) != NULL) {
3358 do {
3359 /* clear per-thread flag */
3360 t->t_proc_flag &= ~TP_MSACCT;
3361 } while ((t = t->t_forw) != p->p_tlist);
3362 }
3363 }
3364
3365 /*
3366 * Return resource usage information.
3367 */
3368 void
3369 prgetusage(kthread_t *t, prhusage_t *pup)
3370 {
3371 klwp_t *lwp = ttolwp(t);
3372 hrtime_t *mstimep;
3373 struct mstate *ms = &lwp->lwp_mstate;
3374 int state;
3375 int i;
3376 hrtime_t curtime;
3377 hrtime_t waitrq;
3378 hrtime_t tmp1;
3379
3380 curtime = gethrtime_unscaled();
3381
3382 pup->pr_lwpid = t->t_tid;
3383 pup->pr_count = 1;
3384 pup->pr_create = ms->ms_start;
3385 pup->pr_term = ms->ms_term;
3386 scalehrtime(&pup->pr_create);
3387 scalehrtime(&pup->pr_term);
3388 if (ms->ms_term == 0) {
3389 pup->pr_rtime = curtime - ms->ms_start;
3390 scalehrtime(&pup->pr_rtime);
3391 } else {
3392 pup->pr_rtime = ms->ms_term - ms->ms_start;
3393 scalehrtime(&pup->pr_rtime);
3394 }
3395
3396
3397 pup->pr_utime = ms->ms_acct[LMS_USER];
3398 pup->pr_stime = ms->ms_acct[LMS_SYSTEM];
3399 pup->pr_ttime = ms->ms_acct[LMS_TRAP];
3400 pup->pr_tftime = ms->ms_acct[LMS_TFAULT];
3401 pup->pr_dftime = ms->ms_acct[LMS_DFAULT];
3402 pup->pr_kftime = ms->ms_acct[LMS_KFAULT];
3403 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK];
3404 pup->pr_slptime = ms->ms_acct[LMS_SLEEP];
3405 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
3406 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
3407
3408 prscaleusage(pup);
3409
3410 /*
3411 * Adjust for time waiting in the dispatcher queue.
3412 */
3413 waitrq = t->t_waitrq; /* hopefully atomic */
3414 if (waitrq != 0) {
3415 if (waitrq > curtime) {
3416 curtime = gethrtime_unscaled();
3417 }
3418 tmp1 = curtime - waitrq;
3419 scalehrtime(&tmp1);
3420 pup->pr_wtime += tmp1;
3421 curtime = waitrq;
3422 }
3423
3424 /*
3425 * Adjust for time spent in current microstate.
3426 */
3427 if (ms->ms_state_start > curtime) {
3428 curtime = gethrtime_unscaled();
3429 }
3430
3431 i = 0;
3432 do {
3433 switch (state = t->t_mstate) {
3434 case LMS_SLEEP:
3435 /*
3436 * Update the timer for the current sleep state.
3437 */
3438 switch (state = ms->ms_prev) {
3439 case LMS_TFAULT:
3440 case LMS_DFAULT:
3441 case LMS_KFAULT:
3442 case LMS_USER_LOCK:
3443 break;
3444 default:
3445 state = LMS_SLEEP;
3446 break;
3447 }
3448 break;
3449 case LMS_TFAULT:
3450 case LMS_DFAULT:
3451 case LMS_KFAULT:
3452 case LMS_USER_LOCK:
3453 state = LMS_SYSTEM;
3454 break;
3455 }
3456 switch (state) {
3457 case LMS_USER: mstimep = &pup->pr_utime; break;
3458 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
3459 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
3460 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
3461 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
3462 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
3463 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
3464 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
3465 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
3466 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
3467 default: panic("prgetusage: unknown microstate");
3468 }
3469 tmp1 = curtime - ms->ms_state_start;
3470 if (tmp1 < 0) {
3471 curtime = gethrtime_unscaled();
3472 i++;
3473 continue;
3474 }
3475 scalehrtime(&tmp1);
3476 } while (tmp1 < 0 && i < MAX_ITERS_SPIN);
3477
3478 *mstimep += tmp1;
3479
3480 /* update pup timestamp */
3481 pup->pr_tstamp = curtime;
3482 scalehrtime(&pup->pr_tstamp);
3483
3484 /*
3485 * Resource usage counters.
3486 */
3487 pup->pr_minf = lwp->lwp_ru.minflt;
3488 pup->pr_majf = lwp->lwp_ru.majflt;
3489 pup->pr_nswap = lwp->lwp_ru.nswap;
3490 pup->pr_inblk = lwp->lwp_ru.inblock;
3491 pup->pr_oublk = lwp->lwp_ru.oublock;
3492 pup->pr_msnd = lwp->lwp_ru.msgsnd;
3493 pup->pr_mrcv = lwp->lwp_ru.msgrcv;
3494 pup->pr_sigs = lwp->lwp_ru.nsignals;
3495 pup->pr_vctx = lwp->lwp_ru.nvcsw;
3496 pup->pr_ictx = lwp->lwp_ru.nivcsw;
3497 pup->pr_sysc = lwp->lwp_ru.sysc;
3498 pup->pr_ioch = lwp->lwp_ru.ioch;
3499 }
3500
3501 /*
3502 * Convert ms_acct stats from unscaled high-res time to nanoseconds
3503 */
3504 void
3505 prscaleusage(prhusage_t *usg)
3506 {
3507 scalehrtime(&usg->pr_utime);
3508 scalehrtime(&usg->pr_stime);
3509 scalehrtime(&usg->pr_ttime);
3510 scalehrtime(&usg->pr_tftime);
3511 scalehrtime(&usg->pr_dftime);
3512 scalehrtime(&usg->pr_kftime);
3513 scalehrtime(&usg->pr_ltime);
3514 scalehrtime(&usg->pr_slptime);
3515 scalehrtime(&usg->pr_wtime);
3516 scalehrtime(&usg->pr_stoptime);
3517 }
3518
3519
3520 /*
3521 * Sum resource usage information.
3522 */
3523 void
3524 praddusage(kthread_t *t, prhusage_t *pup)
3525 {
3526 klwp_t *lwp = ttolwp(t);
3527 hrtime_t *mstimep;
3528 struct mstate *ms = &lwp->lwp_mstate;
3529 int state;
3530 int i;
3531 hrtime_t curtime;
3532 hrtime_t waitrq;
3533 hrtime_t tmp;
3534 prhusage_t conv;
3535
3536 curtime = gethrtime_unscaled();
3537
3538 if (ms->ms_term == 0) {
3539 tmp = curtime - ms->ms_start;
3540 scalehrtime(&tmp);
3541 pup->pr_rtime += tmp;
3542 } else {
3543 tmp = ms->ms_term - ms->ms_start;
3544 scalehrtime(&tmp);
3545 pup->pr_rtime += tmp;
3546 }
3547
3548 conv.pr_utime = ms->ms_acct[LMS_USER];
3549 conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
3550 conv.pr_ttime = ms->ms_acct[LMS_TRAP];
3551 conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
3552 conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
3553 conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
3554 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
3555 conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
3556 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
3557 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
3558
3559 prscaleusage(&conv);
3560
3561 pup->pr_utime += conv.pr_utime;
3562 pup->pr_stime += conv.pr_stime;
3563 pup->pr_ttime += conv.pr_ttime;
3564 pup->pr_tftime += conv.pr_tftime;
3565 pup->pr_dftime += conv.pr_dftime;
3566 pup->pr_kftime += conv.pr_kftime;
3567 pup->pr_ltime += conv.pr_ltime;
3568 pup->pr_slptime += conv.pr_slptime;
3569 pup->pr_wtime += conv.pr_wtime;
3570 pup->pr_stoptime += conv.pr_stoptime;
3571
3572 /*
3573 * Adjust for time waiting in the dispatcher queue.
3574 */
3575 waitrq = t->t_waitrq; /* hopefully atomic */
3576 if (waitrq != 0) {
3577 if (waitrq > curtime) {
3578 curtime = gethrtime_unscaled();
3579 }
3580 tmp = curtime - waitrq;
3581 scalehrtime(&tmp);
3582 pup->pr_wtime += tmp;
3583 curtime = waitrq;
3584 }
3585
3586 /*
3587 * Adjust for time spent in current microstate.
3588 */
3589 if (ms->ms_state_start > curtime) {
3590 curtime = gethrtime_unscaled();
3591 }
3592
3593 i = 0;
3594 do {
3595 switch (state = t->t_mstate) {
3596 case LMS_SLEEP:
3597 /*
3598 * Update the timer for the current sleep state.
3599 */
3600 switch (state = ms->ms_prev) {
3601 case LMS_TFAULT:
3602 case LMS_DFAULT:
3603 case LMS_KFAULT:
3604 case LMS_USER_LOCK:
3605 break;
3606 default:
3607 state = LMS_SLEEP;
3608 break;
3609 }
3610 break;
3611 case LMS_TFAULT:
3612 case LMS_DFAULT:
3613 case LMS_KFAULT:
3614 case LMS_USER_LOCK:
3615 state = LMS_SYSTEM;
3616 break;
3617 }
3618 switch (state) {
3619 case LMS_USER: mstimep = &pup->pr_utime; break;
3620 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
3621 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
3622 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
3623 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
3624 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
3625 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
3626 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
3627 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
3628 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
3629 default: panic("praddusage: unknown microstate");
3630 }
3631 tmp = curtime - ms->ms_state_start;
3632 if (tmp < 0) {
3633 curtime = gethrtime_unscaled();
3634 i++;
3635 continue;
3636 }
3637 scalehrtime(&tmp);
3638 } while (tmp < 0 && i < MAX_ITERS_SPIN);
3639
3640 *mstimep += tmp;
3641
3642 /* update pup timestamp */
3643 pup->pr_tstamp = curtime;
3644 scalehrtime(&pup->pr_tstamp);
3645
3646 /*
3647 * Resource usage counters.
3648 */
3649 pup->pr_minf += lwp->lwp_ru.minflt;
3650 pup->pr_majf += lwp->lwp_ru.majflt;
3651 pup->pr_nswap += lwp->lwp_ru.nswap;
3652 pup->pr_inblk += lwp->lwp_ru.inblock;
3653 pup->pr_oublk += lwp->lwp_ru.oublock;
3654 pup->pr_msnd += lwp->lwp_ru.msgsnd;
3655 pup->pr_mrcv += lwp->lwp_ru.msgrcv;
3656 pup->pr_sigs += lwp->lwp_ru.nsignals;
3657 pup->pr_vctx += lwp->lwp_ru.nvcsw;
3658 pup->pr_ictx += lwp->lwp_ru.nivcsw;
3659 pup->pr_sysc += lwp->lwp_ru.sysc;
3660 pup->pr_ioch += lwp->lwp_ru.ioch;
3661 }
3662
3663 /*
3664 * Convert a prhusage_t to a prusage_t.
3665 * This means convert each hrtime_t to a timestruc_t
3666 * and copy the count fields uint64_t => ulong_t.
3667 */
3668 void
3669 prcvtusage(prhusage_t *pup, prusage_t *upup)
3670 {
3671 uint64_t *ullp;
3672 ulong_t *ulp;
3673 int i;
3674
3675 upup->pr_lwpid = pup->pr_lwpid;
3676 upup->pr_count = pup->pr_count;
3677
3678 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp);
3679 hrt2ts(pup->pr_create, &upup->pr_create);
3680 hrt2ts(pup->pr_term, &upup->pr_term);
3681 hrt2ts(pup->pr_rtime, &upup->pr_rtime);
3682 hrt2ts(pup->pr_utime, &upup->pr_utime);
3683 hrt2ts(pup->pr_stime, &upup->pr_stime);
3684 hrt2ts(pup->pr_ttime, &upup->pr_ttime);
3685 hrt2ts(pup->pr_tftime, &upup->pr_tftime);
3686 hrt2ts(pup->pr_dftime, &upup->pr_dftime);
3687 hrt2ts(pup->pr_kftime, &upup->pr_kftime);
3688 hrt2ts(pup->pr_ltime, &upup->pr_ltime);
3689 hrt2ts(pup->pr_slptime, &upup->pr_slptime);
3690 hrt2ts(pup->pr_wtime, &upup->pr_wtime);
3691 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3692 bzero(upup->filltime, sizeof (upup->filltime));
3693
3694 ullp = &pup->pr_minf;
3695 ulp = &upup->pr_minf;
3696 for (i = 0; i < 22; i++)
3697 *ulp++ = (ulong_t)*ullp++;
3698 }
3699
#ifdef _SYSCALL32_IMPL
/*
 * Convert a prhusage_t into a prusage32_t.
 *
 * The identifying fields are copied as they are, every hrtime_t time value
 * is converted with hrt2ts32(), the filltime area of the result is zeroed,
 * and the counters are narrowed from uint64_t to uint32_t.
 */
void
prcvtusage32(prhusage_t *pup, prusage32_t *upup)
{
	uint64_t *src;
	uint32_t *dst;
	int n;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	/* Timestamps and elapsed real time. */
	hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp);
	hrt2ts32(pup->pr_create, &upup->pr_create);
	hrt2ts32(pup->pr_term, &upup->pr_term);
	hrt2ts32(pup->pr_rtime, &upup->pr_rtime);

	/* Per-microstate times. */
	hrt2ts32(pup->pr_utime, &upup->pr_utime);
	hrt2ts32(pup->pr_stime, &upup->pr_stime);
	hrt2ts32(pup->pr_ttime, &upup->pr_ttime);
	hrt2ts32(pup->pr_tftime, &upup->pr_tftime);
	hrt2ts32(pup->pr_dftime, &upup->pr_dftime);
	hrt2ts32(pup->pr_kftime, &upup->pr_kftime);
	hrt2ts32(pup->pr_ltime, &upup->pr_ltime);
	hrt2ts32(pup->pr_slptime, &upup->pr_slptime);
	hrt2ts32(pup->pr_wtime, &upup->pr_wtime);
	hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Copy 22 consecutive counter slots, beginning at pr_minf in both
	 * structures; each value is truncated to 32 bits.
	 * NOTE(review): 22 presumably covers pr_minf through the end of the
	 * counter/filler area in both layouts -- confirm against the
	 * structure definitions before changing either one.
	 */
	src = &pup->pr_minf;
	dst = &upup->pr_minf;
	for (n = 0; n < 22; n++)
		dst[n] = (uint32_t)src[n];
}
#endif	/* _SYSCALL32_IMPL */
3733
/*
 * Determine whether a set is empty.
 *
 * The set is an array of n 32-bit words starting at sp.  Returns 1 when
 * every word is zero (including when n is 0), otherwise 0.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t idx;

	for (idx = 0; idx < n; idx++) {
		if (sp[idx] != 0)
			return (0);
	}

	return (1);
}
3745
3746 /*
3747 * Utility routine for establishing a watched area in the process.
3748 * Keep the list of watched areas sorted by virtual address.
3749 */
3750 int
3751 set_watched_area(proc_t *p, struct watched_area *pwa)
3752 {
3753 caddr_t vaddr = pwa->wa_vaddr;
3754 caddr_t eaddr = pwa->wa_eaddr;
3755 ulong_t flags = pwa->wa_flags;
3756 struct watched_area *target;
3757 avl_index_t where;
3758 int error = 0;
3759
3760 /* we must not be holding p->p_lock, but the process must be locked */
3761 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3762 ASSERT(p->p_proc_flag & P_PR_LOCK);
3763
3764 /*
3765 * If this is our first watchpoint, enable watchpoints for the process.
3766 */
3767 if (!pr_watch_active(p)) {
3768 kthread_t *t;
3769
3770 mutex_enter(&p->p_lock);
3771 if ((t = p->p_tlist) != NULL) {
3772 do {
3773 watch_enable(t);
3774 } while ((t = t->t_forw) != p->p_tlist);
3775 }
3776 mutex_exit(&p->p_lock);
3777 }
3778
3779 target = pr_find_watched_area(p, pwa, &where);
3780 if (target != NULL) {
3781 /*
3782 * We discovered an existing, overlapping watched area.
3783 * Allow it only if it is an exact match.
3784 */
3785 if (target->wa_vaddr != vaddr ||
3786 target->wa_eaddr != eaddr)
3787 error = EINVAL;
3788 else if (target->wa_flags != flags) {
3789 error = set_watched_page(p, vaddr, eaddr,
3790 flags, target->wa_flags);
3791 target->wa_flags = flags;
3792 }
3793 kmem_free(pwa, sizeof (struct watched_area));
3794 } else {
3795 avl_insert(&p->p_warea, pwa, where);
3796 error = set_watched_page(p, vaddr, eaddr, flags, 0);
3797 }
3798
3799 return (error);
3800 }
3801
3802 /*
3803 * Utility routine for clearing a watched area in the process.
3804 * Must be an exact match of the virtual address.
3805 * size and flags don't matter.
3806 */
3807 int
3808 clear_watched_area(proc_t *p, struct watched_area *pwa)
3809 {
3810 struct watched_area *found;
3811
3812 /* we must not be holding p->p_lock, but the process must be locked */
3813 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3814 ASSERT(p->p_proc_flag & P_PR_LOCK);
3815
3816
3817 if (!pr_watch_active(p)) {
3818 kmem_free(pwa, sizeof (struct watched_area));
3819 return (0);
3820 }
3821
3822 /*
3823 * Look for a matching address in the watched areas. If a match is
3824 * found, clear the old watched area and adjust the watched page(s). It
3825 * is not an error if there is no match.
3826 */
3827 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3828 found->wa_vaddr == pwa->wa_vaddr) {
3829 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3830 found->wa_flags);
3831 avl_remove(&p->p_warea, found);
3832 kmem_free(found, sizeof (struct watched_area));
3833 }
3834
3835 kmem_free(pwa, sizeof (struct watched_area));
3836
3837 /*
3838 * If we removed the last watched area from the process, disable
3839 * watchpoints.
3840 */
3841 if (!pr_watch_active(p)) {
3842 kthread_t *t;
3843
3844 mutex_enter(&p->p_lock);
3845 if ((t = p->p_tlist) != NULL) {
3846 do {
3847 watch_disable(t);
3848 } while ((t = t->t_forw) != p->p_tlist);
3849 }
3850 mutex_exit(&p->p_lock);
3851 }
3852
3853 return (0);
3854 }
3855
3856 /*
3857 * Frees all the watched_area structures
3858 */
3859 void
3860 pr_free_watchpoints(proc_t *p)
3861 {
3862 struct watched_area *delp;
3863 void *cookie;
3864
3865 cookie = NULL;
3866 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3867 kmem_free(delp, sizeof (struct watched_area));
3868
3869 avl_destroy(&p->p_warea);
3870 }
3871
3872 /*
3873 * This one is called by the traced process to unwatch all the
3874 * pages while deallocating the list of watched_page structs.
3875 */
3876 void
3877 pr_free_watched_pages(proc_t *p)
3878 {
3879 struct as *as = p->p_as;
3880 struct watched_page *pwp;
3881 uint_t prot;
3882 int retrycnt, err;
3883 void *cookie;
3884
3885 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3886 return;
3887
3888 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3889 AS_LOCK_ENTER(as, RW_WRITER);
3890
3891 pwp = avl_first(&as->a_wpage);
3892
3893 cookie = NULL;
3894 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3895 retrycnt = 0;
3896 if ((prot = pwp->wp_oprot) != 0) {
3897 caddr_t addr = pwp->wp_vaddr;
3898 struct seg *seg;
3899 retry:
3900
3901 if ((pwp->wp_prot != prot ||
3902 (pwp->wp_flags & WP_NOWATCH)) &&
3903 (seg = as_segat(as, addr)) != NULL) {
3904 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3905 if (err == IE_RETRY) {
3906 ASSERT(retrycnt == 0);
3907 retrycnt++;
3908 goto retry;
3909 }
3910 }
3911 }
3912 kmem_free(pwp, sizeof (struct watched_page));
3913 }
3914
3915 avl_destroy(&as->a_wpage);
3916 p->p_wprot = NULL;
3917
3918 AS_LOCK_EXIT(as);
3919 }
3920
/*
 * Insert a watched area into the list of watched pages.
 * If oflags is zero then we are adding a new watched area.
 * Otherwise we are changing the flags of an existing watched area.
 *
 * For every page touched by [vaddr, eaddr) the page's watched_page record
 * is found or created, its read/write/exec reference counts are adjusted
 * (decremented for oflags, incremented for flags), and, unless the process
 * has SVFWAIT set, the page's desired protections are recomputed and the
 * record is queued on p->p_wprot when they changed.
 *
 * Returns 0 on success, or E2BIG once the tree already holds more than
 * prnwatch pages.
 * NOTE(review): the E2BIG check is repeated for each page, so it can fire
 * after earlier pages of the same area have already been updated.
 */
static int
set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
    ulong_t flags, ulong_t oflags)
{
	struct as *as = p->p_as;
	avl_tree_t *pwp_tree;
	struct watched_page *pwp, *newpwp;
	struct watched_page tpw;
	avl_index_t where;
	struct seg *seg;
	uint_t prot;
	caddr_t addr;

	/*
	 * We need to pre-allocate a list of structures before we grab the
	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
	 * held.  One zeroed structure is allocated per page of the area and
	 * the structures are chained through wp_list.
	 */
	newpwp = NULL;
	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	    addr < eaddr; addr += PAGESIZE) {
		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
		pwp->wp_list = newpwp;
		newpwp = pwp;
	}

	AS_LOCK_ENTER(as, RW_WRITER);

	/*
	 * Search for an existing watched page to contain the watched area.
	 * If none is found, grab a new one from the available list
	 * and insert it in the active list, keeping the list sorted
	 * by user-level virtual address.
	 *
	 * With SVFWAIT set the process's own p_wpage tree is used instead
	 * of the address space's a_wpage tree.
	 */
	if (p->p_flag & SVFWAIT)
		pwp_tree = &p->p_wpage;
	else
		pwp_tree = &as->a_wpage;

again:
	/* Too many watched pages: undo the pre-allocation and give up. */
	if (avl_numnodes(pwp_tree) > prnwatch) {
		AS_LOCK_EXIT(as);
		while (newpwp != NULL) {
			pwp = newpwp->wp_list;
			kmem_free(newpwp, sizeof (struct watched_page));
			newpwp = pwp;
		}
		return (E2BIG);
	}

	/* Look up the page containing vaddr; create its record if absent. */
	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
		pwp = newpwp;
		newpwp = newpwp->wp_list;
		pwp->wp_list = NULL;
		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);
		avl_insert(pwp_tree, pwp, where);
	}

	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);

	/* Drop the references held under the old flags... */
	if (oflags & WA_READ)
		pwp->wp_read--;
	if (oflags & WA_WRITE)
		pwp->wp_write--;
	if (oflags & WA_EXEC)
		pwp->wp_exec--;

	ASSERT(pwp->wp_read >= 0);
	ASSERT(pwp->wp_write >= 0);
	ASSERT(pwp->wp_exec >= 0);

	/* ...and take references for the new ones. */
	if (flags & WA_READ)
		pwp->wp_read++;
	if (flags & WA_WRITE)
		pwp->wp_write++;
	if (flags & WA_EXEC)
		pwp->wp_exec++;

	if (!(p->p_flag & SVFWAIT)) {
		vaddr = pwp->wp_vaddr;
		/*
		 * First time this page is seen with a segment behind it:
		 * record the segment's protections as both the original
		 * and the current protections.
		 */
		if (pwp->wp_oprot == 0 &&
		    (seg = as_segat(as, vaddr)) != NULL) {
			SEGOP_GETPROT(seg, vaddr, 0, &prot);
			pwp->wp_oprot = (uchar_t)prot;
			pwp->wp_prot = (uchar_t)prot;
		}
		if (pwp->wp_oprot != 0) {
			/*
			 * Derive the protections to apply from the original
			 * ones: read or exec watches remove all access,
			 * a write watch removes write access only.
			 */
			prot = pwp->wp_oprot;
			if (pwp->wp_read)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (pwp->wp_write)
				prot &= ~PROT_WRITE;
			if (pwp->wp_exec)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			/*
			 * Queue the page on p_wprot when its protections
			 * changed, it is not marked WP_NOWATCH, and it is
			 * not already queued (WP_SETPROT).
			 */
			if (!(pwp->wp_flags & WP_NOWATCH) &&
			    pwp->wp_prot != prot &&
			    (pwp->wp_flags & WP_SETPROT) == 0) {
				pwp->wp_flags |= WP_SETPROT;
				pwp->wp_list = p->p_wprot;
				p->p_wprot = pwp;
			}
			pwp->wp_prot = (uchar_t)prot;
		}
	}

	/*
	 * If the watched area extends into the next page then do
	 * it over again with the virtual address of the next page.
	 */
	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
		goto again;

	AS_LOCK_EXIT(as);

	/*
	 * Free any pages we may have over-allocated
	 */
	while (newpwp != NULL) {
		pwp = newpwp->wp_list;
		kmem_free(newpwp, sizeof (struct watched_page));
		newpwp = pwp;
	}

	return (0);
}
4053
4054 /*
4055 * Remove a watched area from the list of watched pages.
4056 * A watched area may extend over more than one page.
4057 */
4058 static void
4059 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
4060 {
4061 struct as *as = p->p_as;
4062 struct watched_page *pwp;
4063 struct watched_page tpw;
4064 avl_tree_t *tree;
4065 avl_index_t where;
4066
4067 AS_LOCK_ENTER(as, RW_WRITER);
4068
4069 if (p->p_flag & SVFWAIT)
4070 tree = &p->p_wpage;
4071 else
4072 tree = &as->a_wpage;
4073
4074 tpw.wp_vaddr = vaddr =
4075 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4076 pwp = avl_find(tree, &tpw, &where);
4077 if (pwp == NULL)
4078 pwp = avl_nearest(tree, where, AVL_AFTER);
4079
4080 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
4081 ASSERT(vaddr <= pwp->wp_vaddr);
4082
4083 if (flags & WA_READ)
4084 pwp->wp_read--;
4085 if (flags & WA_WRITE)
4086 pwp->wp_write--;
4087 if (flags & WA_EXEC)
4088 pwp->wp_exec--;
4089
4090 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
4091 /*
4092 * Reset the hat layer's protections on this page.
4093 */
4094 if (pwp->wp_oprot != 0) {
4095 uint_t prot = pwp->wp_oprot;
4096
4097 if (pwp->wp_read)
4098 prot &=
4099 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4100 if (pwp->wp_write)
4101 prot &= ~PROT_WRITE;
4102 if (pwp->wp_exec)
4103 prot &=
4104 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4105 if (!(pwp->wp_flags & WP_NOWATCH) &&
4106 pwp->wp_prot != prot &&
4107 (pwp->wp_flags & WP_SETPROT) == 0) {
4108 pwp->wp_flags |= WP_SETPROT;
4109 pwp->wp_list = p->p_wprot;
4110 p->p_wprot = pwp;
4111 }
4112 pwp->wp_prot = (uchar_t)prot;
4113 }
4114 } else {
4115 /*
4116 * No watched areas remain in this page.
4117 * Reset everything to normal.
4118 */
4119 if (pwp->wp_oprot != 0) {
4120 pwp->wp_prot = pwp->wp_oprot;
4121 if ((pwp->wp_flags & WP_SETPROT) == 0) {
4122 pwp->wp_flags |= WP_SETPROT;
4123 pwp->wp_list = p->p_wprot;
4124 p->p_wprot = pwp;
4125 }
4126 }
4127 }
4128
4129 pwp = AVL_NEXT(tree, pwp);
4130 }
4131
4132 AS_LOCK_EXIT(as);
4133 }
4134
4135 /*
4136 * Return the original protections for the specified page.
4137 */
4138 static void
4139 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
4140 {
4141 struct watched_page *pwp;
4142 struct watched_page tpw;
4143
4144 ASSERT(AS_LOCK_HELD(as));
4145
4146 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
4147 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
4148 *prot = pwp->wp_oprot;
4149 }
4150
4151 static prpagev_t *
4152 pr_pagev_create(struct seg *seg, int check_noreserve)
4153 {
4154 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
4155 size_t total_pages = seg_pages(seg);
4156
4157 /*
4158 * Limit the size of our vectors to pagev_lim pages at a time. We need
4159 * 4 or 5 bytes of storage per page, so this means we limit ourself
4160 * to about a megabyte of kernel heap by default.
4161 */
4162 pagev->pg_npages = MIN(total_pages, pagev_lim);
4163 pagev->pg_pnbase = 0;
4164
4165 pagev->pg_protv =
4166 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
4167
4168 if (check_noreserve)
4169 pagev->pg_incore =
4170 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
4171 else
4172 pagev->pg_incore = NULL;
4173
4174 return (pagev);
4175 }
4176
4177 static void
4178 pr_pagev_destroy(prpagev_t *pagev)
4179 {
4180 if (pagev->pg_incore != NULL)
4181 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
4182
4183 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
4184 kmem_free(pagev, sizeof (prpagev_t));
4185 }
4186
/*
 * (Re)load a page vector with information about the pages of seg starting
 * at addr and not going past eaddr.
 *
 * pg_pnbase is set to the page number of the start of the window and
 * pg_protv is filled with the protections of the pages in it.  When the
 * vector has an in-core array, that array is filled as well and pages with
 * no bits set in it are skipped; if a whole window contains none, the
 * window is advanced and filled again.
 *
 * Returns the address of the first page at or after addr that has a
 * non-zero in-core entry, eaddr if there is none, or addr itself when the
 * vector has no in-core array.
 */
static caddr_t
pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
{
	ulong_t lastpg = seg_page(seg, eaddr - 1);
	ulong_t pn, pnlim;
	caddr_t saddr;
	size_t len;

	ASSERT(addr >= seg->s_base && addr <= eaddr);

	if (addr == eaddr)
		return (eaddr);

refill:
	ASSERT(addr < eaddr);
	pagev->pg_pnbase = seg_page(seg, addr);
	pnlim = pagev->pg_pnbase + pagev->pg_npages;
	saddr = addr;	/* start of this window, used for GETPROT below */

	/*
	 * The window covers either everything that is left up to eaddr or
	 * as many pages as the vector can hold.
	 */
	if (lastpg < pnlim)
		len = (size_t)(eaddr - addr);
	else
		len = pagev->pg_npages * PAGESIZE;

	if (pagev->pg_incore != NULL) {
		/*
		 * INCORE cleverly has different semantics than GETPROT:
		 * it returns info on pages up to but NOT including addr + len.
		 */
		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
		pn = pagev->pg_pnbase;

		do {
			/*
			 * Guilty knowledge here: We know that segvn_incore
			 * returns more than just the low-order bit that
			 * indicates the page is actually in memory. If any
			 * bits are set, then the page has backing store.
			 */
			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
				goto out;

		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);

		/*
		 * If we examined all the pages in the vector but we're not
		 * at the end of the segment, take another lap.
		 */
		if (addr < eaddr)
			goto refill;
	}

	/*
	 * Need to take len - 1 because addr + len is the address of the
	 * first byte of the page just past the end of what we want.
	 */
out:
	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
	return (addr);
}
4247
/*
 * Find the next run of pages in seg that share the same protections.
 *
 * On entry *saddrp is where the search starts.  On return *saddrp is the
 * start of the run, *protp its protections (with watched pages reported
 * using their original protections via getwatchprot()), and the return
 * value is the address just past the run, never beyond eaddr.  When the
 * vector has an in-core array, pages with a zero in-core entry are skipped
 * at the start and terminate a run; if none of the remaining pages has a
 * non-zero entry, *saddrp and the return value are both eaddr and *protp
 * is 0.
 */
static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev.  If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			/* Ran off the end without finding a backed page. */
			if ((addr += PAGESIZE) == eaddr) {
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			/* Stepped past the window: load the next one. */
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn.  If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing.  If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different than those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	*protp = prot;
	return (addr);
}
4340
4341 size_t
4342 pr_getsegsize(struct seg *seg, int reserved)
4343 {
4344 size_t size = seg->s_size;
4345
4346 /*
4347 * If we're interested in the reserved space, return the size of the
4348 * segment itself. Everything else in this function is a special case
4349 * to determine the actual underlying size of various segment types.
4350 */
4351 if (reserved)
4352 return (size);
4353
4354 /*
4355 * If this is a segvn mapping of a regular file, return the smaller
4356 * of the segment size and the remaining size of the file beyond
4357 * the file offset corresponding to seg->s_base.
4358 */
4359 if (seg->s_ops == &segvn_ops) {
4360 vattr_t vattr;
4361 vnode_t *vp;
4362
4363 vattr.va_mask = AT_SIZE;
4364
4365 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
4366 vp != NULL && vp->v_type == VREG &&
4367 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
4368
4369 u_offset_t fsize = vattr.va_size;
4370 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
4371
4372 if (fsize < offset)
4373 fsize = 0;
4374 else
4375 fsize -= offset;
4376
4377 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
4378
4379 if (fsize < (u_offset_t)size)
4380 size = (size_t)fsize;
4381 }
4382
4383 return (size);
4384 }
4385
4386 /*
4387 * If this is an ISM shared segment, don't include pages that are
4388 * beyond the real size of the spt segment that backs it.
4389 */
4390 if (seg->s_ops == &segspt_shmops)
4391 return (MIN(spt_realsize(seg), size));
4392
4393 /*
4394 * If this is segment is a mapping from /dev/null, then this is a
4395 * reservation of virtual address space and has no actual size.
4396 * Such segments are backed by segdev and have type set to neither
4397 * MAP_SHARED nor MAP_PRIVATE.
4398 */
4399 if (seg->s_ops == &segdev_ops &&
4400 ((SEGOP_GETTYPE(seg, seg->s_base) &
4401 (MAP_SHARED | MAP_PRIVATE)) == 0))
4402 return (0);
4403
4404 /*
4405 * If this segment doesn't match one of the special types we handle,
4406 * just return the size of the segment itself.
4407 */
4408 return (size);
4409 }
4410
/*
 * Return the protections of the next run of like-protected pages in seg.
 *
 * *saddrp is the address to start from and eaddr the end of the range of
 * interest within the segment.  On return *saddrp is the start of the run
 * (it may have been advanced), *naddrp is the address just past it, and
 * the return value is the run's protections.
 *
 * *tmp carries a page vector between successive calls on the same segment:
 * it must be NULL on the call that starts at seg->s_base and is released
 * (and reset to NULL) here once the run reaches eaddr.  A caller that
 * stops before that point can release it with pr_getprot_done().
 *
 * The address space lock must be held as writer.
 */
uint_t
pr_getprot(struct seg *seg, int reserved, void **tmp,
    caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
{
	struct as *as = seg->s_as;

	caddr_t saddr = *saddrp;
	caddr_t naddr;

	int check_noreserve;
	uint_t prot;

	/* Typed views of the segment driver's private data. */
	union {
		struct segvn_data *svd;
		struct segdev_data *sdp;
		void *data;
	} s;

	s.data = seg->s_data;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(saddr >= seg->s_base && saddr < eaddr);
	ASSERT(eaddr <= seg->s_base + seg->s_size);

	/*
	 * Don't include MAP_NORESERVE pages in the address range
	 * unless their mappings have actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	check_noreserve =
	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
	    (s.svd->flags & MAP_NORESERVE));

	/*
	 * Examine every page only as a last resort.  We use guilty knowledge
	 * of segvn and segdev to avoid this: if there are no per-page
	 * protections present in the segment and we don't care about
	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
	 */
	if (!check_noreserve && saddr == seg->s_base &&
	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
		prot = s.svd->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
	    s.sdp != NULL && s.sdp->pageprot == 0) {
		prot = s.sdp->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else {
		prpagev_t *pagev;

		/*
		 * If addr is sitting at the start of the segment, then
		 * create a page vector to store protection and incore
		 * information for pages in the segment, and fill it.
		 * Otherwise, we expect *tmp to address the prpagev_t
		 * allocated by a previous call to this function.
		 */
		if (saddr == seg->s_base) {
			pagev = pr_pagev_create(seg, check_noreserve);
			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);

			ASSERT(*tmp == NULL);
			*tmp = pagev;

			ASSERT(saddr <= eaddr);
			*saddrp = saddr;

			/* pr_pagev_fill() consumed the whole range. */
			if (saddr == eaddr) {
				naddr = saddr;
				prot = 0;
				goto out;
			}

		} else {
			ASSERT(*tmp != NULL);
			pagev = (prpagev_t *)*tmp;
		}

		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
		ASSERT(naddr <= eaddr);
	}

out:
	/* End of the range reached: the page vector is no longer needed. */
	if (naddr == eaddr)
		pr_getprot_done(tmp);
	*naddrp = naddr;
	return (prot);
}
4505
4506 void
4507 pr_getprot_done(void **tmp)
4508 {
4509 if (*tmp != NULL) {
4510 pr_pagev_destroy((prpagev_t *)*tmp);
4511 *tmp = NULL;
4512 }
4513 }
4514
4515 /*
4516 * Return true iff the vnode is a /proc file from the object directory.
4517 */
4518 int
4519 pr_isobject(vnode_t *vp)
4520 {
4521 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
4522 }
4523
4524 /*
4525 * Return true iff the vnode is a /proc file opened by the process itself.
4526 */
4527 int
4528 pr_isself(vnode_t *vp)
4529 {
4530 /*
4531 * XXX: To retain binary compatibility with the old
4532 * ioctl()-based version of /proc, we exempt self-opens
4533 * of /proc/<pid> from being marked close-on-exec.
4534 */
4535 return (vn_matchops(vp, prvnodeops) &&
4536 (VTOP(vp)->pr_flags & PR_ISSELF) &&
4537 VTOP(vp)->pr_type != PR_PIDDIR);
4538 }
4539
4540 static ssize_t
4541 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
4542 {
4543 ssize_t pagesize, hatsize;
4544
4545 ASSERT(AS_WRITE_HELD(seg->s_as));
4546 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
4547 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
4548 ASSERT(saddr < eaddr);
4549
4550 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
4551 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
4552 ASSERT(pagesize != 0);
4553
4554 if (pagesize == -1)
4555 pagesize = PAGESIZE;
4556
4557 saddr += P2NPHASE((uintptr_t)saddr, pagesize);
4558
4559 while (saddr < eaddr) {
4560 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
4561 break;
4562 ASSERT(IS_P2ALIGNED(saddr, pagesize));
4563 saddr += pagesize;
4564 }
4565
4566 *naddrp = ((saddr < eaddr) ? saddr : eaddr);
4567 return (hatsize);
4568 }
4569
4570 /*
4571 * Return an array of structures with extended memory map information.
4572 * We allocate here; the caller must deallocate.
4573 */
4574 int
4575 prgetxmap(proc_t *p, list_t *iolhead)
4576 {
4577 struct as *as = p->p_as;
4578 prxmap_t *mp;
4579 struct seg *seg;
4580 struct seg *brkseg, *stkseg;
4581 struct vnode *vp;
4582 struct vattr vattr;
4583 uint_t prot;
4584
4585 ASSERT(as != &kas && AS_WRITE_HELD(as));
4586
4587 /*
4588 * Request an initial buffer size that doesn't waste memory
4589 * if the address space has only a small number of segments.
4590 */
4591 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4592
4593 if ((seg = AS_SEGFIRST(as)) == NULL)
4594 return (0);
4595
4596 brkseg = break_seg(p);
4597 stkseg = as_segat(as, prgetstackbase(p));
4598
4599 do {
4600 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4601 caddr_t saddr, naddr, baddr;
4602 void *tmp = NULL;
4603 ssize_t psz;
4604 char *parr;
4605 uint64_t npages;
4606 uint64_t pagenum;
4607
4608 if ((seg->s_flags & S_HOLE) != 0) {
4609 continue;
4610 }
4611 /*
4612 * Segment loop part one: iterate from the base of the segment
4613 * to its end, pausing at each address boundary (baddr) between
4614 * ranges that have different virtual memory protections.
4615 */
4616 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4617 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4618 ASSERT(baddr >= saddr && baddr <= eaddr);
4619
4620 /*
4621 * Segment loop part two: iterate from the current
4622 * position to the end of the protection boundary,
4623 * pausing at each address boundary (naddr) between
4624 * ranges that have different underlying page sizes.
4625 */
4626 for (; saddr < baddr; saddr = naddr) {
4627 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4628 ASSERT(naddr >= saddr && naddr <= baddr);
4629
4630 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4631
4632 mp->pr_vaddr = (uintptr_t)saddr;
4633 mp->pr_size = naddr - saddr;
4634 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4635 mp->pr_mflags = 0;
4636 if (prot & PROT_READ)
4637 mp->pr_mflags |= MA_READ;
4638 if (prot & PROT_WRITE)
4639 mp->pr_mflags |= MA_WRITE;
4640 if (prot & PROT_EXEC)
4641 mp->pr_mflags |= MA_EXEC;
4642 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4643 mp->pr_mflags |= MA_SHARED;
4644 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4645 mp->pr_mflags |= MA_NORESERVE;
4646 if (seg->s_ops == &segspt_shmops ||
4647 (seg->s_ops == &segvn_ops &&
4648 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4649 vp == NULL)))
4650 mp->pr_mflags |= MA_ANON;
4651 if (seg == brkseg)
4652 mp->pr_mflags |= MA_BREAK;
4653 else if (seg == stkseg)
4654 mp->pr_mflags |= MA_STACK;
4655 if (seg->s_ops == &segspt_shmops)
4656 mp->pr_mflags |= MA_ISM | MA_SHM;
4657
4658 mp->pr_pagesize = PAGESIZE;
4659 if (psz == -1) {
4660 mp->pr_hatpagesize = 0;
4661 } else {
4662 mp->pr_hatpagesize = psz;
4663 }
4664
4665 /*
4666 * Manufacture a filename for the "object" dir.
4667 */
4668 mp->pr_dev = PRNODEV;
4669 vattr.va_mask = AT_FSID|AT_NODEID;
4670 if (seg->s_ops == &segvn_ops &&
4671 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4672 vp != NULL && vp->v_type == VREG &&
4673 VOP_GETATTR(vp, &vattr, 0, CRED(),
4674 NULL) == 0) {
4675 mp->pr_dev = vattr.va_fsid;
4676 mp->pr_ino = vattr.va_nodeid;
4677 if (vp == p->p_exec)
4678 (void) strcpy(mp->pr_mapname,
4679 "a.out");
4680 else
4681 pr_object_name(mp->pr_mapname,
4682 vp, &vattr);
4683 }
4684
4685 /*
4686 * Get the SysV shared memory id, if any.
4687 */
4688 if ((mp->pr_mflags & MA_SHARED) &&
4689 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4690 seg->s_base)) != SHMID_NONE) {
4691 if (mp->pr_shmid == SHMID_FREE)
4692 mp->pr_shmid = -1;
4693
4694 mp->pr_mflags |= MA_SHM;
4695 } else {
4696 mp->pr_shmid = -1;
4697 }
4698
4699 npages = ((uintptr_t)(naddr - saddr)) >>
4700 PAGESHIFT;
4701 parr = kmem_zalloc(npages, KM_SLEEP);
4702
4703 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4704
4705 for (pagenum = 0; pagenum < npages; pagenum++) {
4706 if (parr[pagenum] & SEG_PAGE_INCORE)
4707 mp->pr_rss++;
4708 if (parr[pagenum] & SEG_PAGE_ANON)
4709 mp->pr_anon++;
4710 if (parr[pagenum] & SEG_PAGE_LOCKED)
4711 mp->pr_locked++;
4712 }
4713 kmem_free(parr, npages);
4714 }
4715 }
4716 ASSERT(tmp == NULL);
4717 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4718
4719 return (0);
4720 }
4721
4722 /*
4723 * Return the process's credentials. We don't need a 32-bit equivalent of
4724 * this function because prcred_t and prcred32_t are actually the same.
4725 */
4726 void
4727 prgetcred(proc_t *p, prcred_t *pcrp)
4728 {
4729 mutex_enter(&p->p_crlock);
4730 cred2prcred(p->p_cred, pcrp);
4731 mutex_exit(&p->p_crlock);
4732 }
4733
4734 void
4735 prgetsecflags(proc_t *p, prsecflags_t *psfp)
4736 {
4737 ASSERT(psfp != NULL);
4738
4739 psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
4740 psfp->pr_lower = p->p_secflags.psf_lower;
4741 psfp->pr_upper = p->p_secflags.psf_upper;
4742 psfp->pr_effective = p->p_secflags.psf_effective;
4743 psfp->pr_inherit = p->p_secflags.psf_inherit;
4744 }
4745
/*
 * Compute the actual size of the prpriv_t structure, as reported by
 * priv_prgetprivsize() when it is called with a NULL argument.
 */
size_t
prgetprivsize(void)
{
	size_t privsize = priv_prgetprivsize(NULL);

	return (privsize);
}
4755
4756 /*
4757 * Return the process's privileges. We don't need a 32-bit equivalent of
4758 * this function because prpriv_t and prpriv32_t are actually the same.
4759 */
4760 void
4761 prgetpriv(proc_t *p, prpriv_t *pprp)
4762 {
4763 mutex_enter(&p->p_crlock);
4764 cred2prpriv(p->p_cred, pprp);
4765 mutex_exit(&p->p_crlock);
4766 }
4767
4768 #ifdef _SYSCALL32_IMPL
4769 /*
4770 * Return an array of structures with HAT memory map information.
4771 * We allocate here; the caller must deallocate.
4772 */
4773 int
4774 prgetxmap32(proc_t *p, list_t *iolhead)
4775 {
4776 struct as *as = p->p_as;
4777 prxmap32_t *mp;
4778 struct seg *seg;
4779 struct seg *brkseg, *stkseg;
4780 struct vnode *vp;
4781 struct vattr vattr;
4782 uint_t prot;
4783
4784 ASSERT(as != &kas && AS_WRITE_HELD(as));
4785
4786 /*
4787 * Request an initial buffer size that doesn't waste memory
4788 * if the address space has only a small number of segments.
4789 */
4790 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4791
4792 if ((seg = AS_SEGFIRST(as)) == NULL)
4793 return (0);
4794
4795 brkseg = break_seg(p);
4796 stkseg = as_segat(as, prgetstackbase(p));
4797
4798 do {
4799 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4800 caddr_t saddr, naddr, baddr;
4801 void *tmp = NULL;
4802 ssize_t psz;
4803 char *parr;
4804 uint64_t npages;
4805 uint64_t pagenum;
4806
4807 if ((seg->s_flags & S_HOLE) != 0) {
4808 continue;
4809 }
4810
4811 /*
4812 * Segment loop part one: iterate from the base of the segment
4813 * to its end, pausing at each address boundary (baddr) between
4814 * ranges that have different virtual memory protections.
4815 */
4816 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4817 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4818 ASSERT(baddr >= saddr && baddr <= eaddr);
4819
4820 /*
4821 * Segment loop part two: iterate from the current
4822 * position to the end of the protection boundary,
4823 * pausing at each address boundary (naddr) between
4824 * ranges that have different underlying page sizes.
4825 */
4826 for (; saddr < baddr; saddr = naddr) {
4827 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4828 ASSERT(naddr >= saddr && naddr <= baddr);
4829
4830 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4831
4832 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4833 mp->pr_size = (size32_t)(naddr - saddr);
4834 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4835 mp->pr_mflags = 0;
4836 if (prot & PROT_READ)
4837 mp->pr_mflags |= MA_READ;
4838 if (prot & PROT_WRITE)
4839 mp->pr_mflags |= MA_WRITE;
4840 if (prot & PROT_EXEC)
4841 mp->pr_mflags |= MA_EXEC;
4842 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4843 mp->pr_mflags |= MA_SHARED;
4844 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4845 mp->pr_mflags |= MA_NORESERVE;
4846 if (seg->s_ops == &segspt_shmops ||
4847 (seg->s_ops == &segvn_ops &&
4848 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4849 vp == NULL)))
4850 mp->pr_mflags |= MA_ANON;
4851 if (seg == brkseg)
4852 mp->pr_mflags |= MA_BREAK;
4853 else if (seg == stkseg)
4854 mp->pr_mflags |= MA_STACK;
4855 if (seg->s_ops == &segspt_shmops)
4856 mp->pr_mflags |= MA_ISM | MA_SHM;
4857
4858 mp->pr_pagesize = PAGESIZE;
4859 if (psz == -1) {
4860 mp->pr_hatpagesize = 0;
4861 } else {
4862 mp->pr_hatpagesize = psz;
4863 }
4864
4865 /*
4866 * Manufacture a filename for the "object" dir.
4867 */
4868 mp->pr_dev = PRNODEV32;
4869 vattr.va_mask = AT_FSID|AT_NODEID;
4870 if (seg->s_ops == &segvn_ops &&
4871 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4872 vp != NULL && vp->v_type == VREG &&
4873 VOP_GETATTR(vp, &vattr, 0, CRED(),
4874 NULL) == 0) {
4875 (void) cmpldev(&mp->pr_dev,
4876 vattr.va_fsid);
4877 mp->pr_ino = vattr.va_nodeid;
4878 if (vp == p->p_exec)
4879 (void) strcpy(mp->pr_mapname,
4880 "a.out");
4881 else
4882 pr_object_name(mp->pr_mapname,
4883 vp, &vattr);
4884 }
4885
4886 /*
4887 * Get the SysV shared memory id, if any.
4888 */
4889 if ((mp->pr_mflags & MA_SHARED) &&
4890 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4891 seg->s_base)) != SHMID_NONE) {
4892 if (mp->pr_shmid == SHMID_FREE)
4893 mp->pr_shmid = -1;
4894
4895 mp->pr_mflags |= MA_SHM;
4896 } else {
4897 mp->pr_shmid = -1;
4898 }
4899
4900 npages = ((uintptr_t)(naddr - saddr)) >>
4901 PAGESHIFT;
4902 parr = kmem_zalloc(npages, KM_SLEEP);
4903
4904 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4905
4906 for (pagenum = 0; pagenum < npages; pagenum++) {
4907 if (parr[pagenum] & SEG_PAGE_INCORE)
4908 mp->pr_rss++;
4909 if (parr[pagenum] & SEG_PAGE_ANON)
4910 mp->pr_anon++;
4911 if (parr[pagenum] & SEG_PAGE_LOCKED)
4912 mp->pr_locked++;
4913 }
4914 kmem_free(parr, npages);
4915 }
4916 }
4917 ASSERT(tmp == NULL);
4918 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4919
4920 return (0);
4921 }
4922 #endif /* _SYSCALL32_IMPL */