Print this page
uts: Allow for address space randomisation.
Randomise the base addresses of shared objects, non-fixed mappings, the
stack and the heap. Introduce a service, svc:/system/process-security,
and a tool, psecflags(1), to control and observe it.
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/proc/prsubr.c
+++ new/usr/src/uts/common/fs/proc/prsubr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 28 /* All Rights Reserved */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/t_lock.h>
32 32 #include <sys/param.h>
33 33 #include <sys/cmn_err.h>
34 34 #include <sys/cred.h>
35 35 #include <sys/priv.h>
36 36 #include <sys/debug.h>
37 37 #include <sys/errno.h>
38 38 #include <sys/inline.h>
39 39 #include <sys/kmem.h>
40 40 #include <sys/mman.h>
41 41 #include <sys/proc.h>
42 42 #include <sys/brand.h>
43 43 #include <sys/sobject.h>
44 44 #include <sys/sysmacros.h>
45 45 #include <sys/systm.h>
46 46 #include <sys/uio.h>
47 47 #include <sys/var.h>
48 48 #include <sys/vfs.h>
49 49 #include <sys/vnode.h>
50 50 #include <sys/session.h>
51 51 #include <sys/pcb.h>
52 52 #include <sys/signal.h>
53 53 #include <sys/user.h>
54 54 #include <sys/disp.h>
55 55 #include <sys/class.h>
56 56 #include <sys/ts.h>
57 57 #include <sys/bitmap.h>
58 58 #include <sys/poll.h>
59 59 #include <sys/shm_impl.h>
60 60 #include <sys/fault.h>
61 61 #include <sys/syscall.h>
62 62 #include <sys/procfs.h>
63 63 #include <sys/processor.h>
64 64 #include <sys/cpuvar.h>
65 65 #include <sys/copyops.h>
66 66 #include <sys/time.h>
67 67 #include <sys/msacct.h>
68 68 #include <vm/as.h>
69 69 #include <vm/rm.h>
70 70 #include <vm/seg.h>
71 71 #include <vm/seg_vn.h>
72 72 #include <vm/seg_dev.h>
73 73 #include <vm/seg_spt.h>
74 74 #include <vm/page.h>
75 75 #include <sys/vmparam.h>
76 76 #include <sys/swap.h>
77 77 #include <fs/proc/prdata.h>
78 78 #include <sys/task.h>
79 79 #include <sys/project.h>
80 80 #include <sys/contract_impl.h>
81 81 #include <sys/contract/process.h>
82 82 #include <sys/contract/process_impl.h>
83 83 #include <sys/schedctl.h>
84 84 #include <sys/pool.h>
85 85 #include <sys/zone.h>
86 86 #include <sys/atomic.h>
87 87 #include <sys/sdt.h>
88 88
89 89 #define MAX_ITERS_SPIN 5
90 90
91 91 typedef struct prpagev {
92 92 uint_t *pg_protv; /* vector of page permissions */
93 93 char *pg_incore; /* vector of incore flags */
94 94 size_t pg_npages; /* number of pages in protv and incore */
95 95 ulong_t pg_pnbase; /* pn within segment of first protv element */
96 96 } prpagev_t;
97 97
98 98 size_t pagev_lim = 256 * 1024; /* limit on number of pages in prpagev_t */
99 99
100 100 extern struct seg_ops segdev_ops; /* needs a header file */
101 101 extern struct seg_ops segspt_shmops; /* needs a header file */
102 102
103 103 static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
104 104 static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
105 105
106 106 /*
107 107 * Choose an lwp from the complete set of lwps for the process.
108 108 * This is called for any operation applied to the process
109 109 * file descriptor that requires an lwp to operate upon.
110 110 *
111 111 * Returns a pointer to the thread for the selected LWP,
112 112 * and with the dispatcher lock held for the thread.
113 113 *
114 114 * The algorithm for choosing an lwp is critical for /proc semantics;
115 115 * don't touch this code unless you know all of the implications.
116 116 */
117 117 kthread_t *
118 118 prchoose(proc_t *p)
119 119 {
120 120 kthread_t *t;
121 121 kthread_t *t_onproc = NULL; /* running on processor */
122 122 kthread_t *t_run = NULL; /* runnable, on disp queue */
123 123 kthread_t *t_sleep = NULL; /* sleeping */
124 124 kthread_t *t_hold = NULL; /* sleeping, performing hold */
125 125 kthread_t *t_susp = NULL; /* suspended stop */
126 126 kthread_t *t_jstop = NULL; /* jobcontrol stop, w/o directed stop */
127 127 kthread_t *t_jdstop = NULL; /* jobcontrol stop with directed stop */
128 128 kthread_t *t_req = NULL; /* requested stop */
129 129 kthread_t *t_istop = NULL; /* event-of-interest stop */
130 130 kthread_t *t_dtrace = NULL; /* DTrace stop */
131 131
132 132 ASSERT(MUTEX_HELD(&p->p_lock));
133 133
134 134 /*
135 135 * If the agent lwp exists, it takes precedence over all others.
136 136 */
137 137 if ((t = p->p_agenttp) != NULL) {
138 138 thread_lock(t);
139 139 return (t);
140 140 }
141 141
142 142 if ((t = p->p_tlist) == NULL) /* start at the head of the list */
143 143 return (t);
144 144 do { /* for eacn lwp in the process */
145 145 if (VSTOPPED(t)) { /* virtually stopped */
146 146 if (t_req == NULL)
147 147 t_req = t;
148 148 continue;
149 149 }
150 150
151 151 thread_lock(t); /* make sure thread is in good state */
152 152 switch (t->t_state) {
153 153 default:
154 154 panic("prchoose: bad thread state %d, thread 0x%p",
155 155 t->t_state, (void *)t);
156 156 /*NOTREACHED*/
157 157 case TS_SLEEP:
158 158 /* this is filthy */
159 159 if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
160 160 t->t_wchan0 == NULL) {
161 161 if (t_hold == NULL)
162 162 t_hold = t;
163 163 } else {
164 164 if (t_sleep == NULL)
165 165 t_sleep = t;
166 166 }
167 167 break;
168 168 case TS_RUN:
169 169 case TS_WAIT:
170 170 if (t_run == NULL)
171 171 t_run = t;
172 172 break;
173 173 case TS_ONPROC:
174 174 if (t_onproc == NULL)
175 175 t_onproc = t;
176 176 break;
177 177 case TS_ZOMB: /* last possible choice */
178 178 break;
179 179 case TS_STOPPED:
180 180 switch (t->t_whystop) {
181 181 case PR_SUSPENDED:
182 182 if (t_susp == NULL)
183 183 t_susp = t;
184 184 break;
185 185 case PR_JOBCONTROL:
186 186 if (t->t_proc_flag & TP_PRSTOP) {
187 187 if (t_jdstop == NULL)
188 188 t_jdstop = t;
189 189 } else {
190 190 if (t_jstop == NULL)
191 191 t_jstop = t;
192 192 }
193 193 break;
194 194 case PR_REQUESTED:
195 195 if (t->t_dtrace_stop && t_dtrace == NULL)
196 196 t_dtrace = t;
197 197 else if (t_req == NULL)
198 198 t_req = t;
199 199 break;
200 200 case PR_SYSENTRY:
201 201 case PR_SYSEXIT:
202 202 case PR_SIGNALLED:
203 203 case PR_FAULTED:
204 204 /*
205 205 * Make an lwp calling exit() be the
206 206 * last lwp seen in the process.
207 207 */
208 208 if (t_istop == NULL ||
209 209 (t_istop->t_whystop == PR_SYSENTRY &&
210 210 t_istop->t_whatstop == SYS_exit))
211 211 t_istop = t;
212 212 break;
213 213 case PR_CHECKPOINT: /* can't happen? */
214 214 break;
215 215 default:
216 216 panic("prchoose: bad t_whystop %d, thread 0x%p",
217 217 t->t_whystop, (void *)t);
218 218 /*NOTREACHED*/
219 219 }
220 220 break;
221 221 }
222 222 thread_unlock(t);
223 223 } while ((t = t->t_forw) != p->p_tlist);
224 224
225 225 if (t_onproc)
226 226 t = t_onproc;
227 227 else if (t_run)
228 228 t = t_run;
229 229 else if (t_sleep)
230 230 t = t_sleep;
231 231 else if (t_jstop)
232 232 t = t_jstop;
233 233 else if (t_jdstop)
234 234 t = t_jdstop;
235 235 else if (t_istop)
236 236 t = t_istop;
237 237 else if (t_dtrace)
238 238 t = t_dtrace;
239 239 else if (t_req)
240 240 t = t_req;
241 241 else if (t_hold)
242 242 t = t_hold;
243 243 else if (t_susp)
244 244 t = t_susp;
245 245 else /* TS_ZOMB */
246 246 t = p->p_tlist;
247 247
248 248 if (t != NULL)
249 249 thread_lock(t);
250 250 return (t);
251 251 }
252 252
253 253 /*
254 254 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
255 255 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
256 256 * on the /proc file descriptor. Called from stop() when a traced
257 257 * process stops on an event of interest. Also called from exit()
258 258 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
259 259 */
260 260 void
261 261 prnotify(struct vnode *vp)
262 262 {
263 263 prcommon_t *pcp = VTOP(vp)->pr_common;
264 264
265 265 mutex_enter(&pcp->prc_mutex);
266 266 cv_broadcast(&pcp->prc_wait);
267 267 mutex_exit(&pcp->prc_mutex);
268 268 if (pcp->prc_flags & PRC_POLL) {
269 269 /*
270 270 * We call pollwakeup() with POLLHUP to ensure that
271 271 * the pollers are awakened even if they are polling
272 272 * for nothing (i.e., waiting for the process to exit).
273 273 * This enables the use of the PRC_POLL flag for optimization
274 274 * (we can turn off PRC_POLL only if we know no pollers remain).
275 275 */
276 276 pcp->prc_flags &= ~PRC_POLL;
277 277 pollwakeup(&pcp->prc_pollhead, POLLHUP);
278 278 }
279 279 }
280 280
281 281 /* called immediately below, in prfree() */
282 282 static void
283 283 prfreenotify(vnode_t *vp)
284 284 {
285 285 prnode_t *pnp;
286 286 prcommon_t *pcp;
287 287
288 288 while (vp != NULL) {
289 289 pnp = VTOP(vp);
290 290 pcp = pnp->pr_common;
291 291 ASSERT(pcp->prc_thread == NULL);
292 292 pcp->prc_proc = NULL;
293 293 /*
294 294 * We can't call prnotify() here because we are holding
295 295 * pidlock. We assert that there is no need to.
296 296 */
297 297 mutex_enter(&pcp->prc_mutex);
298 298 cv_broadcast(&pcp->prc_wait);
299 299 mutex_exit(&pcp->prc_mutex);
300 300 ASSERT(!(pcp->prc_flags & PRC_POLL));
301 301
302 302 vp = pnp->pr_next;
303 303 pnp->pr_next = NULL;
304 304 }
305 305 }
306 306
307 307 /*
308 308 * Called from a hook in freeproc() when a traced process is removed
309 309 * from the process table. The proc-table pointers of all associated
310 310 * /proc vnodes are cleared to indicate that the process has gone away.
311 311 */
312 312 void
313 313 prfree(proc_t *p)
314 314 {
315 315 uint_t slot = p->p_slot;
316 316
317 317 ASSERT(MUTEX_HELD(&pidlock));
318 318
319 319 /*
320 320 * Block the process against /proc so it can be freed.
321 321 * It cannot be freed while locked by some controlling process.
322 322 * Lock ordering:
323 323 * pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
324 324 */
325 325 mutex_enter(&pr_pidlock); /* protects pcp->prc_proc */
326 326 mutex_enter(&p->p_lock);
327 327 while (p->p_proc_flag & P_PR_LOCK) {
328 328 mutex_exit(&pr_pidlock);
329 329 cv_wait(&pr_pid_cv[slot], &p->p_lock);
330 330 mutex_exit(&p->p_lock);
331 331 mutex_enter(&pr_pidlock);
332 332 mutex_enter(&p->p_lock);
333 333 }
334 334
335 335 ASSERT(p->p_tlist == NULL);
336 336
337 337 prfreenotify(p->p_plist);
338 338 p->p_plist = NULL;
339 339
340 340 prfreenotify(p->p_trace);
341 341 p->p_trace = NULL;
342 342
343 343 /*
344 344 * We broadcast to wake up everyone waiting for this process.
345 345 * No one can reach this process from this point on.
346 346 */
347 347 cv_broadcast(&pr_pid_cv[slot]);
348 348
349 349 mutex_exit(&p->p_lock);
350 350 mutex_exit(&pr_pidlock);
351 351 }
352 352
353 353 /*
354 354 * Called from a hook in exit() when a traced process is becoming a zombie.
355 355 */
356 356 void
357 357 prexit(proc_t *p)
358 358 {
359 359 ASSERT(MUTEX_HELD(&p->p_lock));
360 360
361 361 if (pr_watch_active(p)) {
362 362 pr_free_watchpoints(p);
363 363 watch_disable(curthread);
364 364 }
365 365 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */
366 366 if (p->p_trace) {
367 367 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
368 368 prnotify(p->p_trace);
369 369 }
370 370 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */
371 371 }
372 372
373 373 /*
374 374 * Called when a thread calls lwp_exit().
375 375 */
376 376 void
377 377 prlwpexit(kthread_t *t)
378 378 {
379 379 vnode_t *vp;
380 380 prnode_t *pnp;
381 381 prcommon_t *pcp;
382 382 proc_t *p = ttoproc(t);
383 383 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
384 384
385 385 ASSERT(t == curthread);
386 386 ASSERT(MUTEX_HELD(&p->p_lock));
387 387
388 388 /*
389 389 * The process must be blocked against /proc to do this safely.
390 390 * The lwp must not disappear while the process is marked P_PR_LOCK.
391 391 * It is the caller's responsibility to have called prbarrier(p).
392 392 */
393 393 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
394 394
395 395 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
396 396 pnp = VTOP(vp);
397 397 pcp = pnp->pr_common;
398 398 if (pcp->prc_thread == t) {
399 399 pcp->prc_thread = NULL;
400 400 pcp->prc_flags |= PRC_DESTROY;
401 401 }
402 402 }
403 403
404 404 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
405 405 pnp = VTOP(vp);
406 406 pcp = pnp->pr_common;
407 407 pcp->prc_thread = NULL;
408 408 pcp->prc_flags |= PRC_DESTROY;
409 409 prnotify(vp);
410 410 }
411 411
412 412 if (p->p_trace)
413 413 prnotify(p->p_trace);
414 414 }
415 415
416 416 /*
417 417 * Called when a zombie thread is joined or when a
418 418 * detached lwp exits. Called from lwp_hash_out().
419 419 */
420 420 void
421 421 prlwpfree(proc_t *p, lwpent_t *lep)
422 422 {
423 423 vnode_t *vp;
424 424 prnode_t *pnp;
425 425 prcommon_t *pcp;
426 426
427 427 ASSERT(MUTEX_HELD(&p->p_lock));
428 428
429 429 /*
430 430 * The process must be blocked against /proc to do this safely.
431 431 * The lwp must not disappear while the process is marked P_PR_LOCK.
432 432 * It is the caller's responsibility to have called prbarrier(p).
433 433 */
434 434 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
435 435
436 436 vp = lep->le_trace;
437 437 lep->le_trace = NULL;
438 438 while (vp) {
439 439 prnotify(vp);
440 440 pnp = VTOP(vp);
441 441 pcp = pnp->pr_common;
442 442 ASSERT(pcp->prc_thread == NULL &&
443 443 (pcp->prc_flags & PRC_DESTROY));
444 444 pcp->prc_tslot = -1;
445 445 vp = pnp->pr_next;
446 446 pnp->pr_next = NULL;
447 447 }
448 448
449 449 if (p->p_trace)
450 450 prnotify(p->p_trace);
451 451 }
452 452
453 453 /*
454 454 * Called from a hook in exec() when a thread starts exec().
455 455 */
456 456 void
457 457 prexecstart(void)
458 458 {
459 459 proc_t *p = ttoproc(curthread);
460 460 klwp_t *lwp = ttolwp(curthread);
461 461
462 462 /*
463 463 * The P_PR_EXEC flag blocks /proc operations for
464 464 * the duration of the exec().
465 465 * We can't start exec() while the process is
466 466 * locked by /proc, so we call prbarrier().
467 467 * lwp_nostop keeps the process from being stopped
468 468 * via job control for the duration of the exec().
469 469 */
470 470
471 471 ASSERT(MUTEX_HELD(&p->p_lock));
472 472 prbarrier(p);
473 473 lwp->lwp_nostop++;
474 474 p->p_proc_flag |= P_PR_EXEC;
475 475 }
476 476
477 477 /*
478 478 * Called from a hook in exec() when a thread finishes exec().
479 479 * The thread may or may not have succeeded. Some other thread
480 480 * may have beat it to the punch.
481 481 */
482 482 void
483 483 prexecend(void)
484 484 {
485 485 proc_t *p = ttoproc(curthread);
486 486 klwp_t *lwp = ttolwp(curthread);
487 487 vnode_t *vp;
488 488 prnode_t *pnp;
489 489 prcommon_t *pcp;
490 490 model_t model = p->p_model;
491 491 id_t tid = curthread->t_tid;
492 492 int tslot = curthread->t_dslot;
493 493
494 494 ASSERT(MUTEX_HELD(&p->p_lock));
495 495
496 496 lwp->lwp_nostop--;
497 497 if (p->p_flag & SEXITLWPS) {
498 498 /*
499 499 * We are on our way to exiting because some
500 500 * other thread beat us in the race to exec().
501 501 * Don't clear the P_PR_EXEC flag in this case.
502 502 */
503 503 return;
504 504 }
505 505
506 506 /*
507 507 * Wake up anyone waiting in /proc for the process to complete exec().
508 508 */
509 509 p->p_proc_flag &= ~P_PR_EXEC;
510 510 if ((vp = p->p_trace) != NULL) {
511 511 pcp = VTOP(vp)->pr_common;
512 512 mutex_enter(&pcp->prc_mutex);
513 513 cv_broadcast(&pcp->prc_wait);
514 514 mutex_exit(&pcp->prc_mutex);
515 515 for (; vp != NULL; vp = pnp->pr_next) {
516 516 pnp = VTOP(vp);
517 517 pnp->pr_common->prc_datamodel = model;
518 518 }
519 519 }
520 520 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
521 521 /*
522 522 * We dealt with the process common above.
523 523 */
524 524 ASSERT(p->p_trace != NULL);
525 525 pcp = VTOP(vp)->pr_common;
526 526 mutex_enter(&pcp->prc_mutex);
527 527 cv_broadcast(&pcp->prc_wait);
528 528 mutex_exit(&pcp->prc_mutex);
529 529 for (; vp != NULL; vp = pnp->pr_next) {
530 530 pnp = VTOP(vp);
531 531 pcp = pnp->pr_common;
532 532 pcp->prc_datamodel = model;
533 533 pcp->prc_tid = tid;
534 534 pcp->prc_tslot = tslot;
535 535 }
536 536 }
537 537 }
538 538
539 539 /*
540 540 * Called from a hook in relvm() just before freeing the address space.
541 541 * We free all the watched areas now.
542 542 */
543 543 void
544 544 prrelvm(void)
545 545 {
546 546 proc_t *p = ttoproc(curthread);
547 547
548 548 mutex_enter(&p->p_lock);
549 549 prbarrier(p); /* block all other /proc operations */
550 550 if (pr_watch_active(p)) {
551 551 pr_free_watchpoints(p);
552 552 watch_disable(curthread);
553 553 }
554 554 mutex_exit(&p->p_lock);
555 555 pr_free_watched_pages(p);
556 556 }
557 557
558 558 /*
559 559 * Called from hooks in exec-related code when a traced process
560 560 * attempts to exec(2) a setuid/setgid program or an unreadable
561 561 * file. Rather than fail the exec we invalidate the associated
562 562 * /proc vnodes so that subsequent attempts to use them will fail.
563 563 *
564 564 * All /proc vnodes, except directory vnodes, are retained on a linked
565 565 * list (rooted at p_plist in the process structure) until last close.
566 566 *
567 567 * A controlling process must re-open the /proc files in order to
568 568 * regain control.
569 569 */
570 570 void
571 571 prinvalidate(struct user *up)
572 572 {
573 573 kthread_t *t = curthread;
574 574 proc_t *p = ttoproc(t);
575 575 vnode_t *vp;
576 576 prnode_t *pnp;
577 577 int writers = 0;
578 578
579 579 mutex_enter(&p->p_lock);
580 580 prbarrier(p); /* block all other /proc operations */
581 581
582 582 /*
583 583 * At this moment, there can be only one lwp in the process.
584 584 */
585 585 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
586 586
587 587 /*
588 588 * Invalidate any currently active /proc vnodes.
589 589 */
590 590 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
591 591 pnp = VTOP(vp);
592 592 switch (pnp->pr_type) {
593 593 case PR_PSINFO: /* these files can read by anyone */
594 594 case PR_LPSINFO:
595 595 case PR_LWPSINFO:
596 596 case PR_LWPDIR:
597 597 case PR_LWPIDDIR:
598 598 case PR_USAGE:
599 599 case PR_LUSAGE:
600 600 case PR_LWPUSAGE:
601 601 break;
602 602 default:
603 603 pnp->pr_flags |= PR_INVAL;
604 604 break;
605 605 }
606 606 }
607 607 /*
608 608 * Wake up anyone waiting for the process or lwp.
609 609 * p->p_trace is guaranteed to be non-NULL if there
610 610 * are any open /proc files for this process.
611 611 */
612 612 if ((vp = p->p_trace) != NULL) {
613 613 prcommon_t *pcp = VTOP(vp)->pr_pcommon;
614 614
615 615 prnotify(vp);
616 616 /*
617 617 * Are there any writers?
618 618 */
619 619 if ((writers = pcp->prc_writers) != 0) {
620 620 /*
621 621 * Clear the exclusive open flag (old /proc interface).
622 622 * Set prc_selfopens equal to prc_writers so that
623 623 * the next O_EXCL|O_WRITE open will succeed
624 624 * even with existing (though invalid) writers.
625 625 * prclose() must decrement prc_selfopens when
626 626 * the invalid files are closed.
627 627 */
628 628 pcp->prc_flags &= ~PRC_EXCL;
629 629 ASSERT(pcp->prc_selfopens <= writers);
630 630 pcp->prc_selfopens = writers;
631 631 }
632 632 }
633 633 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
634 634 while (vp != NULL) {
635 635 /*
636 636 * We should not invalidate the lwpiddir vnodes,
637 637 * but the necessities of maintaining the old
638 638 * ioctl()-based version of /proc require it.
639 639 */
640 640 pnp = VTOP(vp);
641 641 pnp->pr_flags |= PR_INVAL;
642 642 prnotify(vp);
643 643 vp = pnp->pr_next;
644 644 }
645 645
646 646 /*
647 647 * If any tracing flags are in effect and any vnodes are open for
648 648 * writing then set the requested-stop and run-on-last-close flags.
649 649 * Otherwise, clear all tracing flags.
650 650 */
651 651 t->t_proc_flag &= ~TP_PAUSE;
652 652 if ((p->p_proc_flag & P_PR_TRACE) && writers) {
653 653 t->t_proc_flag |= TP_PRSTOP;
654 654 aston(t); /* so ISSIG will see the flag */
655 655 p->p_proc_flag |= P_PR_RUNLCL;
656 656 } else {
657 657 premptyset(&up->u_entrymask); /* syscalls */
658 658 premptyset(&up->u_exitmask);
659 659 up->u_systrap = 0;
660 660 premptyset(&p->p_sigmask); /* signals */
661 661 premptyset(&p->p_fltmask); /* faults */
662 662 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
663 663 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
664 664 prnostep(ttolwp(t));
665 665 }
666 666
667 667 mutex_exit(&p->p_lock);
668 668 }
669 669
670 670 /*
671 671 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
672 672 * Return with pr_pidlock held in all cases.
673 673 * Return with p_lock held if the the process still exists.
674 674 * Return value is the process pointer if the process still exists, else NULL.
675 675 * If we lock the process, give ourself kernel priority to avoid deadlocks;
676 676 * this is undone in prunlock().
677 677 */
678 678 proc_t *
679 679 pr_p_lock(prnode_t *pnp)
680 680 {
681 681 proc_t *p;
682 682 prcommon_t *pcp;
683 683
684 684 mutex_enter(&pr_pidlock);
685 685 if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
686 686 return (NULL);
687 687 mutex_enter(&p->p_lock);
688 688 while (p->p_proc_flag & P_PR_LOCK) {
689 689 /*
690 690 * This cv/mutex pair is persistent even if
691 691 * the process disappears while we sleep.
692 692 */
693 693 kcondvar_t *cv = &pr_pid_cv[p->p_slot];
694 694 kmutex_t *mp = &p->p_lock;
695 695
696 696 mutex_exit(&pr_pidlock);
697 697 cv_wait(cv, mp);
698 698 mutex_exit(mp);
699 699 mutex_enter(&pr_pidlock);
700 700 if (pcp->prc_proc == NULL)
701 701 return (NULL);
702 702 ASSERT(p == pcp->prc_proc);
703 703 mutex_enter(&p->p_lock);
704 704 }
705 705 p->p_proc_flag |= P_PR_LOCK;
706 706 THREAD_KPRI_REQUEST();
707 707 return (p);
708 708 }
709 709
710 710 /*
711 711 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
712 712 * This prevents any lwp of the process from disappearing and
713 713 * blocks most operations that a process can perform on itself.
714 714 * Returns 0 on success, a non-zero error number on failure.
715 715 *
716 716 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
717 717 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
718 718 *
719 719 * error returns:
720 720 * ENOENT: process or lwp has disappeared or process is exiting
721 721 * (or has become a zombie and zdisp == ZNO).
722 722 * EAGAIN: procfs vnode has become invalid.
723 723 * EINTR: signal arrived while waiting for exec to complete.
724 724 */
725 725 int
726 726 prlock(prnode_t *pnp, int zdisp)
727 727 {
728 728 prcommon_t *pcp;
729 729 proc_t *p;
730 730
731 731 again:
732 732 pcp = pnp->pr_common;
733 733 p = pr_p_lock(pnp);
734 734 mutex_exit(&pr_pidlock);
735 735
736 736 /*
737 737 * Return ENOENT immediately if there is no process.
738 738 */
739 739 if (p == NULL)
740 740 return (ENOENT);
741 741
742 742 ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
743 743
744 744 /*
745 745 * Return ENOENT if process entered zombie state or is exiting
746 746 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
747 747 */
748 748 if (zdisp == ZNO &&
749 749 ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
750 750 prunlock(pnp);
751 751 return (ENOENT);
752 752 }
753 753
754 754 /*
755 755 * If lwp-specific, check to see if lwp has disappeared.
756 756 */
757 757 if (pcp->prc_flags & PRC_LWP) {
758 758 if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
759 759 pcp->prc_tslot == -1) {
760 760 prunlock(pnp);
761 761 return (ENOENT);
762 762 }
763 763 }
764 764
765 765 /*
766 766 * Return EAGAIN if we have encountered a security violation.
767 767 * (The process exec'd a set-id or unreadable executable file.)
768 768 */
769 769 if (pnp->pr_flags & PR_INVAL) {
770 770 prunlock(pnp);
771 771 return (EAGAIN);
772 772 }
773 773
774 774 /*
775 775 * If process is undergoing an exec(), wait for
776 776 * completion and then start all over again.
777 777 */
778 778 if (p->p_proc_flag & P_PR_EXEC) {
779 779 pcp = pnp->pr_pcommon; /* Put on the correct sleep queue */
780 780 mutex_enter(&pcp->prc_mutex);
781 781 prunlock(pnp);
782 782 if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
783 783 mutex_exit(&pcp->prc_mutex);
784 784 return (EINTR);
785 785 }
786 786 mutex_exit(&pcp->prc_mutex);
787 787 goto again;
788 788 }
789 789
790 790 /*
791 791 * We return holding p->p_lock.
792 792 */
793 793 return (0);
794 794 }
795 795
796 796 /*
797 797 * Undo prlock() and pr_p_lock().
798 798 * p->p_lock is still held; pr_pidlock is no longer held.
799 799 *
800 800 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
801 801 * if any, waiting for the flag to be dropped; it retains p->p_lock.
802 802 *
803 803 * prunlock() calls prunmark() and then drops p->p_lock.
804 804 */
805 805 void
806 806 prunmark(proc_t *p)
807 807 {
808 808 ASSERT(p->p_proc_flag & P_PR_LOCK);
809 809 ASSERT(MUTEX_HELD(&p->p_lock));
810 810
811 811 cv_signal(&pr_pid_cv[p->p_slot]);
812 812 p->p_proc_flag &= ~P_PR_LOCK;
813 813 THREAD_KPRI_RELEASE();
814 814 }
815 815
816 816 void
817 817 prunlock(prnode_t *pnp)
818 818 {
819 819 prcommon_t *pcp = pnp->pr_common;
820 820 proc_t *p = pcp->prc_proc;
821 821
822 822 /*
823 823 * If we (or someone) gave it a SIGKILL, and it is not
824 824 * already a zombie, set it running unconditionally.
825 825 */
826 826 if ((p->p_flag & SKILLED) &&
827 827 !(p->p_flag & SEXITING) &&
828 828 !(pcp->prc_flags & PRC_DESTROY) &&
829 829 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
830 830 (void) pr_setrun(pnp, 0);
831 831 prunmark(p);
832 832 mutex_exit(&p->p_lock);
833 833 }
834 834
835 835 /*
836 836 * Called while holding p->p_lock to delay until the process is unlocked.
837 837 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
838 838 * The process cannot become locked again until p->p_lock is dropped.
839 839 */
840 840 void
841 841 prbarrier(proc_t *p)
842 842 {
843 843 ASSERT(MUTEX_HELD(&p->p_lock));
844 844
845 845 if (p->p_proc_flag & P_PR_LOCK) {
846 846 /* The process is locked; delay until not locked */
847 847 uint_t slot = p->p_slot;
848 848
849 849 while (p->p_proc_flag & P_PR_LOCK)
850 850 cv_wait(&pr_pid_cv[slot], &p->p_lock);
851 851 cv_signal(&pr_pid_cv[slot]);
852 852 }
853 853 }
854 854
855 855 /*
856 856 * Return process/lwp status.
857 857 * The u-block is mapped in by this routine and unmapped at the end.
858 858 */
859 859 void
860 860 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
861 861 {
862 862 kthread_t *t;
863 863
864 864 ASSERT(MUTEX_HELD(&p->p_lock));
865 865
866 866 t = prchoose(p); /* returns locked thread */
867 867 ASSERT(t != NULL);
868 868 thread_unlock(t);
869 869
870 870 /* just bzero the process part, prgetlwpstatus() does the rest */
871 871 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
872 872 sp->pr_nlwp = p->p_lwpcnt;
873 873 sp->pr_nzomb = p->p_zombcnt;
874 874 prassignset(&sp->pr_sigpend, &p->p_sig);
875 875 sp->pr_brkbase = (uintptr_t)p->p_brkbase;
876 876 sp->pr_brksize = p->p_brksize;
877 877 sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
878 878 sp->pr_stksize = p->p_stksize;
879 879 sp->pr_pid = p->p_pid;
880 880 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
881 881 (p->p_flag & SZONETOP)) {
882 882 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
883 883 /*
884 884 * Inside local zones, fake zsched's pid as parent pids for
885 885 * processes which reference processes outside of the zone.
↓ open down ↓ |
885 lines elided |
↑ open up ↑ |
886 886 */
887 887 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
888 888 } else {
889 889 sp->pr_ppid = p->p_ppid;
890 890 }
891 891 sp->pr_pgid = p->p_pgrp;
892 892 sp->pr_sid = p->p_sessp->s_sid;
893 893 sp->pr_taskid = p->p_task->tk_tkid;
894 894 sp->pr_projid = p->p_task->tk_proj->kpj_id;
895 895 sp->pr_zoneid = p->p_zone->zone_id;
896 + bcopy(&p->p_secflags, &sp->pr_secflags, sizeof (psecflags_t));
896 897 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
897 898 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
898 899 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
899 900 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
900 901 prassignset(&sp->pr_sigtrace, &p->p_sigmask);
901 902 prassignset(&sp->pr_flttrace, &p->p_fltmask);
902 903 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
903 904 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
904 905 switch (p->p_model) {
905 906 case DATAMODEL_ILP32:
906 907 sp->pr_dmodel = PR_MODEL_ILP32;
907 908 break;
908 909 case DATAMODEL_LP64:
909 910 sp->pr_dmodel = PR_MODEL_LP64;
910 911 break;
911 912 }
912 913 if (p->p_agenttp)
913 914 sp->pr_agentid = p->p_agenttp->t_tid;
914 915
915 916 /* get the chosen lwp's status */
916 917 prgetlwpstatus(t, &sp->pr_lwp, zp);
917 918
918 919 /* replicate the flags */
919 920 sp->pr_flags = sp->pr_lwp.pr_flags;
920 921 }
921 922
922 923 #ifdef _SYSCALL32_IMPL
923 924 void
924 925 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
925 926 {
926 927 proc_t *p = ttoproc(t);
927 928 klwp_t *lwp = ttolwp(t);
928 929 struct mstate *ms = &lwp->lwp_mstate;
929 930 hrtime_t usr, sys;
930 931 int flags;
931 932 ulong_t instr;
932 933
933 934 ASSERT(MUTEX_HELD(&p->p_lock));
934 935
935 936 bzero(sp, sizeof (*sp));
936 937 flags = 0L;
937 938 if (t->t_state == TS_STOPPED) {
938 939 flags |= PR_STOPPED;
939 940 if ((t->t_schedflag & TS_PSTART) == 0)
940 941 flags |= PR_ISTOP;
941 942 } else if (VSTOPPED(t)) {
942 943 flags |= PR_STOPPED|PR_ISTOP;
943 944 }
944 945 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
945 946 flags |= PR_DSTOP;
946 947 if (lwp->lwp_asleep)
947 948 flags |= PR_ASLEEP;
948 949 if (t == p->p_agenttp)
949 950 flags |= PR_AGENT;
950 951 if (!(t->t_proc_flag & TP_TWAIT))
951 952 flags |= PR_DETACH;
952 953 if (t->t_proc_flag & TP_DAEMON)
953 954 flags |= PR_DAEMON;
954 955 if (p->p_proc_flag & P_PR_FORK)
955 956 flags |= PR_FORK;
956 957 if (p->p_proc_flag & P_PR_RUNLCL)
957 958 flags |= PR_RLC;
958 959 if (p->p_proc_flag & P_PR_KILLCL)
959 960 flags |= PR_KLC;
960 961 if (p->p_proc_flag & P_PR_ASYNC)
961 962 flags |= PR_ASYNC;
962 963 if (p->p_proc_flag & P_PR_BPTADJ)
963 964 flags |= PR_BPTADJ;
964 965 if (p->p_proc_flag & P_PR_PTRACE)
965 966 flags |= PR_PTRACE;
966 967 if (p->p_flag & SMSACCT)
967 968 flags |= PR_MSACCT;
968 969 if (p->p_flag & SMSFORK)
969 970 flags |= PR_MSFORK;
970 971 if (p->p_flag & SVFWAIT)
971 972 flags |= PR_VFORKP;
972 973 sp->pr_flags = flags;
973 974 if (VSTOPPED(t)) {
974 975 sp->pr_why = PR_REQUESTED;
975 976 sp->pr_what = 0;
976 977 } else {
977 978 sp->pr_why = t->t_whystop;
978 979 sp->pr_what = t->t_whatstop;
979 980 }
980 981 sp->pr_lwpid = t->t_tid;
981 982 sp->pr_cursig = lwp->lwp_cursig;
982 983 prassignset(&sp->pr_lwppend, &t->t_sig);
983 984 schedctl_finish_sigblock(t);
984 985 prassignset(&sp->pr_lwphold, &t->t_hold);
985 986 if (t->t_whystop == PR_FAULTED) {
986 987 siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
987 988 if (t->t_whatstop == FLTPAGE)
988 989 sp->pr_info.si_addr =
989 990 (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
990 991 } else if (lwp->lwp_curinfo)
991 992 siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
992 993 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
993 994 sp->pr_info.si_zoneid != zp->zone_id) {
994 995 sp->pr_info.si_pid = zp->zone_zsched->p_pid;
995 996 sp->pr_info.si_uid = 0;
996 997 sp->pr_info.si_ctid = -1;
997 998 sp->pr_info.si_zoneid = zp->zone_id;
998 999 }
999 1000 sp->pr_altstack.ss_sp =
1000 1001 (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
1001 1002 sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1002 1003 sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1003 1004 prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1004 1005 sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1005 1006 sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1006 1007 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1007 1008 sizeof (sp->pr_clname) - 1);
1008 1009 if (flags & PR_STOPPED)
1009 1010 hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1010 1011 usr = ms->ms_acct[LMS_USER];
1011 1012 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1012 1013 scalehrtime(&usr);
1013 1014 scalehrtime(&sys);
1014 1015 hrt2ts32(usr, &sp->pr_utime);
1015 1016 hrt2ts32(sys, &sp->pr_stime);
1016 1017
1017 1018 /*
1018 1019 * Fetch the current instruction, if not a system process.
1019 1020 * We don't attempt this unless the lwp is stopped.
1020 1021 */
1021 1022 if ((p->p_flag & SSYS) || p->p_as == &kas)
1022 1023 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1023 1024 else if (!(flags & PR_STOPPED))
1024 1025 sp->pr_flags |= PR_PCINVAL;
1025 1026 else if (!prfetchinstr(lwp, &instr))
1026 1027 sp->pr_flags |= PR_PCINVAL;
1027 1028 else
1028 1029 sp->pr_instr = (uint32_t)instr;
1029 1030
1030 1031 /*
1031 1032 * Drop p_lock while touching the lwp's stack.
1032 1033 */
1033 1034 mutex_exit(&p->p_lock);
1034 1035 if (prisstep(lwp))
1035 1036 sp->pr_flags |= PR_STEP;
1036 1037 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1037 1038 int i;
1038 1039
1039 1040 sp->pr_syscall = get_syscall32_args(lwp,
1040 1041 (int *)sp->pr_sysarg, &i);
1041 1042 sp->pr_nsysarg = (ushort_t)i;
1042 1043 }
1043 1044 if ((flags & PR_STOPPED) || t == curthread)
1044 1045 prgetprregs32(lwp, sp->pr_reg);
1045 1046 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1046 1047 (flags & PR_VFORKP)) {
1047 1048 long r1, r2;
1048 1049 user_t *up;
1049 1050 auxv_t *auxp;
1050 1051 int i;
1051 1052
1052 1053 sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1053 1054 if (sp->pr_errno == 0) {
1054 1055 sp->pr_rval1 = (int32_t)r1;
1055 1056 sp->pr_rval2 = (int32_t)r2;
1056 1057 sp->pr_errpriv = PRIV_NONE;
1057 1058 } else
1058 1059 sp->pr_errpriv = lwp->lwp_badpriv;
1059 1060
1060 1061 if (t->t_sysnum == SYS_execve) {
1061 1062 up = PTOU(p);
1062 1063 sp->pr_sysarg[0] = 0;
1063 1064 sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1064 1065 sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1065 1066 for (i = 0, auxp = up->u_auxv;
1066 1067 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1067 1068 i++, auxp++) {
1068 1069 if (auxp->a_type == AT_SUN_EXECNAME) {
1069 1070 sp->pr_sysarg[0] =
1070 1071 (caddr32_t)
1071 1072 (uintptr_t)auxp->a_un.a_ptr;
1072 1073 break;
1073 1074 }
1074 1075 }
1075 1076 }
1076 1077 }
1077 1078 if (prhasfp())
1078 1079 prgetprfpregs32(lwp, &sp->pr_fpreg);
1079 1080 mutex_enter(&p->p_lock);
1080 1081 }
1081 1082
1082 1083 void
1083 1084 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1084 1085 {
1085 1086 kthread_t *t;
1086 1087
1087 1088 ASSERT(MUTEX_HELD(&p->p_lock));
1088 1089
1089 1090 t = prchoose(p); /* returns locked thread */
1090 1091 ASSERT(t != NULL);
1091 1092 thread_unlock(t);
1092 1093
1093 1094 /* just bzero the process part, prgetlwpstatus32() does the rest */
1094 1095 bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1095 1096 sp->pr_nlwp = p->p_lwpcnt;
1096 1097 sp->pr_nzomb = p->p_zombcnt;
1097 1098 prassignset(&sp->pr_sigpend, &p->p_sig);
1098 1099 sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1099 1100 sp->pr_brksize = (uint32_t)p->p_brksize;
1100 1101 sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1101 1102 sp->pr_stksize = (uint32_t)p->p_stksize;
1102 1103 sp->pr_pid = p->p_pid;
1103 1104 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1104 1105 (p->p_flag & SZONETOP)) {
1105 1106 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1106 1107 /*
1107 1108 * Inside local zones, fake zsched's pid as parent pids for
1108 1109 * processes which reference processes outside of the zone.
↓ open down ↓ |
203 lines elided |
↑ open up ↑ |
1109 1110 */
1110 1111 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1111 1112 } else {
1112 1113 sp->pr_ppid = p->p_ppid;
1113 1114 }
1114 1115 sp->pr_pgid = p->p_pgrp;
1115 1116 sp->pr_sid = p->p_sessp->s_sid;
1116 1117 sp->pr_taskid = p->p_task->tk_tkid;
1117 1118 sp->pr_projid = p->p_task->tk_proj->kpj_id;
1118 1119 sp->pr_zoneid = p->p_zone->zone_id;
1120 + bcopy(&p->p_secflags, &sp->pr_secflags, sizeof (psecflags_t));
1119 1121 hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1120 1122 hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1121 1123 TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1122 1124 TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1123 1125 prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1124 1126 prassignset(&sp->pr_flttrace, &p->p_fltmask);
1125 1127 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1126 1128 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1127 1129 switch (p->p_model) {
1128 1130 case DATAMODEL_ILP32:
1129 1131 sp->pr_dmodel = PR_MODEL_ILP32;
1130 1132 break;
1131 1133 case DATAMODEL_LP64:
1132 1134 sp->pr_dmodel = PR_MODEL_LP64;
1133 1135 break;
1134 1136 }
1135 1137 if (p->p_agenttp)
1136 1138 sp->pr_agentid = p->p_agenttp->t_tid;
1137 1139
1138 1140 /* get the chosen lwp's status */
1139 1141 prgetlwpstatus32(t, &sp->pr_lwp, zp);
1140 1142
1141 1143 /* replicate the flags */
1142 1144 sp->pr_flags = sp->pr_lwp.pr_flags;
1143 1145 }
1144 1146 #endif /* _SYSCALL32_IMPL */
1145 1147
1146 1148 /*
1147 1149 * Return lwp status.
 *
 * Fills in *sp for the lwp backing thread 't'.  p_lock of t's process must
 * be held on entry; it is dropped and re-taken inside this function while
 * single-step, system call, register and floating-point state is read.
 * When 'zp' is not the global zone, sender details in pr_info that carry a
 * different zone id are replaced (see the SI_FROMUSER check below).
1148 1150 */
1149 1151 void
1150 1152 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1151 1153 {
1152 1154 proc_t *p = ttoproc(t);
1153 1155 klwp_t *lwp = ttolwp(t);
1154 1156 struct mstate *ms = &lwp->lwp_mstate;
1155 1157 hrtime_t usr, sys;
1156 1158 int flags;
1157 1159 ulong_t instr;
1158 1160
1159 1161 ASSERT(MUTEX_HELD(&p->p_lock));
1160 1162
1161 1163 bzero(sp, sizeof (*sp));
	/* Build the PR_* flag word from thread, lwp and process state. */
1162 1164 flags = 0L;
1163 1165 if (t->t_state == TS_STOPPED) {
1164 1166 flags |= PR_STOPPED;
1165 1167 if ((t->t_schedflag & TS_PSTART) == 0)
1166 1168 flags |= PR_ISTOP;
1167 1169 } else if (VSTOPPED(t)) {
1168 1170 flags |= PR_STOPPED|PR_ISTOP;
1169 1171 }
1170 1172 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1171 1173 flags |= PR_DSTOP;
1172 1174 if (lwp->lwp_asleep)
1173 1175 flags |= PR_ASLEEP;
1174 1176 if (t == p->p_agenttp)
1175 1177 flags |= PR_AGENT;
1176 1178 if (!(t->t_proc_flag & TP_TWAIT))
1177 1179 flags |= PR_DETACH;
1178 1180 if (t->t_proc_flag & TP_DAEMON)
1179 1181 flags |= PR_DAEMON;
1180 1182 if (p->p_proc_flag & P_PR_FORK)
1181 1183 flags |= PR_FORK;
1182 1184 if (p->p_proc_flag & P_PR_RUNLCL)
1183 1185 flags |= PR_RLC;
1184 1186 if (p->p_proc_flag & P_PR_KILLCL)
1185 1187 flags |= PR_KLC;
1186 1188 if (p->p_proc_flag & P_PR_ASYNC)
1187 1189 flags |= PR_ASYNC;
1188 1190 if (p->p_proc_flag & P_PR_BPTADJ)
1189 1191 flags |= PR_BPTADJ;
1190 1192 if (p->p_proc_flag & P_PR_PTRACE)
1191 1193 flags |= PR_PTRACE;
1192 1194 if (p->p_flag & SMSACCT)
1193 1195 flags |= PR_MSACCT;
1194 1196 if (p->p_flag & SMSFORK)
1195 1197 flags |= PR_MSFORK;
1196 1198 if (p->p_flag & SVFWAIT)
1197 1199 flags |= PR_VFORKP;
1198 1200 if (p->p_pgidp->pid_pgorphaned)
1199 1201 flags |= PR_ORPHAN;
1200 1202 if (p->p_pidflag & CLDNOSIGCHLD)
1201 1203 flags |= PR_NOSIGCHLD;
1202 1204 if (p->p_pidflag & CLDWAITPID)
1203 1205 flags |= PR_WAITPID;
1204 1206 sp->pr_flags = flags;
1205 1207 if (VSTOPPED(t)) {
1206 1208 sp->pr_why = PR_REQUESTED;
1207 1209 sp->pr_what = 0;
1208 1210 } else {
1209 1211 sp->pr_why = t->t_whystop;
1210 1212 sp->pr_what = t->t_whatstop;
1211 1213 }
1212 1214 sp->pr_lwpid = t->t_tid;
1213 1215 sp->pr_cursig = lwp->lwp_cursig;
1214 1216 prassignset(&sp->pr_lwppend, &t->t_sig);
1215 1217 schedctl_finish_sigblock(t);
1216 1218 prassignset(&sp->pr_lwphold, &t->t_hold);
1217 1219 if (t->t_whystop == PR_FAULTED)
1218 1220 bcopy(&lwp->lwp_siginfo,
1219 1221 &sp->pr_info, sizeof (k_siginfo_t));
1220 1222 else if (lwp->lwp_curinfo)
1221 1223 bcopy(&lwp->lwp_curinfo->sq_info,
1222 1224 &sp->pr_info, sizeof (k_siginfo_t));
1223 1225 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1224 1226 sp->pr_info.si_zoneid != zp->zone_id) {
1225 1227 sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1226 1228 sp->pr_info.si_uid = 0;
1227 1229 sp->pr_info.si_ctid = -1;
1228 1230 sp->pr_info.si_zoneid = zp->zone_id;
1229 1231 }
1230 1232 sp->pr_altstack = lwp->lwp_sigaltstack;
1231 1233 prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1232 1234 sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1233 1235 sp->pr_ustack = lwp->lwp_ustack;
	/*
	 * *sp was zeroed above, so copying at most size - 1 bytes leaves
	 * pr_clname NUL-terminated.
	 */
1234 1236 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1235 1237 sizeof (sp->pr_clname) - 1);
1236 1238 if (flags & PR_STOPPED)
1237 1239 hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1238 1240 usr = ms->ms_acct[LMS_USER];
1239 1241 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1240 1242 scalehrtime(&usr);
1241 1243 scalehrtime(&sys);
1242 1244 hrt2ts(usr, &sp->pr_utime);
1243 1245 hrt2ts(sys, &sp->pr_stime);
1244 1246
1245 1247 /*
1246 1248 * Fetch the current instruction, if not a system process.
1247 1249 * We don't attempt this unless the lwp is stopped.
1248 1250 */
1249 1251 if ((p->p_flag & SSYS) || p->p_as == &kas)
1250 1252 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1251 1253 else if (!(flags & PR_STOPPED))
1252 1254 sp->pr_flags |= PR_PCINVAL;
1253 1255 else if (!prfetchinstr(lwp, &instr))
1254 1256 sp->pr_flags |= PR_PCINVAL;
1255 1257 else
1256 1258 sp->pr_instr = instr;
1257 1259
1258 1260 /*
1259 1261 * Drop p_lock while touching the lwp's stack.
1260 1262 */
1261 1263 mutex_exit(&p->p_lock);
1262 1264 if (prisstep(lwp))
1263 1265 sp->pr_flags |= PR_STEP;
1264 1266 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1265 1267 int i;
1266 1268
1267 1269 sp->pr_syscall = get_syscall_args(lwp,
1268 1270 (long *)sp->pr_sysarg, &i);
1269 1271 sp->pr_nsysarg = (ushort_t)i;
1270 1272 }
1271 1273 if ((flags & PR_STOPPED) || t == curthread)
1272 1274 prgetprregs(lwp, sp->pr_reg);
	/*
	 * Report the system call's error number and return values when the
	 * lwp is stopped on system call exit or PR_VFORKP was set above.
	 */
1273 1275 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1274 1276 (flags & PR_VFORKP)) {
1275 1277 user_t *up;
1276 1278 auxv_t *auxp;
1277 1279 int i;
1278 1280
1279 1281 sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1280 1282 if (sp->pr_errno == 0)
1281 1283 sp->pr_errpriv = PRIV_NONE;
1282 1284 else
1283 1285 sp->pr_errpriv = lwp->lwp_badpriv;
1284 1286
		/*
		 * For execve, overwrite the first three reported arguments
		 * with the AT_SUN_EXECNAME aux vector pointer (0 if there is
		 * no such entry), u_argv and u_envp.
		 */
1285 1287 if (t->t_sysnum == SYS_execve) {
1286 1288 up = PTOU(p);
1287 1289 sp->pr_sysarg[0] = 0;
1288 1290 sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1289 1291 sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1290 1292 for (i = 0, auxp = up->u_auxv;
1291 1293 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1292 1294 i++, auxp++) {
1293 1295 if (auxp->a_type == AT_SUN_EXECNAME) {
1294 1296 sp->pr_sysarg[0] =
1295 1297 (uintptr_t)auxp->a_un.a_ptr;
1296 1298 break;
1297 1299 }
1298 1300 }
1299 1301 }
1300 1302 }
1301 1303 if (prhasfp())
1302 1304 prgetprfpregs(lwp, &sp->pr_fpreg);
	/* Re-take p_lock so that it is held on return, as it was on entry. */
1303 1305 mutex_enter(&p->p_lock);
1304 1306 }
1305 1307
1306 1308 /*
1307 1309 * Get the sigaction structure for the specified signal. The u-block
1308 1310 * must already have been mapped in by the caller.
1309 1311 */
1310 1312 void
1311 1313 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1312 1314 {
1313 1315 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1314 1316
1315 1317 bzero(sp, sizeof (*sp));
1316 1318
1317 1319 if (sig != 0 && (unsigned)sig < nsig) {
1318 1320 sp->sa_handler = up->u_signal[sig-1];
1319 1321 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1320 1322 if (sigismember(&up->u_sigonstack, sig))
1321 1323 sp->sa_flags |= SA_ONSTACK;
1322 1324 if (sigismember(&up->u_sigresethand, sig))
1323 1325 sp->sa_flags |= SA_RESETHAND;
1324 1326 if (sigismember(&up->u_sigrestart, sig))
1325 1327 sp->sa_flags |= SA_RESTART;
1326 1328 if (sigismember(&p->p_siginfo, sig))
1327 1329 sp->sa_flags |= SA_SIGINFO;
1328 1330 if (sigismember(&up->u_signodefer, sig))
1329 1331 sp->sa_flags |= SA_NODEFER;
1330 1332 if (sig == SIGCLD) {
1331 1333 if (p->p_flag & SNOWAIT)
1332 1334 sp->sa_flags |= SA_NOCLDWAIT;
1333 1335 if ((p->p_flag & SJCTL) == 0)
1334 1336 sp->sa_flags |= SA_NOCLDSTOP;
1335 1337 }
1336 1338 }
1337 1339 }
1338 1340
1339 1341 #ifdef _SYSCALL32_IMPL
1340 1342 void
1341 1343 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1342 1344 {
1343 1345 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1344 1346
1345 1347 bzero(sp, sizeof (*sp));
1346 1348
1347 1349 if (sig != 0 && (unsigned)sig < nsig) {
1348 1350 sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1349 1351 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1350 1352 if (sigismember(&up->u_sigonstack, sig))
1351 1353 sp->sa_flags |= SA_ONSTACK;
1352 1354 if (sigismember(&up->u_sigresethand, sig))
1353 1355 sp->sa_flags |= SA_RESETHAND;
1354 1356 if (sigismember(&up->u_sigrestart, sig))
1355 1357 sp->sa_flags |= SA_RESTART;
1356 1358 if (sigismember(&p->p_siginfo, sig))
1357 1359 sp->sa_flags |= SA_SIGINFO;
1358 1360 if (sigismember(&up->u_signodefer, sig))
1359 1361 sp->sa_flags |= SA_NODEFER;
1360 1362 if (sig == SIGCLD) {
1361 1363 if (p->p_flag & SNOWAIT)
1362 1364 sp->sa_flags |= SA_NOCLDWAIT;
1363 1365 if ((p->p_flag & SJCTL) == 0)
1364 1366 sp->sa_flags |= SA_NOCLDSTOP;
1365 1367 }
1366 1368 }
1367 1369 }
1368 1370 #endif /* _SYSCALL32_IMPL */
1369 1371
1370 1372 /*
1371 1373 * Count the number of segments in this process's address space.
1372 1374 */
1373 1375 int
1374 1376 prnsegs(struct as *as, int reserved)
1375 1377 {
1376 1378 int n = 0;
1377 1379 struct seg *seg;
1378 1380
1379 1381 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1380 1382
1381 1383 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1382 1384 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1383 1385 caddr_t saddr, naddr;
1384 1386 void *tmp = NULL;
1385 1387
1386 1388 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1387 1389 (void) pr_getprot(seg, reserved, &tmp,
1388 1390 &saddr, &naddr, eaddr);
1389 1391 if (saddr != naddr)
1390 1392 n++;
1391 1393 }
1392 1394
1393 1395 ASSERT(tmp == NULL);
1394 1396 }
1395 1397
1396 1398 return (n);
1397 1399 }
1398 1400
/*
 * Write the decimal representation of 'n', without leading zeros, at 's'.
 * No terminator is written unless 'len' exceeds the number of digits, in
 * which case the remainder of the 'len' bytes is filled with null
 * characters.  Returns the number of digits written.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* a uint32_t needs at most 10 digits */
	char *limit = s + len;
	int ndigits = 0;
	int i;

	/* produce the digits least-significant first; zero yields "0" */
	for (;;) {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
		if (n == 0)
			break;
	}

	/* emit them in reading order */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	/* optional null padding out to 'len' bytes */
	while (s < limit)
		*s++ = '\0';

	return (ndigits);
}
1427 1429
/*
 * Write the decimal representation of 'n', without leading zeros, at 's'.
 * No terminating null is written.  Returns the number of digits written.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* a uint64_t needs at most 20 digits */
	int ndigits = 0;
	int i;

	/* produce the digits least-significant first; zero yields "0" */
	for (;;) {
		digits[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
		if (n == 0)
			break;
	}

	/* emit them in reading order */
	for (i = ndigits - 1; i >= 0; i--)
		*s++ = digits[i];

	return (ndigits);
}
1452 1454
1453 1455 void
1454 1456 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1455 1457 {
1456 1458 char *s = name;
1457 1459 struct vfs *vfsp;
1458 1460 struct vfssw *vfsswp;
1459 1461
1460 1462 if ((vfsp = vp->v_vfsp) != NULL &&
1461 1463 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1462 1464 *vfsswp->vsw_name) {
1463 1465 (void) strcpy(s, vfsswp->vsw_name);
1464 1466 s += strlen(s);
1465 1467 *s++ = '.';
1466 1468 }
1467 1469 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1468 1470 *s++ = '.';
1469 1471 s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1470 1472 *s++ = '.';
1471 1473 s += pr_u64tos(vattr->va_nodeid, s);
1472 1474 *s++ = '\0';
1473 1475 }
1474 1476
1475 1477 struct seg *
1476 1478 break_seg(proc_t *p)
1477 1479 {
1478 1480 caddr_t addr = p->p_brkbase;
1479 1481 struct seg *seg;
1480 1482 struct vnode *vp;
1481 1483
1482 1484 if (p->p_brksize != 0)
1483 1485 addr += p->p_brksize - 1;
1484 1486 seg = as_segat(p->p_as, addr);
1485 1487 if (seg != NULL && seg->s_ops == &segvn_ops &&
1486 1488 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1487 1489 return (seg);
1488 1490 return (NULL);
1489 1491 }
1490 1492
1491 1493 /*
1492 1494 * Implementation of service functions to handle procfs generic chained
1493 1495 * copyout buffers.
 *
 * Each buffer is a single allocation: a piol_t header immediately followed
 * by the data area that PIOL_DATABUF() points at.
1494 1496 */
1495 1497 typedef struct pr_iobuf_list {
1496 1498 list_node_t piol_link; /* buffer linkage */
1497 1499 size_t piol_size; /* total size (header + data) */
1498 1500 size_t piol_usedsize; /* amount to copy out from this buf */
1499 1501 } piol_t;
1500 1502
/* Size of each follow-on buffer, and the upper bound on the initial one. */
1501 1503 #define MAPSIZE (64 * 1024)
/* Address of a buffer's data area, which starts right after its header. */
1502 1504 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1]))
1503 1505
1504 1506 void
1505 1507 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1506 1508 {
1507 1509 piol_t *iol;
1508 1510 size_t initial_size = MIN(1, n) * itemsize;
1509 1511
1510 1512 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1511 1513
1512 1514 ASSERT(list_head(iolhead) == NULL);
1513 1515 ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1514 1516 ASSERT(initial_size > 0);
1515 1517
1516 1518 /*
1517 1519 * Someone creating chained copyout buffers may ask for less than
1518 1520 * MAPSIZE if the amount of data to be buffered is known to be
1519 1521 * smaller than that.
1520 1522 * But in order to prevent involuntary self-denial of service,
1521 1523 * the requested input size is clamped at MAPSIZE.
1522 1524 */
1523 1525 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1524 1526 iol = kmem_alloc(initial_size, KM_SLEEP);
1525 1527 list_insert_head(iolhead, iol);
1526 1528 iol->piol_usedsize = 0;
1527 1529 iol->piol_size = initial_size;
1528 1530 }
1529 1531
1530 1532 void *
1531 1533 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1532 1534 {
1533 1535 piol_t *iol;
1534 1536 char *new;
1535 1537
1536 1538 ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1537 1539 ASSERT(list_head(iolhead) != NULL);
1538 1540
1539 1541 iol = (piol_t *)list_tail(iolhead);
1540 1542
1541 1543 if (iol->piol_size <
1542 1544 iol->piol_usedsize + sizeof (*iol) + itemsize) {
1543 1545 /*
1544 1546 * Out of space in the current buffer. Allocate more.
1545 1547 */
1546 1548 piol_t *newiol;
1547 1549
1548 1550 newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1549 1551 newiol->piol_size = MAPSIZE;
1550 1552 newiol->piol_usedsize = 0;
1551 1553
1552 1554 list_insert_after(iolhead, iol, newiol);
1553 1555 iol = list_next(iolhead, iol);
1554 1556 ASSERT(iol == newiol);
1555 1557 }
1556 1558 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1557 1559 iol->piol_usedsize += itemsize;
1558 1560 bzero(new, itemsize);
1559 1561 return (new);
1560 1562 }
1561 1563
1562 1564 int
1563 1565 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1564 1566 {
1565 1567 int error = errin;
1566 1568 piol_t *iol;
1567 1569
1568 1570 while ((iol = list_head(iolhead)) != NULL) {
1569 1571 list_remove(iolhead, iol);
1570 1572 if (!error) {
1571 1573 if (copyout(PIOL_DATABUF(iol), *tgt,
1572 1574 iol->piol_usedsize))
1573 1575 error = EFAULT;
1574 1576 *tgt += iol->piol_usedsize;
1575 1577 }
1576 1578 kmem_free(iol, iol->piol_size);
1577 1579 }
1578 1580 list_destroy(iolhead);
1579 1581
1580 1582 return (error);
1581 1583 }
1582 1584
1583 1585 int
1584 1586 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1585 1587 {
1586 1588 offset_t off = uiop->uio_offset;
1587 1589 char *base;
1588 1590 size_t size;
1589 1591 piol_t *iol;
1590 1592 int error = errin;
1591 1593
1592 1594 while ((iol = list_head(iolhead)) != NULL) {
1593 1595 list_remove(iolhead, iol);
1594 1596 base = PIOL_DATABUF(iol);
1595 1597 size = iol->piol_usedsize;
1596 1598 if (off <= size && error == 0 && uiop->uio_resid > 0)
1597 1599 error = uiomove(base + off, size - off,
1598 1600 UIO_READ, uiop);
1599 1601 off = MAX(0, off - (offset_t)size);
1600 1602 kmem_free(iol, iol->piol_size);
1601 1603 }
1602 1604 list_destroy(iolhead);
1603 1605
1604 1606 return (error);
1605 1607 }
1606 1608
1607 1609 /*
1608 1610 * Return an array of structures with memory map information.
1609 1611 * We allocate here; the caller must deallocate.
 *
 * One prmap_t is appended to the chained buffer list 'iolhead' for each
 * non-empty address range reported by pr_getprot().  The address space
 * must not be kas and must be write-locked by the caller.  'reserved' is
 * passed through to pr_getsegsize() and pr_getprot(); when it is non-zero
 * the stack mapping's start and size are also recomputed from p_stk_ctl
 * rounded up to a page.  Always returns 0.
1610 1612 */
1611 1613 int
1612 1614 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1613 1615 {
1614 1616 struct as *as = p->p_as;
1615 1617 prmap_t *mp;
1616 1618 struct seg *seg;
1617 1619 struct seg *brkseg, *stkseg;
1618 1620 struct vnode *vp;
1619 1621 struct vattr vattr;
1620 1622 uint_t prot;
1621 1623
1622 1624 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1623 1625
1624 1626 /*
1625 1627 * Request an initial buffer size that doesn't waste memory
1626 1628 * if the address space has only a small number of segments.
1627 1629 */
1628 1630 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1629 1631
1630 1632 if ((seg = AS_SEGFIRST(as)) == NULL)
1631 1633 return (0);
1632 1634
1633 1635 brkseg = break_seg(p);
1634 1636 stkseg = as_segat(as, prgetstackbase(p));
1635 1637
1636 1638 do {
1637 1639 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1638 1640 caddr_t saddr, naddr;
1639 1641 void *tmp = NULL;
1640 1642
1641 1643 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1642 1644 prot = pr_getprot(seg, reserved, &tmp,
1643 1645 &saddr, &naddr, eaddr);
			/* An empty range produces no map entry. */
1644 1646 if (saddr == naddr)
1645 1647 continue;
1646 1648
1647 1649 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1648 1650
1649 1651 mp->pr_vaddr = (uintptr_t)saddr;
1650 1652 mp->pr_size = naddr - saddr;
1651 1653 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1652 1654 mp->pr_mflags = 0;
1653 1655 if (prot & PROT_READ)
1654 1656 mp->pr_mflags |= MA_READ;
1655 1657 if (prot & PROT_WRITE)
1656 1658 mp->pr_mflags |= MA_WRITE;
1657 1659 if (prot & PROT_EXEC)
1658 1660 mp->pr_mflags |= MA_EXEC;
1659 1661 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1660 1662 mp->pr_mflags |= MA_SHARED;
1661 1663 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1662 1664 mp->pr_mflags |= MA_NORESERVE;
			/*
			 * segspt_shmops segments, and segvn segments for
			 * which no vnode can be obtained, are flagged as
			 * anonymous.
			 */
1663 1665 if (seg->s_ops == &segspt_shmops ||
1664 1666 (seg->s_ops == &segvn_ops &&
1665 1667 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1666 1668 mp->pr_mflags |= MA_ANON;
1667 1669 if (seg == brkseg)
1668 1670 mp->pr_mflags |= MA_BREAK;
1669 1671 else if (seg == stkseg) {
1670 1672 mp->pr_mflags |= MA_STACK;
				/*
				 * Recompute the start and size of the stack
				 * mapping from the page-rounded p_stk_ctl
				 * value.
				 */
1671 1673 if (reserved) {
1672 1674 size_t maxstack =
1673 1675 ((size_t)p->p_stk_ctl +
1674 1676 PAGEOFFSET) & PAGEMASK;
1675 1677 mp->pr_vaddr =
1676 1678 (uintptr_t)prgetstackbase(p) +
1677 1679 p->p_stksize - maxstack;
1678 1680 mp->pr_size = (uintptr_t)naddr -
1679 1681 mp->pr_vaddr;
1680 1682 }
1681 1683 }
1682 1684 if (seg->s_ops == &segspt_shmops)
1683 1685 mp->pr_mflags |= MA_ISM | MA_SHM;
1684 1686 mp->pr_pagesize = PAGESIZE;
1685 1687
1686 1688 /*
1687 1689 * Manufacture a filename for the "object" directory.
1688 1690 */
1689 1691 vattr.va_mask = AT_FSID|AT_NODEID;
1690 1692 if (seg->s_ops == &segvn_ops &&
1691 1693 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1692 1694 vp != NULL && vp->v_type == VREG &&
1693 1695 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1694 1696 if (vp == p->p_exec)
1695 1697 (void) strcpy(mp->pr_mapname, "a.out");
1696 1698 else
1697 1699 pr_object_name(mp->pr_mapname,
1698 1700 vp, &vattr);
1699 1701 }
1700 1702
1701 1703 /*
1702 1704 * Get the SysV shared memory id, if any.
			 * SHMID_FREE is reported as -1, but the mapping is
			 * still flagged MA_SHM.
1703 1705 */
1704 1706 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1705 1707 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1706 1708 SHMID_NONE) {
1707 1709 if (mp->pr_shmid == SHMID_FREE)
1708 1710 mp->pr_shmid = -1;
1709 1711
1710 1712 mp->pr_mflags |= MA_SHM;
1711 1713 } else {
1712 1714 mp->pr_shmid = -1;
1713 1715 }
1714 1716 }
1715 1717 ASSERT(tmp == NULL);
1716 1718 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1717 1719
1718 1720 return (0);
1719 1721 }
1720 1722
1721 1723 #ifdef _SYSCALL32_IMPL
/*
 * Build the memory map of process p as a chained list of prmap32_t
 * entries on 'iolhead', one per non-empty address range reported by
 * pr_getprot().  Addresses and sizes are cast down to caddr32_t and
 * size32_t.  The address space must not be kas and must be write-locked
 * by the caller.  'reserved' is passed through to pr_getsegsize() and
 * pr_getprot(); when it is non-zero the stack mapping's start and size
 * are also recomputed from p_stk_ctl rounded up to a page.  The buffers
 * are allocated here and left on the list.  Always returns 0.
 */
1722 1724 int
1723 1725 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1724 1726 {
1725 1727 struct as *as = p->p_as;
1726 1728 prmap32_t *mp;
1727 1729 struct seg *seg;
1728 1730 struct seg *brkseg, *stkseg;
1729 1731 struct vnode *vp;
1730 1732 struct vattr vattr;
1731 1733 uint_t prot;
1732 1734
1733 1735 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1734 1736
1735 1737 /*
1736 1738 * Request an initial buffer size that doesn't waste memory
1737 1739 * if the address space has only a small number of segments.
1738 1740 */
1739 1741 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1740 1742
1741 1743 if ((seg = AS_SEGFIRST(as)) == NULL)
1742 1744 return (0);
1743 1745
1744 1746 brkseg = break_seg(p);
1745 1747 stkseg = as_segat(as, prgetstackbase(p));
1746 1748
1747 1749 do {
1748 1750 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1749 1751 caddr_t saddr, naddr;
1750 1752 void *tmp = NULL;
1751 1753
1752 1754 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1753 1755 prot = pr_getprot(seg, reserved, &tmp,
1754 1756 &saddr, &naddr, eaddr);
			/* An empty range produces no map entry. */
1755 1757 if (saddr == naddr)
1756 1758 continue;
1757 1759
1758 1760 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1759 1761
1760 1762 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1761 1763 mp->pr_size = (size32_t)(naddr - saddr);
1762 1764 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1763 1765 mp->pr_mflags = 0;
1764 1766 if (prot & PROT_READ)
1765 1767 mp->pr_mflags |= MA_READ;
1766 1768 if (prot & PROT_WRITE)
1767 1769 mp->pr_mflags |= MA_WRITE;
1768 1770 if (prot & PROT_EXEC)
1769 1771 mp->pr_mflags |= MA_EXEC;
1770 1772 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1771 1773 mp->pr_mflags |= MA_SHARED;
1772 1774 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1773 1775 mp->pr_mflags |= MA_NORESERVE;
			/*
			 * segspt_shmops segments, and segvn segments for
			 * which no vnode can be obtained, are flagged as
			 * anonymous.
			 */
1774 1776 if (seg->s_ops == &segspt_shmops ||
1775 1777 (seg->s_ops == &segvn_ops &&
1776 1778 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1777 1779 mp->pr_mflags |= MA_ANON;
1778 1780 if (seg == brkseg)
1779 1781 mp->pr_mflags |= MA_BREAK;
1780 1782 else if (seg == stkseg) {
1781 1783 mp->pr_mflags |= MA_STACK;
				/*
				 * Recompute the start and size of the stack
				 * mapping from the page-rounded p_stk_ctl
				 * value.
				 */
1782 1784 if (reserved) {
1783 1785 size_t maxstack =
1784 1786 ((size_t)p->p_stk_ctl +
1785 1787 PAGEOFFSET) & PAGEMASK;
1786 1788 uintptr_t vaddr =
1787 1789 (uintptr_t)prgetstackbase(p) +
1788 1790 p->p_stksize - maxstack;
1789 1791 mp->pr_vaddr = (caddr32_t)vaddr;
1790 1792 mp->pr_size = (size32_t)
1791 1793 ((uintptr_t)naddr - vaddr);
1792 1794 }
1793 1795 }
1794 1796 if (seg->s_ops == &segspt_shmops)
1795 1797 mp->pr_mflags |= MA_ISM | MA_SHM;
1796 1798 mp->pr_pagesize = PAGESIZE;
1797 1799
1798 1800 /*
1799 1801 * Manufacture a filename for the "object" directory.
1800 1802 */
1801 1803 vattr.va_mask = AT_FSID|AT_NODEID;
1802 1804 if (seg->s_ops == &segvn_ops &&
1803 1805 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1804 1806 vp != NULL && vp->v_type == VREG &&
1805 1807 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1806 1808 if (vp == p->p_exec)
1807 1809 (void) strcpy(mp->pr_mapname, "a.out");
1808 1810 else
1809 1811 pr_object_name(mp->pr_mapname,
1810 1812 vp, &vattr);
1811 1813 }
1812 1814
1813 1815 /*
1814 1816 * Get the SysV shared memory id, if any.
			 * SHMID_FREE is reported as -1, but the mapping is
			 * still flagged MA_SHM.
1815 1817 */
1816 1818 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1817 1819 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1818 1820 SHMID_NONE) {
1819 1821 if (mp->pr_shmid == SHMID_FREE)
1820 1822 mp->pr_shmid = -1;
1821 1823
1822 1824 mp->pr_mflags |= MA_SHM;
1823 1825 } else {
1824 1826 mp->pr_shmid = -1;
1825 1827 }
1826 1828 }
1827 1829 ASSERT(tmp == NULL);
1828 1830 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1829 1831
1830 1832 return (0);
1831 1833 }
1832 1834 #endif /* _SYSCALL32_IMPL */
1833 1835
1834 1836 /*
1835 1837 * Return the size of the /proc page data file.
1836 1838 */
1837 1839 size_t
1838 1840 prpdsize(struct as *as)
1839 1841 {
1840 1842 struct seg *seg;
1841 1843 size_t size;
1842 1844
1843 1845 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1844 1846
1845 1847 if ((seg = AS_SEGFIRST(as)) == NULL)
1846 1848 return (0);
1847 1849
1848 1850 size = sizeof (prpageheader_t);
1849 1851 do {
1850 1852 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1851 1853 caddr_t saddr, naddr;
1852 1854 void *tmp = NULL;
1853 1855 size_t npage;
1854 1856
1855 1857 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1856 1858 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1857 1859 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1858 1860 size += sizeof (prasmap_t) + round8(npage);
1859 1861 }
1860 1862 ASSERT(tmp == NULL);
1861 1863 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1862 1864
1863 1865 return (size);
1864 1866 }
1865 1867
1866 1868 #ifdef _SYSCALL32_IMPL
1867 1869 size_t
1868 1870 prpdsize32(struct as *as)
1869 1871 {
1870 1872 struct seg *seg;
1871 1873 size_t size;
1872 1874
1873 1875 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
1874 1876
1875 1877 if ((seg = AS_SEGFIRST(as)) == NULL)
1876 1878 return (0);
1877 1879
1878 1880 size = sizeof (prpageheader32_t);
1879 1881 do {
1880 1882 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1881 1883 caddr_t saddr, naddr;
1882 1884 void *tmp = NULL;
1883 1885 size_t npage;
1884 1886
1885 1887 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1886 1888 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1887 1889 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1888 1890 size += sizeof (prasmap32_t) + round8(npage);
1889 1891 }
1890 1892 ASSERT(tmp == NULL);
1891 1893 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1892 1894
1893 1895 return (size);
1894 1896 }
1895 1897 #endif /* _SYSCALL32_IMPL */
1896 1898
1897 1899 /*
1898 1900 * Read page data information.
 *
 * Builds a snapshot of p's address space in a kernel buffer and copies it
 * out through uiop.  The buffer begins with a prpageheader_t and is
 * followed by one prasmap_t per range reported by pr_getprot(); each
 * prasmap_t is trailed by round8(npage) bytes that are handed to
 * hat_getstat() to fill in.
 *
 * Returns 0 when the address space has no segments, E2BIG when
 * uiop->uio_resid is smaller than the size computed by prpdsize(),
 * EINTR when the snapshot has to be retried while a signal is pending,
 * and otherwise whatever uiomove() returns.
1899 1901 */
1900 1902 int
1901 1903 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1902 1904 {
1903 1905 struct as *as = p->p_as;
1904 1906 caddr_t buf;
1905 1907 size_t size;
1906 1908 prpageheader_t *php;
1907 1909 prasmap_t *pmp;
1908 1910 struct seg *seg;
1909 1911 int error;
1910 1912
/* Retry point: the lock is re-taken and the size recomputed from scratch. */
1911 1913 again:
1912 1914 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1913 1915
1914 1916 if ((seg = AS_SEGFIRST(as)) == NULL) {
1915 1917 AS_LOCK_EXIT(as, &as->a_lock);
1916 1918 return (0);
1917 1919 }
/* Size the whole snapshot up front; the caller must supply room for all of it. */
1918 1920 size = prpdsize(as);
1919 1921 if (uiop->uio_resid < size) {
1920 1922 AS_LOCK_EXIT(as, &as->a_lock);
1921 1923 return (E2BIG);
1922 1924 }
1923 1925
1924 1926 buf = kmem_zalloc(size, KM_SLEEP);
1925 1927 php = (prpageheader_t *)buf;
/* The first mapping record sits directly behind the page header. */
1926 1928 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1927 1929
1928 1930 hrt2ts(gethrtime(), &php->pr_tstamp);
1929 1931 php->pr_nmap = 0;
1930 1932 php->pr_npage = 0;
1931 1933 do {
1932 1934 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1933 1935 caddr_t saddr, naddr;
1934 1936 void *tmp = NULL;
1935 1937
/* pr_getprot() updates saddr/naddr; naddr becomes the next start. */
1936 1938 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1937 1939 struct vnode *vp;
1938 1940 struct vattr vattr;
1939 1941 size_t len;
1940 1942 size_t npage;
1941 1943 uint_t prot;
1942 1944 uintptr_t next;
1943 1945
1944 1946 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1945 1947 if ((len = (size_t)(naddr - saddr)) == 0)
1946 1948 continue;
1947 1949 npage = len / PAGESIZE;
/* Address just past this record plus its padded per-page area. */
1948 1950 next = (uintptr_t)(pmp + 1) + round8(npage);
1949 1951 /*
1950 1952 * It's possible that the address space can change
1951 1953 * subtly even though we're holding as->a_lock
1952 1954 * due to the nondeterminism of page_exists() in
1953 1955 * the presence of asynchronously flushed pages or
1954 1956 * mapped files whose sizes are changing.
1955 1957 * page_exists() may be called indirectly from
1956 1958 * pr_getprot() by a SEGOP_INCORE() routine.
1957 1959 * If this happens we need to make sure we don't
1958 1960 * overrun the buffer whose size we computed based
1959 1961 * on the initial iteration through the segments.
1960 1962 * Once we've detected an overflow, we need to clean
1961 1963 * up the temporary memory allocated in pr_getprot()
1962 1964 * and retry. If there's a pending signal, we return
1963 1965 * EINTR so that this thread can be dislodged if
1964 1966 * a latent bug causes us to spin indefinitely.
1965 1967 */
1966 1968 if (next > (uintptr_t)buf + size) {
1967 1969 pr_getprot_done(&tmp);
1968 1970 AS_LOCK_EXIT(as, &as->a_lock);
1969 1971
1970 1972 kmem_free(buf, size);
1971 1973
1972 1974 if (ISSIG(curthread, JUSTLOOKING))
1973 1975 return (EINTR);
1974 1976
1975 1977 goto again;
1976 1978 }
1977 1979
1978 1980 php->pr_nmap++;
1979 1981 php->pr_npage += npage;
1980 1982 pmp->pr_vaddr = (uintptr_t)saddr;
1981 1983 pmp->pr_npage = npage;
1982 1984 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
/* Translate protection bits and segment type into MA_* flags. */
1983 1985 pmp->pr_mflags = 0;
1984 1986 if (prot & PROT_READ)
1985 1987 pmp->pr_mflags |= MA_READ;
1986 1988 if (prot & PROT_WRITE)
1987 1989 pmp->pr_mflags |= MA_WRITE;
1988 1990 if (prot & PROT_EXEC)
1989 1991 pmp->pr_mflags |= MA_EXEC;
1990 1992 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1991 1993 pmp->pr_mflags |= MA_SHARED;
1992 1994 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1993 1995 pmp->pr_mflags |= MA_NORESERVE;
/* Anonymous: an spt shm segment, or a segvn segment with no vnode behind it. */
1994 1996 if (seg->s_ops == &segspt_shmops ||
1995 1997 (seg->s_ops == &segvn_ops &&
1996 1998 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1997 1999 pmp->pr_mflags |= MA_ANON;
1998 2000 if (seg->s_ops == &segspt_shmops)
1999 2001 pmp->pr_mflags |= MA_ISM | MA_SHM;
2000 2002 pmp->pr_pagesize = PAGESIZE;
2001 2003 /*
2002 2004 * Manufacture a filename for the "object" directory.
 * Only done for segvn mappings of regular files whose
 * attributes can be fetched; the process's own executable
 * is always reported as "a.out".  Otherwise pr_mapname is
 * left as zeroed by kmem_zalloc().
2003 2005 */
2004 2006 vattr.va_mask = AT_FSID|AT_NODEID;
2005 2007 if (seg->s_ops == &segvn_ops &&
2006 2008 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2007 2009 vp != NULL && vp->v_type == VREG &&
2008 2010 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2009 2011 if (vp == p->p_exec)
2010 2012 (void) strcpy(pmp->pr_mapname, "a.out");
2011 2013 else
2012 2014 pr_object_name(pmp->pr_mapname,
2013 2015 vp, &vattr);
2014 2016 }
2015 2017
2016 2018 /*
2017 2019 * Get the SysV shared memory id, if any.
 * SHMID_FREE is reported to the consumer as -1 but the
 * mapping is still flagged MA_SHM.
2018 2020 */
2019 2021 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2020 2022 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2021 2023 SHMID_NONE) {
2022 2024 if (pmp->pr_shmid == SHMID_FREE)
2023 2025 pmp->pr_shmid = -1;
2024 2026
2025 2027 pmp->pr_mflags |= MA_SHM;
2026 2028 } else {
2027 2029 pmp->pr_shmid = -1;
2028 2030 }
2029 2031
/*
 * hat_getstat() is given the space directly behind this record;
 * presumably it stores one byte per page there, matching the
 * round8(npage) reservation -- TODO confirm against hat_getstat().
 */
2030 2032 hat_getstat(as, saddr, len, hatid,
2031 2033 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2032 2034 pmp = (prasmap_t *)next;
2033 2035 }
2034 2036 ASSERT(tmp == NULL);
2035 2037 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2036 2038
2037 2039 AS_LOCK_EXIT(as, &as->a_lock);
2038 2040
/* Copy out only the portion actually filled, then free the full allocation. */
2039 2041 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2040 2042 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2041 2043 kmem_free(buf, size);
2042 2044
2043 2045 return (error);
2044 2046 }
2045 2047
2046 2048 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavour of the page data read: same walk and retry logic as
 * prpdread(), but the snapshot is laid out with prpageheader32_t and
 * prasmap32_t records and sized with prpdsize32().
 *
 * Returns 0 when the address space has no segments, E2BIG when
 * uiop->uio_resid is smaller than the computed size, EINTR when a retry
 * is needed while a signal is pending, and otherwise the uiomove() result.
 */
2047 2049 int
2048 2050 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2049 2051 {
2050 2052 struct as *as = p->p_as;
2051 2053 caddr_t buf;
2052 2054 size_t size;
2053 2055 prpageheader32_t *php;
2054 2056 prasmap32_t *pmp;
2055 2057 struct seg *seg;
2056 2058 int error;
2057 2059
/* Retry point: the lock is re-taken and the size recomputed from scratch. */
2058 2060 again:
2059 2061 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2060 2062
2061 2063 if ((seg = AS_SEGFIRST(as)) == NULL) {
2062 2064 AS_LOCK_EXIT(as, &as->a_lock);
2063 2065 return (0);
2064 2066 }
/* Size the whole snapshot up front; the caller must supply room for all of it. */
2065 2067 size = prpdsize32(as);
2066 2068 if (uiop->uio_resid < size) {
2067 2069 AS_LOCK_EXIT(as, &as->a_lock);
2068 2070 return (E2BIG);
2069 2071 }
2070 2072
2071 2073 buf = kmem_zalloc(size, KM_SLEEP);
2072 2074 php = (prpageheader32_t *)buf;
/* The first mapping record sits directly behind the page header. */
2073 2075 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2074 2076
2075 2077 hrt2ts32(gethrtime(), &php->pr_tstamp);
2076 2078 php->pr_nmap = 0;
2077 2079 php->pr_npage = 0;
2078 2080 do {
2079 2081 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2080 2082 caddr_t saddr, naddr;
2081 2083 void *tmp = NULL;
2082 2084
/* pr_getprot() updates saddr/naddr; naddr becomes the next start. */
2083 2085 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2084 2086 struct vnode *vp;
2085 2087 struct vattr vattr;
2086 2088 size_t len;
2087 2089 size_t npage;
2088 2090 uint_t prot;
2089 2091 uintptr_t next;
2090 2092
2091 2093 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2092 2094 if ((len = (size_t)(naddr - saddr)) == 0)
2093 2095 continue;
2094 2096 npage = len / PAGESIZE;
/* Address just past this record plus its padded per-page area. */
2095 2097 next = (uintptr_t)(pmp + 1) + round8(npage);
2096 2098 /*
2097 2099 * It's possible that the address space can change
2098 2100 * subtly even though we're holding as->a_lock
2099 2101 * due to the nondeterminism of page_exists() in
2100 2102 * the presence of asynchronously flushed pages or
2101 2103 * mapped files whose sizes are changing.
2102 2104 * page_exists() may be called indirectly from
2103 2105 * pr_getprot() by a SEGOP_INCORE() routine.
2104 2106 * If this happens we need to make sure we don't
2105 2107 * overrun the buffer whose size we computed based
2106 2108 * on the initial iteration through the segments.
2107 2109 * Once we've detected an overflow, we need to clean
2108 2110 * up the temporary memory allocated in pr_getprot()
2109 2111 * and retry. If there's a pending signal, we return
2110 2112 * EINTR so that this thread can be dislodged if
2111 2113 * a latent bug causes us to spin indefinitely.
2112 2114 */
2113 2115 if (next > (uintptr_t)buf + size) {
2114 2116 pr_getprot_done(&tmp);
2115 2117 AS_LOCK_EXIT(as, &as->a_lock);
2116 2118
2117 2119 kmem_free(buf, size);
2118 2120
2119 2121 if (ISSIG(curthread, JUSTLOOKING))
2120 2122 return (EINTR);
2121 2123
2122 2124 goto again;
2123 2125 }
2124 2126
2125 2127 php->pr_nmap++;
2126 2128 php->pr_npage += npage;
/* Address and page count are narrowed to the 32-bit field types. */
2127 2129 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2128 2130 pmp->pr_npage = (size32_t)npage;
2129 2131 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
/* Translate protection bits and segment type into MA_* flags. */
2130 2132 pmp->pr_mflags = 0;
2131 2133 if (prot & PROT_READ)
2132 2134 pmp->pr_mflags |= MA_READ;
2133 2135 if (prot & PROT_WRITE)
2134 2136 pmp->pr_mflags |= MA_WRITE;
2135 2137 if (prot & PROT_EXEC)
2136 2138 pmp->pr_mflags |= MA_EXEC;
2137 2139 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2138 2140 pmp->pr_mflags |= MA_SHARED;
2139 2141 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2140 2142 pmp->pr_mflags |= MA_NORESERVE;
/* Anonymous: an spt shm segment, or a segvn segment with no vnode behind it. */
2141 2143 if (seg->s_ops == &segspt_shmops ||
2142 2144 (seg->s_ops == &segvn_ops &&
2143 2145 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2144 2146 pmp->pr_mflags |= MA_ANON;
2145 2147 if (seg->s_ops == &segspt_shmops)
2146 2148 pmp->pr_mflags |= MA_ISM | MA_SHM;
2147 2149 pmp->pr_pagesize = PAGESIZE;
2148 2150 /*
2149 2151 * Manufacture a filename for the "object" directory.
 * Only done for segvn mappings of regular files whose
 * attributes can be fetched; the process's own executable
 * is always reported as "a.out".  Otherwise pr_mapname is
 * left as zeroed by kmem_zalloc().
2150 2152 */
2151 2153 vattr.va_mask = AT_FSID|AT_NODEID;
2152 2154 if (seg->s_ops == &segvn_ops &&
2153 2155 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2154 2156 vp != NULL && vp->v_type == VREG &&
2155 2157 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2156 2158 if (vp == p->p_exec)
2157 2159 (void) strcpy(pmp->pr_mapname, "a.out");
2158 2160 else
2159 2161 pr_object_name(pmp->pr_mapname,
2160 2162 vp, &vattr);
2161 2163 }
2162 2164
2163 2165 /*
2164 2166 * Get the SysV shared memory id, if any.
 * SHMID_FREE is reported to the consumer as -1 but the
 * mapping is still flagged MA_SHM.
2165 2167 */
2166 2168 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2167 2169 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2168 2170 SHMID_NONE) {
2169 2171 if (pmp->pr_shmid == SHMID_FREE)
2170 2172 pmp->pr_shmid = -1;
2171 2173
2172 2174 pmp->pr_mflags |= MA_SHM;
2173 2175 } else {
2174 2176 pmp->pr_shmid = -1;
2175 2177 }
2176 2178
/*
 * hat_getstat() is given the space directly behind this record;
 * presumably it stores one byte per page there, matching the
 * round8(npage) reservation -- TODO confirm against hat_getstat().
 */
2177 2179 hat_getstat(as, saddr, len, hatid,
2178 2180 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2179 2181 pmp = (prasmap32_t *)next;
2180 2182 }
2181 2183 ASSERT(tmp == NULL);
2182 2184 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2183 2185
2184 2186 AS_LOCK_EXIT(as, &as->a_lock);
2185 2187
/* Copy out only the portion actually filled, then free the full allocation. */
2186 2188 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2187 2189 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2188 2190 kmem_free(buf, size);
2189 2191
2190 2192 return (error);
2191 2193 }
2192 2194 #endif /* _SYSCALL32_IMPL */
2193 2195
/*
 * Reduce an accumulated CPU-usage value to the 16-bit scaled form stored
 * in the pr_pctcpu fields.  The input is divided by the number of CPUs
 * online in the calling process's zone (when more than one), shifted
 * right by 16 bits, and clamped so that it never exceeds 0x8000.
 */
2194 2196 ushort_t
2195 2197 prgetpctcpu(uint64_t pct)
2196 2198 {
2197 2199 /*
2198 2200 * The value returned will be relevant in the zone of the examiner,
2199 2201 * which may not be the same as the zone which performed the procfs
2200 2202 * mount.
2201 2203 */
2202 2204 int nonline = zone_ncpus_online_get(curproc->p_zone);
2203 2205
2204 2206 /*
2205 2207 * Prorate over online cpus so we don't exceed 100%
2206 2208 */
2207 2209 if (nonline > 1)
2208 2210 pct /= nonline;
2209 2211 pct >>= 16; /* convert to 16-bit scaled integer */
2210 2212 if (pct > 0x8000) /* might happen, due to rounding */
2211 2213 pct = 0x8000;
2212 2214 return ((ushort_t)pct);
2213 2215 }
2214 2216
2215 2217 /*
2216 2218 * Return information used by ps(1).
 *
 * Fills *psp from process p.  The caller must hold p->p_lock; the lock is
 * dropped and re-taken around the address-space size queries near the end
 * of the function, so state protected by it may change across the call.
 * When prchoose() finds no thread the process is reported as a zombie.
2217 2219 */
2218 2220 void
2219 2221 prgetpsinfo(proc_t *p, psinfo_t *psp)
2220 2222 {
2221 2223 kthread_t *t;
2222 2224 struct cred *cred;
2223 2225 hrtime_t hrutime, hrstime;
2224 2226
2225 2227 ASSERT(MUTEX_HELD(&p->p_lock));
2226 2228
/*
 * With a representative thread, pr_lwp is left unzeroed here because
 * prgetlwpsinfo() below clears and fills it itself.  The size arithmetic
 * assumes pr_lwp is the last member of psinfo_t -- TODO confirm.
 */
2227 2229 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2228 2230 bzero(psp, sizeof (*psp));
2229 2231 else {
2230 2232 thread_unlock(t);
2231 2233 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2232 2234 }
2233 2235
2234 2236 /*
2235 2237 * only export SSYS and SMSACCT; everything else is off-limits to
2236 2238 * userland apps.
2237 2239 */
2238 2240 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2239 2241 psp->pr_nlwp = p->p_lwpcnt;
2240 2242 psp->pr_nzomb = p->p_zombcnt;
/* Credentials are read under p_crlock so the four ids come from one cred. */
2241 2243 mutex_enter(&p->p_crlock);
2242 2244 cred = p->p_cred;
2243 2245 psp->pr_uid = crgetruid(cred);
2244 2246 psp->pr_euid = crgetuid(cred);
2245 2247 psp->pr_gid = crgetrgid(cred);
2246 2248 psp->pr_egid = crgetgid(cred);
2247 2249 mutex_exit(&p->p_crlock);
2248 2250 psp->pr_pid = p->p_pid;
2249 2251 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2250 2252 (p->p_flag & SZONETOP)) {
2251 2253 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2252 2254 /*
2253 2255 * Inside local zones, fake zsched's pid as parent pids for
2254 2256 * processes which reference processes outside of the zone.
2255 2257 */
2256 2258 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2257 2259 } else {
2258 2260 psp->pr_ppid = p->p_ppid;
2259 2261 }
2260 2262 psp->pr_pgid = p->p_pgrp;
2261 2263 psp->pr_sid = p->p_sessp->s_sid;
2262 2264 psp->pr_taskid = p->p_task->tk_tkid;
2263 2265 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2264 2266 psp->pr_poolid = p->p_pool->pool_id;
2265 2267 psp->pr_zoneid = p->p_zone->zone_id;
/* A contract id of 0 is exported as -1. */
2266 2268 if ((psp->pr_contract = PRCTID(p)) == 0)
2267 2269 psp->pr_contract = -1;
2268 2270 psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2269 2271 switch (p->p_model) {
2270 2272 case DATAMODEL_ILP32:
2271 2273 psp->pr_dmodel = PR_MODEL_ILP32;
2272 2274 break;
2273 2275 case DATAMODEL_LP64:
2274 2276 psp->pr_dmodel = PR_MODEL_LP64;
2275 2277 break;
2276 2278 }
/* pr_time is user + system time aggregated over the process. */
2277 2279 hrutime = mstate_aggr_state(p, LMS_USER);
2278 2280 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2279 2281 hrt2ts((hrutime + hrstime), &psp->pr_time);
2280 2282 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2281 2283
2282 2284 if (t == NULL) {
/* No thread was chosen: report zombie state with no tty or CPU binding. */
2283 2285 int wcode = p->p_wcode; /* must be atomic read */
2284 2286
2285 2287 if (wcode)
2286 2288 psp->pr_wstat = wstat(wcode, p->p_wdata);
2287 2289 psp->pr_ttydev = PRNODEV;
2288 2290 psp->pr_lwp.pr_state = SZOMB;
2289 2291 psp->pr_lwp.pr_sname = 'Z';
2290 2292 psp->pr_lwp.pr_bindpro = PBIND_NONE;
2291 2293 psp->pr_lwp.pr_bindpset = PS_NONE;
2292 2294 } else {
2293 2295 user_t *up = PTOU(p);
2294 2296 struct as *as;
2295 2297 dev_t d;
2296 2298 extern dev_t rwsconsdev, rconsdev, uconsdev;
2297 2299
2298 2300 d = cttydev(p);
2299 2301 /*
2300 2302 * If the controlling terminal is the real
2301 2303 * or workstation console device, map to what the
2302 2304 * user thinks is the console device. Handle case when
2303 2305 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2304 2306 */
2305 2307 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2306 2308 d = uconsdev;
2307 2309 psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2308 2310 psp->pr_start = up->u_start;
/*
 * The copy lengths stop one byte short of the destination arrays, so
 * the zero left by the bzero() above terminates each string.
 */
2309 2311 bcopy(up->u_comm, psp->pr_fname,
2310 2312 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2311 2313 bcopy(up->u_psargs, psp->pr_psargs,
2312 2314 MIN(PRARGSZ-1, PSARGSZ));
2313 2315 psp->pr_argc = up->u_argc;
2314 2316 psp->pr_argv = up->u_argv;
2315 2317 psp->pr_envp = up->u_envp;
2316 2318
2317 2319 /* get the chosen lwp's lwpsinfo */
2318 2320 prgetlwpsinfo(t, &psp->pr_lwp);
2319 2321
2320 2322 /* compute %cpu for the process */
2321 2323 if (p->p_lwpcnt == 1)
2322 2324 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2323 2325 else {
/* Sum the per-thread values over the circular thread list, then scale once. */
2324 2326 uint64_t pct = 0;
2325 2327 hrtime_t cur_time = gethrtime_unscaled();
2326 2328
2327 2329 t = p->p_tlist;
2328 2330 do {
2329 2331 pct += cpu_update_pct(t, cur_time);
2330 2332 } while ((t = t->t_forw) != p->p_tlist);
2331 2333
2332 2334 psp->pr_pctcpu = prgetpctcpu(pct);
2333 2335 }
2334 2336 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2335 2337 psp->pr_size = 0;
2336 2338 psp->pr_rssize = 0;
2337 2339 } else {
/*
 * p_lock is released before the address-space lock is taken and
 * re-acquired afterwards; sizes are reported in kilobytes.
 */
2338 2340 mutex_exit(&p->p_lock);
2339 2341 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2340 2342 psp->pr_size = btopr(as->a_resvsize) *
2341 2343 (PAGESIZE / 1024);
2342 2344 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2343 2345 psp->pr_pctmem = rm_pctmemory(as);
2344 2346 AS_LOCK_EXIT(as, &as->a_lock);
2345 2347 mutex_enter(&p->p_lock);
2346 2348 }
2347 2349 }
2348 2350 }
2349 2351
2350 2352 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavour of prgetpsinfo(): fills a psinfo32_t from process p.
 * The caller must hold p->p_lock; the lock is dropped and re-taken around
 * the address-space size queries.  Kernel addresses are reported as 0,
 * and for a non-ILP32 target the size and argv/envp fields are zeroed
 * at the end because they cannot be represented in 32 bits.
 */
2351 2353 void
2352 2354 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2353 2355 {
2354 2356 kthread_t *t;
2355 2357 struct cred *cred;
2356 2358 hrtime_t hrutime, hrstime;
2357 2359
2358 2360 ASSERT(MUTEX_HELD(&p->p_lock));
2359 2361
/*
 * With a representative thread, pr_lwp is left unzeroed here because
 * prgetlwpsinfo32() below clears and fills it itself.  The size arithmetic
 * assumes pr_lwp is the last member of psinfo32_t -- TODO confirm.
 */
2360 2362 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2361 2363 bzero(psp, sizeof (*psp));
2362 2364 else {
2363 2365 thread_unlock(t);
2364 2366 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2365 2367 }
2366 2368
2367 2369 /*
2368 2370 * only export SSYS and SMSACCT; everything else is off-limits to
2369 2371 * userland apps.
2370 2372 */
2371 2373 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2372 2374 psp->pr_nlwp = p->p_lwpcnt;
2373 2375 psp->pr_nzomb = p->p_zombcnt;
/* Credentials are read under p_crlock so the four ids come from one cred. */
2374 2376 mutex_enter(&p->p_crlock);
2375 2377 cred = p->p_cred;
2376 2378 psp->pr_uid = crgetruid(cred);
2377 2379 psp->pr_euid = crgetuid(cred);
2378 2380 psp->pr_gid = crgetrgid(cred);
2379 2381 psp->pr_egid = crgetgid(cred);
2380 2382 mutex_exit(&p->p_crlock);
2381 2383 psp->pr_pid = p->p_pid;
2382 2384 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2383 2385 (p->p_flag & SZONETOP)) {
2384 2386 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2385 2387 /*
2386 2388 * Inside local zones, fake zsched's pid as parent pids for
2387 2389 * processes which reference processes outside of the zone.
2388 2390 */
2389 2391 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2390 2392 } else {
2391 2393 psp->pr_ppid = p->p_ppid;
2392 2394 }
2393 2395 psp->pr_pgid = p->p_pgrp;
2394 2396 psp->pr_sid = p->p_sessp->s_sid;
2395 2397 psp->pr_taskid = p->p_task->tk_tkid;
2396 2398 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2397 2399 psp->pr_poolid = p->p_pool->pool_id;
2398 2400 psp->pr_zoneid = p->p_zone->zone_id;
/* A contract id of 0 is exported as -1. */
2399 2401 if ((psp->pr_contract = PRCTID(p)) == 0)
2400 2402 psp->pr_contract = -1;
2401 2403 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
2402 2404 switch (p->p_model) {
2403 2405 case DATAMODEL_ILP32:
2404 2406 psp->pr_dmodel = PR_MODEL_ILP32;
2405 2407 break;
2406 2408 case DATAMODEL_LP64:
2407 2409 psp->pr_dmodel = PR_MODEL_LP64;
2408 2410 break;
2409 2411 }
/* pr_time is user + system time aggregated over the process. */
2410 2412 hrutime = mstate_aggr_state(p, LMS_USER);
2411 2413 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2412 2414 hrt2ts32(hrutime + hrstime, &psp->pr_time);
2413 2415 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2414 2416
2415 2417 if (t == NULL) {
/*
 * No thread was chosen: report zombie state.
 * NOTE(review): unlike prgetpsinfo(), this branch does not set
 * pr_lwp.pr_bindpro/pr_bindpset to PBIND_NONE/PS_NONE, so they stay 0
 * from the bzero() above -- confirm whether that is intended.
 */
2416 2418 extern int wstat(int, int); /* needs a header file */
2417 2419 int wcode = p->p_wcode; /* must be atomic read */
2418 2420
2419 2421 if (wcode)
2420 2422 psp->pr_wstat = wstat(wcode, p->p_wdata);
2421 2423 psp->pr_ttydev = PRNODEV32;
2422 2424 psp->pr_lwp.pr_state = SZOMB;
2423 2425 psp->pr_lwp.pr_sname = 'Z';
2424 2426 } else {
2425 2427 user_t *up = PTOU(p);
2426 2428 struct as *as;
2427 2429 dev_t d;
2428 2430 extern dev_t rwsconsdev, rconsdev, uconsdev;
2429 2431
2430 2432 d = cttydev(p);
2431 2433 /*
2432 2434 * If the controlling terminal is the real
2433 2435 * or workstation console device, map to what the
2434 2436 * user thinks is the console device. Handle case when
2435 2437 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2436 2438 */
2437 2439 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2438 2440 d = uconsdev;
/* The cmpldev() result is deliberately ignored. */
2439 2441 (void) cmpldev(&psp->pr_ttydev, d);
2440 2442 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
/*
 * The copy lengths stop one byte short of the destination arrays, so
 * the zero left by the bzero() above terminates each string.
 */
2441 2443 bcopy(up->u_comm, psp->pr_fname,
2442 2444 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2443 2445 bcopy(up->u_psargs, psp->pr_psargs,
2444 2446 MIN(PRARGSZ-1, PSARGSZ));
2445 2447 psp->pr_argc = up->u_argc;
2446 2448 psp->pr_argv = (caddr32_t)up->u_argv;
2447 2449 psp->pr_envp = (caddr32_t)up->u_envp;
2448 2450
2449 2451 /* get the chosen lwp's lwpsinfo */
2450 2452 prgetlwpsinfo32(t, &psp->pr_lwp);
2451 2453
2452 2454 /* compute %cpu for the process */
2453 2455 if (p->p_lwpcnt == 1)
2454 2456 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2455 2457 else {
/* Sum the per-thread values over the circular thread list, then scale once. */
2456 2458 uint64_t pct = 0;
2457 2459 hrtime_t cur_time;
2458 2460
2459 2461 t = p->p_tlist;
2460 2462 cur_time = gethrtime_unscaled();
2461 2463 do {
2462 2464 pct += cpu_update_pct(t, cur_time);
2463 2465 } while ((t = t->t_forw) != p->p_tlist);
2464 2466
2465 2467 psp->pr_pctcpu = prgetpctcpu(pct);
2466 2468 }
2467 2469 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2468 2470 psp->pr_size = 0;
2469 2471 psp->pr_rssize = 0;
2470 2472 } else {
/*
 * p_lock is released before the address-space lock is taken and
 * re-acquired afterwards; sizes are in kilobytes, narrowed to size32_t.
 */
2471 2473 mutex_exit(&p->p_lock);
2472 2474 AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
2473 2475 psp->pr_size = (size32_t)
2474 2476 (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2475 2477 psp->pr_rssize = (size32_t)
2476 2478 (rm_asrss(as) * (PAGESIZE / 1024));
2477 2479 psp->pr_pctmem = rm_pctmemory(as);
2478 2480 AS_LOCK_EXIT(as, &as->a_lock);
2479 2481 mutex_enter(&p->p_lock);
2480 2482 }
2481 2483 }
2482 2484
2483 2485 /*
2484 2486 * If we are looking at an LP64 process, zero out
2485 2487 * the fields that cannot be represented in ILP32.
2486 2488 */
2487 2489 if (p->p_model != DATAMODEL_ILP32) {
2488 2490 psp->pr_size = 0;
2489 2491 psp->pr_rssize = 0;
2490 2492 psp->pr_argv = 0;
2491 2493 psp->pr_envp = 0;
2492 2494 }
2493 2495 }
2494 2496
2495 2497 #endif /* _SYSCALL32_IMPL */
2496 2498
/*
 * Fill *psp with per-lwp ps(1) information for thread t.  The structure
 * is zeroed first, so any field not assigned below reads as 0.  The
 * caller must hold the p_lock of the process that owns t.
 */
2497 2499 void
2498 2500 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2499 2501 {
2500 2502 klwp_t *lwp = ttolwp(t);
2501 2503 sobj_ops_t *sobj;
2502 2504 char c, state;
2503 2505 uint64_t pct;
2504 2506 int retval, niceval;
2505 2507 hrtime_t hrutime, hrstime;
2506 2508
2507 2509 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2508 2510
2509 2511 bzero(psp, sizeof (*psp));
2510 2512
2511 2513 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2512 2514 psp->pr_lwpid = t->t_tid;
2513 2515 psp->pr_addr = (uintptr_t)t;
2514 2516 psp->pr_wchan = (uintptr_t)t->t_wchan;
2515 2517
2516 2518 /* map the thread state enum into a process state enum */
2517 2519 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2518 2520 switch (state) {
2519 2521 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2520 2522 case TS_RUN: state = SRUN; c = 'R'; break;
2521 2523 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2522 2524 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2523 2525 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2524 2526 case TS_WAIT: state = SWAIT; c = 'W'; break;
2525 2527 default: state = 0; c = '?'; break;
2526 2528 }
2527 2529 psp->pr_state = state;
2528 2530 psp->pr_sname = c;
2529 2531 if ((sobj = t->t_sobj_ops) != NULL)
2530 2532 psp->pr_stype = SOBJ_TYPE(sobj);
/* If CL_DONICE() fails, pr_oldpri and pr_nice keep the 0 from the bzero(). */
2531 2533 retval = CL_DONICE(t, NULL, 0, &niceval);
2532 2534 if (retval == 0) {
2533 2535 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2534 2536 psp->pr_nice = niceval + NZERO;
2535 2537 }
2536 2538 psp->pr_syscall = t->t_sysnum;
2537 2539 psp->pr_pri = t->t_pri;
2538 2540 psp->pr_start.tv_sec = t->t_start;
2539 2541 psp->pr_start.tv_nsec = 0L;
/* pr_time is user time plus system and trap time, each scaled first. */
2540 2542 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2541 2543 scalehrtime(&hrutime);
2542 2544 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2543 2545 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2544 2546 scalehrtime(&hrstime);
2545 2547 hrt2ts(hrutime + hrstime, &psp->pr_time);
2546 2548 /* compute %cpu for the lwp */
2547 2549 pct = cpu_update_pct(t, gethrtime_unscaled());
2548 2550 psp->pr_pctcpu = prgetpctcpu(pct);
2549 2551 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2550 2552 if (psp->pr_cpu > 99)
2551 2553 psp->pr_cpu = 99;
2552 2554
/* Copies at most size-1 bytes, so the zeroed last byte terminates the name. */
2553 2555 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2554 2556 sizeof (psp->pr_clname) - 1);
2555 2557 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2556 2558 psp->pr_onpro = t->t_cpu->cpu_id;
2557 2559 psp->pr_bindpro = t->t_bind_cpu;
2558 2560 psp->pr_bindpset = t->t_bind_pset;
2559 2561 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2560 2562 }
2561 2563
2562 2564 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit flavour of prgetlwpsinfo(): fill a lwpsinfo32_t for thread t.
 * The structure is zeroed first, and the kernel addresses pr_addr and
 * pr_wchan are reported as 0.  The caller must hold the p_lock of the
 * process that owns t.
 */
2563 2565 void
2564 2566 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2565 2567 {
2566 2568 proc_t *p = ttoproc(t);
2567 2569 klwp_t *lwp = ttolwp(t);
2568 2570 sobj_ops_t *sobj;
2569 2571 char c, state;
2570 2572 uint64_t pct;
2571 2573 int retval, niceval;
2572 2574 hrtime_t hrutime, hrstime;
2573 2575
2574 2576 ASSERT(MUTEX_HELD(&p->p_lock));
2575 2577
2576 2578 bzero(psp, sizeof (*psp));
2577 2579
2578 2580 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2579 2581 psp->pr_lwpid = t->t_tid;
2580 2582 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
2581 2583 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */
2582 2584
2583 2585 /* map the thread state enum into a process state enum */
2584 2586 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2585 2587 switch (state) {
2586 2588 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2587 2589 case TS_RUN: state = SRUN; c = 'R'; break;
2588 2590 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2589 2591 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2590 2592 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2591 2593 case TS_WAIT: state = SWAIT; c = 'W'; break;
2592 2594 default: state = 0; c = '?'; break;
2593 2595 }
2594 2596 psp->pr_state = state;
2595 2597 psp->pr_sname = c;
2596 2598 if ((sobj = t->t_sobj_ops) != NULL)
2597 2599 psp->pr_stype = SOBJ_TYPE(sobj);
/* On CL_DONICE() failure the priority/nice fields are explicitly set to 0. */
2598 2600 retval = CL_DONICE(t, NULL, 0, &niceval);
2599 2601 if (retval == 0) {
2600 2602 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2601 2603 psp->pr_nice = niceval + NZERO;
2602 2604 } else {
2603 2605 psp->pr_oldpri = 0;
2604 2606 psp->pr_nice = 0;
2605 2607 }
2606 2608 psp->pr_syscall = t->t_sysnum;
2607 2609 psp->pr_pri = t->t_pri;
2608 2610 psp->pr_start.tv_sec = (time32_t)t->t_start;
2609 2611 psp->pr_start.tv_nsec = 0L;
/* pr_time is user time plus system and trap time, each scaled first. */
2610 2612 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2611 2613 scalehrtime(&hrutime);
2612 2614 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2613 2615 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2614 2616 scalehrtime(&hrstime);
2615 2617 hrt2ts32(hrutime + hrstime, &psp->pr_time);
2616 2618 /* compute %cpu for the lwp */
2617 2619 pct = cpu_update_pct(t, gethrtime_unscaled());
2618 2620 psp->pr_pctcpu = prgetpctcpu(pct);
2619 2621 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2620 2622 if (psp->pr_cpu > 99)
2621 2623 psp->pr_cpu = 99;
2622 2624
/* Copies at most size-1 bytes, so the zeroed last byte terminates the name. */
2623 2625 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2624 2626 sizeof (psp->pr_clname) - 1);
2625 2627 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2626 2628 psp->pr_onpro = t->t_cpu->cpu_id;
2627 2629 psp->pr_bindpro = t->t_bind_cpu;
2628 2630 psp->pr_bindpset = t->t_bind_pset;
2629 2631 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2630 2632 }
2631 2633 #endif /* _SYSCALL32_IMPL */
2632 2634
2633 2635 #ifdef _SYSCALL32_IMPL
2634 2636
/*
 * Field-by-field copy helpers for the native-to-32-bit converters below.
 * s is the source structure pointer, d the destination pointer.
 */
/* Plain assignment; the expansion carries no trailing semicolon. */
2635 2637 #define PR_COPY_FIELD(s, d, field) d->field = s->field
2636 2638
/* Copy only when the source describes an ILP32 process; otherwise d->field is untouched. */
2637 2639 #define PR_COPY_FIELD_ILP32(s, d, field) \
2638 2640 if (s->pr_dmodel == PR_MODEL_ILP32) { \
2639 2641 d->field = s->field; \
2640 2642 }
2641 2643
/* Convert a timespec into its 32-bit form (destination is the first macro argument). */
2642 2644 #define PR_COPY_TIMESPEC(s, d, field) \
2643 2645 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);
2644 2646
/* Byte copy of an array member, sized by the destination array. */
2645 2647 #define PR_COPY_BUF(s, d, field) \
2646 2648 bcopy(s->field, d->field, sizeof (d->field));
2647 2649
/* Expands to nothing: marks a field that is deliberately not copied. */
2648 2650 #define PR_IGNORE_FIELD(s, d, field)
2649 2651
/*
 * Convert a native lwpsinfo into its 32-bit counterpart.  The destination
 * is zeroed first; pr_addr and pr_wchan are deliberately not copied and
 * so remain 0.  pr_time is not copied here either.
 */
2650 2652 void
2651 2653 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
2652 2654 {
2653 2655 bzero(dest, sizeof (*dest));
2654 2656
2655 2657 PR_COPY_FIELD(src, dest, pr_flag);
2656 2658 PR_COPY_FIELD(src, dest, pr_lwpid);
2657 2659 PR_IGNORE_FIELD(src, dest, pr_addr);
2658 2660 PR_IGNORE_FIELD(src, dest, pr_wchan);
2659 2661 PR_COPY_FIELD(src, dest, pr_stype);
2660 2662 PR_COPY_FIELD(src, dest, pr_state);
2661 2663 PR_COPY_FIELD(src, dest, pr_sname);
2662 2664 PR_COPY_FIELD(src, dest, pr_nice);
2663 2665 PR_COPY_FIELD(src, dest, pr_syscall);
2664 2666 PR_COPY_FIELD(src, dest, pr_oldpri);
2665 2667 PR_COPY_FIELD(src, dest, pr_cpu);
2666 2668 PR_COPY_FIELD(src, dest, pr_pri);
2667 2669 PR_COPY_FIELD(src, dest, pr_pctcpu);
2668 2670 PR_COPY_TIMESPEC(src, dest, pr_start);
2669 2671 PR_COPY_BUF(src, dest, pr_clname);
2670 2672 PR_COPY_BUF(src, dest, pr_name);
2671 2673 PR_COPY_FIELD(src, dest, pr_onpro);
2672 2674 PR_COPY_FIELD(src, dest, pr_bindpro);
2673 2675 PR_COPY_FIELD(src, dest, pr_bindpset);
2674 2676 PR_COPY_FIELD(src, dest, pr_lgrp);
2675 2677 }
2676 2678
/*
 * Convert a native psinfo into its 32-bit counterpart.  The destination is
 * zeroed first, so every field that is skipped reads as 0:
 *  - pr_addr is never copied (a kernel address);
 *  - pr_size, pr_rssize, pr_argv and pr_envp are copied only when the
 *    source describes an ILP32 process.
 * The embedded pr_lwp is converted by lwpsinfo_kto32().
 */
2677 2679 void
2678 2680 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
2679 2681 {
2680 2682 bzero(dest, sizeof (*dest));
2681 2683
2682 2684 PR_COPY_FIELD(src, dest, pr_flag);
2683 2685 PR_COPY_FIELD(src, dest, pr_nlwp);
2684 2686 PR_COPY_FIELD(src, dest, pr_pid);
2685 2687 PR_COPY_FIELD(src, dest, pr_ppid);
2686 2688 PR_COPY_FIELD(src, dest, pr_pgid);
2687 2689 PR_COPY_FIELD(src, dest, pr_sid);
2688 2690 PR_COPY_FIELD(src, dest, pr_uid);
2689 2691 PR_COPY_FIELD(src, dest, pr_euid);
2690 2692 PR_COPY_FIELD(src, dest, pr_gid);
2691 2693 PR_COPY_FIELD(src, dest, pr_egid);
2692 2694 PR_IGNORE_FIELD(src, dest, pr_addr);
2693 2695 PR_COPY_FIELD_ILP32(src, dest, pr_size);
2694 2696 PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
2695 2697 PR_COPY_FIELD(src, dest, pr_ttydev);
2696 2698 PR_COPY_FIELD(src, dest, pr_pctcpu);
2697 2699 PR_COPY_FIELD(src, dest, pr_pctmem);
2698 2700 PR_COPY_TIMESPEC(src, dest, pr_start);
2699 2701 PR_COPY_TIMESPEC(src, dest, pr_time);
2700 2702 PR_COPY_TIMESPEC(src, dest, pr_ctime);
2701 2703 PR_COPY_BUF(src, dest, pr_fname);
2702 2704 PR_COPY_BUF(src, dest, pr_psargs);
2703 2705 PR_COPY_FIELD(src, dest, pr_wstat);
2704 2706 PR_COPY_FIELD(src, dest, pr_argc);
2705 2707 PR_COPY_FIELD_ILP32(src, dest, pr_argv);
2706 2708 PR_COPY_FIELD_ILP32(src, dest, pr_envp);
2707 2709 PR_COPY_FIELD(src, dest, pr_dmodel);
2708 2710 PR_COPY_FIELD(src, dest, pr_taskid);
2709 2711 PR_COPY_FIELD(src, dest, pr_projid);
2710 2712 PR_COPY_FIELD(src, dest, pr_nzomb);
2711 2713 PR_COPY_FIELD(src, dest, pr_poolid);
2712 2714 PR_COPY_FIELD(src, dest, pr_contract);
/*
 * pr_zoneid must be carried across as well; without it the 32-bit
 * structure keeps the 0 left by the bzero() above.  (This line used to
 * be a repeated pr_poolid copy.)
 */
2713 2715 PR_COPY_FIELD(src, dest, pr_zoneid);
2715 2717
2716 2718 lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
2717 2719 }
2718 2720
/* The copy helpers are private to the two converters above; drop them here. */
2719 2721 #undef PR_COPY_FIELD
2720 2722 #undef PR_COPY_FIELD_ILP32
2721 2723 #undef PR_COPY_TIMESPEC
2722 2724 #undef PR_COPY_BUF
2723 2725 #undef PR_IGNORE_FIELD
2724 2726
2725 2727 #endif /* _SYSCALL32_IMPL */
2726 2728
2727 2729 /*
2728 2730 * This used to get called when microstate accounting was disabled but
2729 2731 * microstate information was requested. Since Microstate accounting is on
2730 2732 * regardless of the proc flags, this simply makes it appear to procfs that
2731 2733 * microstate accounting is on. This is relatively meaningless since you
2732 2734 * can't turn it off, but this is here for the sake of appearances.
 *
 * Does nothing when t is NULL or when t's process has SSYS set.  Otherwise
 * sets TP_MSACCT on every thread of the process and SMSACCT on the process.
 * The caller must hold the process's p_lock; curtime is unused.
2733 2735 */
2734 2736
2735 2737 /*ARGSUSED*/
2736 2738 void
2737 2739 estimate_msacct(kthread_t *t, hrtime_t curtime)
2738 2740 {
2739 2741 proc_t *p;
2740 2742
2741 2743 if (t == NULL)
2742 2744 return;
2743 2745
2744 2746 p = ttoproc(t);
2745 2747 ASSERT(MUTEX_HELD(&p->p_lock));
2746 2748
2747 2749 /*
2748 2750 * A system process (p0) could be referenced if the thread is
2749 2751 * in the process of exiting. Don't turn on microstate accounting
2750 2752 * in that case.
2751 2753 */
2752 2754 if (p->p_flag & SSYS)
2753 2755 return;
2754 2756
2755 2757 /*
2756 2758 * Loop through all the LWPs (kernel threads) in the process.
 * The list is circular: stop once t_forw leads back to p_tlist.
2757 2759 */
2758 2760 t = p->p_tlist;
2759 2761 do {
2760 2762 t->t_proc_flag |= TP_MSACCT;
2761 2763 } while ((t = t->t_forw) != p->p_tlist);
2762 2764
2763 2765 p->p_flag |= SMSACCT; /* set process-wide MSACCT */
2764 2766 }
2765 2767
2766 2768 /*
2767 2769 * It's not really possible to disable microstate accounting anymore.
2768 2770 * However, this routine simply turns off the ms accounting flags in a process
2769 2771 * This way procfs can still pretend to turn microstate accounting on and
2770 2772 * off for a process, but it actually doesn't do anything. This is
2771 2773 * a neutered form of preemptive idiot-proofing.
 *
 * Clears SMSACCT on the process and TP_MSACCT on each of its threads.
 * The caller must hold p->p_lock.
2772 2774 */
2773 2775 void
2774 2776 disable_msacct(proc_t *p)
2775 2777 {
2776 2778 kthread_t *t;
2777 2779
2778 2780 ASSERT(MUTEX_HELD(&p->p_lock));
2779 2781
2780 2782 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */
2781 2783 /*
2782 2784 * Loop through all the LWPs (kernel threads) in the process.
 * The thread list may be empty here, hence the NULL check before
 * walking the circular list.
2783 2785 */
2784 2786 if ((t = p->p_tlist) != NULL) {
2785 2787 do {
2786 2788 /* clear per-thread flag */
2787 2789 t->t_proc_flag &= ~TP_MSACCT;
2788 2790 } while ((t = t->t_forw) != p->p_tlist);
2789 2791 }
2790 2792 }
2791 2793
2792 2794 /*
2793 2795 * Return resource usage information.
2794 2796 */
2795 2797 void
2796 2798 prgetusage(kthread_t *t, prhusage_t *pup)
2797 2799 {
2798 2800 klwp_t *lwp = ttolwp(t);
2799 2801 hrtime_t *mstimep;
2800 2802 struct mstate *ms = &lwp->lwp_mstate;
2801 2803 int state;
2802 2804 int i;
2803 2805 hrtime_t curtime;
2804 2806 hrtime_t waitrq;
2805 2807 hrtime_t tmp1;
2806 2808
2807 2809 curtime = gethrtime_unscaled();
2808 2810
2809 2811 pup->pr_lwpid = t->t_tid;
2810 2812 pup->pr_count = 1;
2811 2813 pup->pr_create = ms->ms_start;
2812 2814 pup->pr_term = ms->ms_term;
2813 2815 scalehrtime(&pup->pr_create);
2814 2816 scalehrtime(&pup->pr_term);
2815 2817 if (ms->ms_term == 0) {
2816 2818 pup->pr_rtime = curtime - ms->ms_start;
2817 2819 scalehrtime(&pup->pr_rtime);
2818 2820 } else {
2819 2821 pup->pr_rtime = ms->ms_term - ms->ms_start;
2820 2822 scalehrtime(&pup->pr_rtime);
2821 2823 }
2822 2824
2823 2825
2824 2826 pup->pr_utime = ms->ms_acct[LMS_USER];
2825 2827 pup->pr_stime = ms->ms_acct[LMS_SYSTEM];
2826 2828 pup->pr_ttime = ms->ms_acct[LMS_TRAP];
2827 2829 pup->pr_tftime = ms->ms_acct[LMS_TFAULT];
2828 2830 pup->pr_dftime = ms->ms_acct[LMS_DFAULT];
2829 2831 pup->pr_kftime = ms->ms_acct[LMS_KFAULT];
2830 2832 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2831 2833 pup->pr_slptime = ms->ms_acct[LMS_SLEEP];
2832 2834 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2833 2835 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2834 2836
2835 2837 prscaleusage(pup);
2836 2838
2837 2839 /*
2838 2840 * Adjust for time waiting in the dispatcher queue.
2839 2841 */
2840 2842 waitrq = t->t_waitrq; /* hopefully atomic */
2841 2843 if (waitrq != 0) {
2842 2844 if (waitrq > curtime) {
2843 2845 curtime = gethrtime_unscaled();
2844 2846 }
2845 2847 tmp1 = curtime - waitrq;
2846 2848 scalehrtime(&tmp1);
2847 2849 pup->pr_wtime += tmp1;
2848 2850 curtime = waitrq;
2849 2851 }
2850 2852
2851 2853 /*
2852 2854 * Adjust for time spent in current microstate.
2853 2855 */
2854 2856 if (ms->ms_state_start > curtime) {
2855 2857 curtime = gethrtime_unscaled();
2856 2858 }
2857 2859
2858 2860 i = 0;
2859 2861 do {
2860 2862 switch (state = t->t_mstate) {
2861 2863 case LMS_SLEEP:
2862 2864 /*
2863 2865 * Update the timer for the current sleep state.
2864 2866 */
2865 2867 switch (state = ms->ms_prev) {
2866 2868 case LMS_TFAULT:
2867 2869 case LMS_DFAULT:
2868 2870 case LMS_KFAULT:
2869 2871 case LMS_USER_LOCK:
2870 2872 break;
2871 2873 default:
2872 2874 state = LMS_SLEEP;
2873 2875 break;
2874 2876 }
2875 2877 break;
2876 2878 case LMS_TFAULT:
2877 2879 case LMS_DFAULT:
2878 2880 case LMS_KFAULT:
2879 2881 case LMS_USER_LOCK:
2880 2882 state = LMS_SYSTEM;
2881 2883 break;
2882 2884 }
2883 2885 switch (state) {
2884 2886 case LMS_USER: mstimep = &pup->pr_utime; break;
2885 2887 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
2886 2888 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
2887 2889 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
2888 2890 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
2889 2891 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
2890 2892 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
2891 2893 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
2892 2894 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
2893 2895 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
2894 2896 default: panic("prgetusage: unknown microstate");
2895 2897 }
2896 2898 tmp1 = curtime - ms->ms_state_start;
2897 2899 if (tmp1 < 0) {
2898 2900 curtime = gethrtime_unscaled();
2899 2901 i++;
2900 2902 continue;
2901 2903 }
2902 2904 scalehrtime(&tmp1);
2903 2905 } while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2904 2906
2905 2907 *mstimep += tmp1;
2906 2908
2907 2909 /* update pup timestamp */
2908 2910 pup->pr_tstamp = curtime;
2909 2911 scalehrtime(&pup->pr_tstamp);
2910 2912
2911 2913 /*
2912 2914 * Resource usage counters.
2913 2915 */
2914 2916 pup->pr_minf = lwp->lwp_ru.minflt;
2915 2917 pup->pr_majf = lwp->lwp_ru.majflt;
2916 2918 pup->pr_nswap = lwp->lwp_ru.nswap;
2917 2919 pup->pr_inblk = lwp->lwp_ru.inblock;
2918 2920 pup->pr_oublk = lwp->lwp_ru.oublock;
2919 2921 pup->pr_msnd = lwp->lwp_ru.msgsnd;
2920 2922 pup->pr_mrcv = lwp->lwp_ru.msgrcv;
2921 2923 pup->pr_sigs = lwp->lwp_ru.nsignals;
2922 2924 pup->pr_vctx = lwp->lwp_ru.nvcsw;
2923 2925 pup->pr_ictx = lwp->lwp_ru.nivcsw;
2924 2926 pup->pr_sysc = lwp->lwp_ru.sysc;
2925 2927 pup->pr_ioch = lwp->lwp_ru.ioch;
2926 2928 }
2927 2929
2928 2930 /*
2929 2931 * Convert ms_acct stats from unscaled high-res time to nanoseconds
2930 2932 */
2931 2933 void
2932 2934 prscaleusage(prhusage_t *usg)
2933 2935 {
2934 2936 scalehrtime(&usg->pr_utime);
2935 2937 scalehrtime(&usg->pr_stime);
2936 2938 scalehrtime(&usg->pr_ttime);
2937 2939 scalehrtime(&usg->pr_tftime);
2938 2940 scalehrtime(&usg->pr_dftime);
2939 2941 scalehrtime(&usg->pr_kftime);
2940 2942 scalehrtime(&usg->pr_ltime);
2941 2943 scalehrtime(&usg->pr_slptime);
2942 2944 scalehrtime(&usg->pr_wtime);
2943 2945 scalehrtime(&usg->pr_stoptime);
2944 2946 }
2945 2947
2946 2948
2947 2949 /*
2948 2950 * Sum resource usage information.
2949 2951 */
2950 2952 void
2951 2953 praddusage(kthread_t *t, prhusage_t *pup)
2952 2954 {
2953 2955 klwp_t *lwp = ttolwp(t);
2954 2956 hrtime_t *mstimep;
2955 2957 struct mstate *ms = &lwp->lwp_mstate;
2956 2958 int state;
2957 2959 int i;
2958 2960 hrtime_t curtime;
2959 2961 hrtime_t waitrq;
2960 2962 hrtime_t tmp;
2961 2963 prhusage_t conv;
2962 2964
2963 2965 curtime = gethrtime_unscaled();
2964 2966
2965 2967 if (ms->ms_term == 0) {
2966 2968 tmp = curtime - ms->ms_start;
2967 2969 scalehrtime(&tmp);
2968 2970 pup->pr_rtime += tmp;
2969 2971 } else {
2970 2972 tmp = ms->ms_term - ms->ms_start;
2971 2973 scalehrtime(&tmp);
2972 2974 pup->pr_rtime += tmp;
2973 2975 }
2974 2976
2975 2977 conv.pr_utime = ms->ms_acct[LMS_USER];
2976 2978 conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2977 2979 conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2978 2980 conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2979 2981 conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2980 2982 conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2981 2983 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2982 2984 conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2983 2985 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2984 2986 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2985 2987
2986 2988 prscaleusage(&conv);
2987 2989
2988 2990 pup->pr_utime += conv.pr_utime;
2989 2991 pup->pr_stime += conv.pr_stime;
2990 2992 pup->pr_ttime += conv.pr_ttime;
2991 2993 pup->pr_tftime += conv.pr_tftime;
2992 2994 pup->pr_dftime += conv.pr_dftime;
2993 2995 pup->pr_kftime += conv.pr_kftime;
2994 2996 pup->pr_ltime += conv.pr_ltime;
2995 2997 pup->pr_slptime += conv.pr_slptime;
2996 2998 pup->pr_wtime += conv.pr_wtime;
2997 2999 pup->pr_stoptime += conv.pr_stoptime;
2998 3000
2999 3001 /*
3000 3002 * Adjust for time waiting in the dispatcher queue.
3001 3003 */
3002 3004 waitrq = t->t_waitrq; /* hopefully atomic */
3003 3005 if (waitrq != 0) {
3004 3006 if (waitrq > curtime) {
3005 3007 curtime = gethrtime_unscaled();
3006 3008 }
3007 3009 tmp = curtime - waitrq;
3008 3010 scalehrtime(&tmp);
3009 3011 pup->pr_wtime += tmp;
3010 3012 curtime = waitrq;
3011 3013 }
3012 3014
3013 3015 /*
3014 3016 * Adjust for time spent in current microstate.
3015 3017 */
3016 3018 if (ms->ms_state_start > curtime) {
3017 3019 curtime = gethrtime_unscaled();
3018 3020 }
3019 3021
3020 3022 i = 0;
3021 3023 do {
3022 3024 switch (state = t->t_mstate) {
3023 3025 case LMS_SLEEP:
3024 3026 /*
3025 3027 * Update the timer for the current sleep state.
3026 3028 */
3027 3029 switch (state = ms->ms_prev) {
3028 3030 case LMS_TFAULT:
3029 3031 case LMS_DFAULT:
3030 3032 case LMS_KFAULT:
3031 3033 case LMS_USER_LOCK:
3032 3034 break;
3033 3035 default:
3034 3036 state = LMS_SLEEP;
3035 3037 break;
3036 3038 }
3037 3039 break;
3038 3040 case LMS_TFAULT:
3039 3041 case LMS_DFAULT:
3040 3042 case LMS_KFAULT:
3041 3043 case LMS_USER_LOCK:
3042 3044 state = LMS_SYSTEM;
3043 3045 break;
3044 3046 }
3045 3047 switch (state) {
3046 3048 case LMS_USER: mstimep = &pup->pr_utime; break;
3047 3049 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
3048 3050 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
3049 3051 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
3050 3052 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
3051 3053 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
3052 3054 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
3053 3055 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
3054 3056 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
3055 3057 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
3056 3058 default: panic("praddusage: unknown microstate");
3057 3059 }
3058 3060 tmp = curtime - ms->ms_state_start;
3059 3061 if (tmp < 0) {
3060 3062 curtime = gethrtime_unscaled();
3061 3063 i++;
3062 3064 continue;
3063 3065 }
3064 3066 scalehrtime(&tmp);
3065 3067 } while (tmp < 0 && i < MAX_ITERS_SPIN);
3066 3068
3067 3069 *mstimep += tmp;
3068 3070
3069 3071 /* update pup timestamp */
3070 3072 pup->pr_tstamp = curtime;
3071 3073 scalehrtime(&pup->pr_tstamp);
3072 3074
3073 3075 /*
3074 3076 * Resource usage counters.
3075 3077 */
3076 3078 pup->pr_minf += lwp->lwp_ru.minflt;
3077 3079 pup->pr_majf += lwp->lwp_ru.majflt;
3078 3080 pup->pr_nswap += lwp->lwp_ru.nswap;
3079 3081 pup->pr_inblk += lwp->lwp_ru.inblock;
3080 3082 pup->pr_oublk += lwp->lwp_ru.oublock;
3081 3083 pup->pr_msnd += lwp->lwp_ru.msgsnd;
3082 3084 pup->pr_mrcv += lwp->lwp_ru.msgrcv;
3083 3085 pup->pr_sigs += lwp->lwp_ru.nsignals;
3084 3086 pup->pr_vctx += lwp->lwp_ru.nvcsw;
3085 3087 pup->pr_ictx += lwp->lwp_ru.nivcsw;
3086 3088 pup->pr_sysc += lwp->lwp_ru.sysc;
3087 3089 pup->pr_ioch += lwp->lwp_ru.ioch;
3088 3090 }
3089 3091
3090 3092 /*
3091 3093 * Convert a prhusage_t to a prusage_t.
3092 3094 * This means convert each hrtime_t to a timestruc_t
3093 3095 * and copy the count fields uint64_t => ulong_t.
3094 3096 */
3095 3097 void
3096 3098 prcvtusage(prhusage_t *pup, prusage_t *upup)
3097 3099 {
3098 3100 uint64_t *ullp;
3099 3101 ulong_t *ulp;
3100 3102 int i;
3101 3103
3102 3104 upup->pr_lwpid = pup->pr_lwpid;
3103 3105 upup->pr_count = pup->pr_count;
3104 3106
3105 3107 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp);
3106 3108 hrt2ts(pup->pr_create, &upup->pr_create);
3107 3109 hrt2ts(pup->pr_term, &upup->pr_term);
3108 3110 hrt2ts(pup->pr_rtime, &upup->pr_rtime);
3109 3111 hrt2ts(pup->pr_utime, &upup->pr_utime);
3110 3112 hrt2ts(pup->pr_stime, &upup->pr_stime);
3111 3113 hrt2ts(pup->pr_ttime, &upup->pr_ttime);
3112 3114 hrt2ts(pup->pr_tftime, &upup->pr_tftime);
3113 3115 hrt2ts(pup->pr_dftime, &upup->pr_dftime);
3114 3116 hrt2ts(pup->pr_kftime, &upup->pr_kftime);
3115 3117 hrt2ts(pup->pr_ltime, &upup->pr_ltime);
3116 3118 hrt2ts(pup->pr_slptime, &upup->pr_slptime);
3117 3119 hrt2ts(pup->pr_wtime, &upup->pr_wtime);
3118 3120 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3119 3121 bzero(upup->filltime, sizeof (upup->filltime));
3120 3122
3121 3123 ullp = &pup->pr_minf;
3122 3124 ulp = &upup->pr_minf;
3123 3125 for (i = 0; i < 22; i++)
3124 3126 *ulp++ = (ulong_t)*ullp++;
3125 3127 }
3126 3128
#ifdef _SYSCALL32_IMPL
/*
 * Translate a prhusage_t into a prusage32_t.
 * Every hrtime_t is converted with hrt2ts32(), and the counters are
 * narrowed from uint64_t to uint32_t.
 */
void
prcvtusage32(prhusage_t *pup, prusage32_t *upup)
{
	uint64_t *src = &pup->pr_minf;
	uint32_t *dst = &upup->pr_minf;
	int idx;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	/* Timestamps and per-microstate times. */
	hrt2ts32(pup->pr_tstamp,   &upup->pr_tstamp);
	hrt2ts32(pup->pr_create,   &upup->pr_create);
	hrt2ts32(pup->pr_term,	   &upup->pr_term);
	hrt2ts32(pup->pr_rtime,	   &upup->pr_rtime);
	hrt2ts32(pup->pr_utime,	   &upup->pr_utime);
	hrt2ts32(pup->pr_stime,	   &upup->pr_stime);
	hrt2ts32(pup->pr_ttime,	   &upup->pr_ttime);
	hrt2ts32(pup->pr_tftime,   &upup->pr_tftime);
	hrt2ts32(pup->pr_dftime,   &upup->pr_dftime);
	hrt2ts32(pup->pr_kftime,   &upup->pr_kftime);
	hrt2ts32(pup->pr_ltime,	   &upup->pr_ltime);
	hrt2ts32(pup->pr_slptime,  &upup->pr_slptime);
	hrt2ts32(pup->pr_wtime,	   &upup->pr_wtime);
	hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Copy 22 consecutive words beginning at pr_minf in each structure.
	 * NOTE(review): presumably the named counters plus trailing filler;
	 * confirm against the prusage32_t/prhusage_t layouts.
	 */
	for (idx = 0; idx < 22; idx++)
		dst[idx] = (uint32_t)src[idx];
}
#endif	/* _SYSCALL32_IMPL */
3160 3162
/*
 * Report whether a set, stored as n 32-bit words starting at sp, is empty.
 * Returns 1 when every word is zero (including when n is 0), else 0.
 */
int
setisempty(uint32_t *sp, uint_t n)
{
	uint_t idx;

	for (idx = 0; idx < n; idx++) {
		if (sp[idx] != 0)
			return (0);
	}
	return (1);
}
3172 3174
3173 3175 /*
3174 3176 * Utility routine for establishing a watched area in the process.
3175 3177 * Keep the list of watched areas sorted by virtual address.
3176 3178 */
3177 3179 int
3178 3180 set_watched_area(proc_t *p, struct watched_area *pwa)
3179 3181 {
3180 3182 caddr_t vaddr = pwa->wa_vaddr;
3181 3183 caddr_t eaddr = pwa->wa_eaddr;
3182 3184 ulong_t flags = pwa->wa_flags;
3183 3185 struct watched_area *target;
3184 3186 avl_index_t where;
3185 3187 int error = 0;
3186 3188
3187 3189 /* we must not be holding p->p_lock, but the process must be locked */
3188 3190 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3189 3191 ASSERT(p->p_proc_flag & P_PR_LOCK);
3190 3192
3191 3193 /*
3192 3194 * If this is our first watchpoint, enable watchpoints for the process.
3193 3195 */
3194 3196 if (!pr_watch_active(p)) {
3195 3197 kthread_t *t;
3196 3198
3197 3199 mutex_enter(&p->p_lock);
3198 3200 if ((t = p->p_tlist) != NULL) {
3199 3201 do {
3200 3202 watch_enable(t);
3201 3203 } while ((t = t->t_forw) != p->p_tlist);
3202 3204 }
3203 3205 mutex_exit(&p->p_lock);
3204 3206 }
3205 3207
3206 3208 target = pr_find_watched_area(p, pwa, &where);
3207 3209 if (target != NULL) {
3208 3210 /*
3209 3211 * We discovered an existing, overlapping watched area.
3210 3212 * Allow it only if it is an exact match.
3211 3213 */
3212 3214 if (target->wa_vaddr != vaddr ||
3213 3215 target->wa_eaddr != eaddr)
3214 3216 error = EINVAL;
3215 3217 else if (target->wa_flags != flags) {
3216 3218 error = set_watched_page(p, vaddr, eaddr,
3217 3219 flags, target->wa_flags);
3218 3220 target->wa_flags = flags;
3219 3221 }
3220 3222 kmem_free(pwa, sizeof (struct watched_area));
3221 3223 } else {
3222 3224 avl_insert(&p->p_warea, pwa, where);
3223 3225 error = set_watched_page(p, vaddr, eaddr, flags, 0);
3224 3226 }
3225 3227
3226 3228 return (error);
3227 3229 }
3228 3230
3229 3231 /*
3230 3232 * Utility routine for clearing a watched area in the process.
3231 3233 * Must be an exact match of the virtual address.
3232 3234 * size and flags don't matter.
3233 3235 */
3234 3236 int
3235 3237 clear_watched_area(proc_t *p, struct watched_area *pwa)
3236 3238 {
3237 3239 struct watched_area *found;
3238 3240
3239 3241 /* we must not be holding p->p_lock, but the process must be locked */
3240 3242 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3241 3243 ASSERT(p->p_proc_flag & P_PR_LOCK);
3242 3244
3243 3245
3244 3246 if (!pr_watch_active(p)) {
3245 3247 kmem_free(pwa, sizeof (struct watched_area));
3246 3248 return (0);
3247 3249 }
3248 3250
3249 3251 /*
3250 3252 * Look for a matching address in the watched areas. If a match is
3251 3253 * found, clear the old watched area and adjust the watched page(s). It
3252 3254 * is not an error if there is no match.
3253 3255 */
3254 3256 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3255 3257 found->wa_vaddr == pwa->wa_vaddr) {
3256 3258 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3257 3259 found->wa_flags);
3258 3260 avl_remove(&p->p_warea, found);
3259 3261 kmem_free(found, sizeof (struct watched_area));
3260 3262 }
3261 3263
3262 3264 kmem_free(pwa, sizeof (struct watched_area));
3263 3265
3264 3266 /*
3265 3267 * If we removed the last watched area from the process, disable
3266 3268 * watchpoints.
3267 3269 */
3268 3270 if (!pr_watch_active(p)) {
3269 3271 kthread_t *t;
3270 3272
3271 3273 mutex_enter(&p->p_lock);
3272 3274 if ((t = p->p_tlist) != NULL) {
3273 3275 do {
3274 3276 watch_disable(t);
3275 3277 } while ((t = t->t_forw) != p->p_tlist);
3276 3278 }
3277 3279 mutex_exit(&p->p_lock);
3278 3280 }
3279 3281
3280 3282 return (0);
3281 3283 }
3282 3284
3283 3285 /*
3284 3286 * Frees all the watched_area structures
3285 3287 */
3286 3288 void
3287 3289 pr_free_watchpoints(proc_t *p)
3288 3290 {
3289 3291 struct watched_area *delp;
3290 3292 void *cookie;
3291 3293
3292 3294 cookie = NULL;
3293 3295 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3294 3296 kmem_free(delp, sizeof (struct watched_area));
3295 3297
3296 3298 avl_destroy(&p->p_warea);
3297 3299 }
3298 3300
3299 3301 /*
3300 3302 * This one is called by the traced process to unwatch all the
3301 3303 * pages while deallocating the list of watched_page structs.
3302 3304 */
3303 3305 void
3304 3306 pr_free_watched_pages(proc_t *p)
3305 3307 {
3306 3308 struct as *as = p->p_as;
3307 3309 struct watched_page *pwp;
3308 3310 uint_t prot;
3309 3311 int retrycnt, err;
3310 3312 void *cookie;
3311 3313
3312 3314 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3313 3315 return;
3314 3316
3315 3317 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3316 3318 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3317 3319
3318 3320 pwp = avl_first(&as->a_wpage);
3319 3321
3320 3322 cookie = NULL;
3321 3323 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3322 3324 retrycnt = 0;
3323 3325 if ((prot = pwp->wp_oprot) != 0) {
3324 3326 caddr_t addr = pwp->wp_vaddr;
3325 3327 struct seg *seg;
3326 3328 retry:
3327 3329
3328 3330 if ((pwp->wp_prot != prot ||
3329 3331 (pwp->wp_flags & WP_NOWATCH)) &&
3330 3332 (seg = as_segat(as, addr)) != NULL) {
3331 3333 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3332 3334 if (err == IE_RETRY) {
3333 3335 ASSERT(retrycnt == 0);
3334 3336 retrycnt++;
3335 3337 goto retry;
3336 3338 }
3337 3339 }
3338 3340 }
3339 3341 kmem_free(pwp, sizeof (struct watched_page));
3340 3342 }
3341 3343
3342 3344 avl_destroy(&as->a_wpage);
3343 3345 p->p_wprot = NULL;
3344 3346
3345 3347 AS_LOCK_EXIT(as, &as->a_lock);
3346 3348 }
3347 3349
3348 3350 /*
3349 3351 * Insert a watched area into the list of watched pages.
3350 3352 * If oflags is zero then we are adding a new watched area.
3351 3353 * Otherwise we are changing the flags of an existing watched area.
 *
 * vaddr/eaddr bound the watched area; flags are the new WA_* flags and
 * oflags the flags being replaced.  Returns 0 on success, or E2BIG when
 * the watched-page tree already holds more than prnwatch nodes.
3352 3354 */
3353 3355 static int
3354 3356 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3355 3357 ulong_t flags, ulong_t oflags)
3356 3358 {
3357 3359 struct as *as = p->p_as;
3358 3360 avl_tree_t *pwp_tree;
3359 3361 struct watched_page *pwp, *newpwp;
3360 3362 struct watched_page tpw;
3361 3363 avl_index_t where;
3362 3364 struct seg *seg;
3363 3365 uint_t prot;
3364 3366 caddr_t addr;
3365 3367
3366 3368 /*
3367 3369 * We need to pre-allocate a list of structures before we grab the
3368 3370 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3369 3371 * held.
 * One structure is allocated per page touched by [vaddr, eaddr) and the
 * structures are chained through wp_list.
3370 3372 */
3371 3373 newpwp = NULL;
3372 3374 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3373 3375 addr < eaddr; addr += PAGESIZE) {
3374 3376 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3375 3377 pwp->wp_list = newpwp;
3376 3378 newpwp = pwp;
3377 3379 }
3378 3380
3379 3381 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3380 3382
3381 3383 /*
3382 3384 * Search for an existing watched page to contain the watched area.
3383 3385 * If none is found, grab a new one from the available list
3384 3386 * and insert it in the active list, keeping the list sorted
3385 3387 * by user-level virtual address.
3386 3388 */
/* With SVFWAIT set, the per-process tree is used instead of the as's. */
3387 3389 if (p->p_flag & SVFWAIT)
3388 3390 pwp_tree = &p->p_wpage;
3389 3391 else
3390 3392 pwp_tree = &as->a_wpage;
3391 3393
3392 3394 again:
/* Too many watched pages: release the unused pre-allocations and fail. */
3393 3395 if (avl_numnodes(pwp_tree) > prnwatch) {
3394 3396 AS_LOCK_EXIT(as, &as->a_lock);
3395 3397 while (newpwp != NULL) {
3396 3398 pwp = newpwp->wp_list;
3397 3399 kmem_free(newpwp, sizeof (struct watched_page));
3398 3400 newpwp = pwp;
3399 3401 }
3400 3402 return (E2BIG);
3401 3403 }
3402 3404
3403 3405 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3404 3406 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
/* No entry for this page yet: take one from the pre-allocated list. */
3405 3407 pwp = newpwp;
3406 3408 newpwp = newpwp->wp_list;
3407 3409 pwp->wp_list = NULL;
3408 3410 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3409 3411 (uintptr_t)PAGEMASK);
3410 3412 avl_insert(pwp_tree, pwp, where);
3411 3413 }
3412 3414
3413 3415 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3414 3416
/* Drop the page's counts for the old flags before adding the new ones. */
3415 3417 if (oflags & WA_READ)
3416 3418 pwp->wp_read--;
3417 3419 if (oflags & WA_WRITE)
3418 3420 pwp->wp_write--;
3419 3421 if (oflags & WA_EXEC)
3420 3422 pwp->wp_exec--;
3421 3423
3422 3424 ASSERT(pwp->wp_read >= 0);
3423 3425 ASSERT(pwp->wp_write >= 0);
3424 3426 ASSERT(pwp->wp_exec >= 0);
3425 3427
3426 3428 if (flags & WA_READ)
3427 3429 pwp->wp_read++;
3428 3430 if (flags & WA_WRITE)
3429 3431 pwp->wp_write++;
3430 3432 if (flags & WA_EXEC)
3431 3433 pwp->wp_exec++;
3432 3434
3433 3435 if (!(p->p_flag & SVFWAIT)) {
3434 3436 vaddr = pwp->wp_vaddr;
/* First time for this page: record the segment's current protections. */
3435 3437 if (pwp->wp_oprot == 0 &&
3436 3438 (seg = as_segat(as, vaddr)) != NULL) {
3437 3439 SEGOP_GETPROT(seg, vaddr, 0, &prot);
3438 3440 pwp->wp_oprot = (uchar_t)prot;
3439 3441 pwp->wp_prot = (uchar_t)prot;
3440 3442 }
3441 3443 if (pwp->wp_oprot != 0) {
/* Derive the protections to apply from the active watch counts. */
3442 3444 prot = pwp->wp_oprot;
3443 3445 if (pwp->wp_read)
3444 3446 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3445 3447 if (pwp->wp_write)
3446 3448 prot &= ~PROT_WRITE;
3447 3449 if (pwp->wp_exec)
3448 3450 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
/* Link the page onto p->p_wprot once, marking it WP_SETPROT. */
3449 3451 if (!(pwp->wp_flags & WP_NOWATCH) &&
3450 3452 pwp->wp_prot != prot &&
3451 3453 (pwp->wp_flags & WP_SETPROT) == 0) {
3452 3454 pwp->wp_flags |= WP_SETPROT;
3453 3455 pwp->wp_list = p->p_wprot;
3454 3456 p->p_wprot = pwp;
3455 3457 }
3456 3458 pwp->wp_prot = (uchar_t)prot;
3457 3459 }
3458 3460 }
3459 3461
3460 3462 /*
3461 3463 * If the watched area extends into the next page then do
3462 3464 * it over again with the virtual address of the next page.
3463 3465 */
3464 3466 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3465 3467 goto again;
3466 3468
3467 3469 AS_LOCK_EXIT(as, &as->a_lock);
3468 3470
3469 3471 /*
3470 3472 * Free any pages we may have over-allocated
3471 3473 */
3472 3474 while (newpwp != NULL) {
3473 3475 pwp = newpwp->wp_list;
3474 3476 kmem_free(newpwp, sizeof (struct watched_page));
3475 3477 newpwp = pwp;
3476 3478 }
3477 3479
3478 3480 return (0);
3479 3481 }
3480 3482
3481 3483 /*
3482 3484 * Remove a watched area from the list of watched pages.
3483 3485 * A watched area may extend over more than one page.
 *
 * vaddr/eaddr bound the area being removed and flags are the WA_* flags
 * it was watched with.  The watched_page structures themselves are not
 * freed here; only their counts and protection state are updated.
3484 3486 */
3485 3487 static void
3486 3488 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3487 3489 {
3488 3490 struct as *as = p->p_as;
3489 3491 struct watched_page *pwp;
3490 3492 struct watched_page tpw;
3491 3493 avl_tree_t *tree;
3492 3494 avl_index_t where;
3493 3495
3494 3496 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
3495 3497
/* With SVFWAIT set, the per-process tree is used instead of the as's. */
3496 3498 if (p->p_flag & SVFWAIT)
3497 3499 tree = &p->p_wpage;
3498 3500 else
3499 3501 tree = &as->a_wpage;
3500 3502
/* Start at the page containing vaddr, or the next watched page after it. */
3501 3503 tpw.wp_vaddr = vaddr =
3502 3504 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3503 3505 pwp = avl_find(tree, &tpw, &where);
3504 3506 if (pwp == NULL)
3505 3507 pwp = avl_nearest(tree, where, AVL_AFTER);
3506 3508
3507 3509 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3508 3510 ASSERT(vaddr <= pwp->wp_vaddr);
3509 3511
/* Drop this area's contribution to the page's watch counts. */
3510 3512 if (flags & WA_READ)
3511 3513 pwp->wp_read--;
3512 3514 if (flags & WA_WRITE)
3513 3515 pwp->wp_write--;
3514 3516 if (flags & WA_EXEC)
3515 3517 pwp->wp_exec--;
3516 3518
3517 3519 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3518 3520 /*
3519 3521 * Reset the hat layer's protections on this page.
3520 3522 */
3521 3523 if (pwp->wp_oprot != 0) {
3522 3524 uint_t prot = pwp->wp_oprot;
3523 3525
3524 3526 if (pwp->wp_read)
3525 3527 prot &=
3526 3528 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3527 3529 if (pwp->wp_write)
3528 3530 prot &= ~PROT_WRITE;
3529 3531 if (pwp->wp_exec)
3530 3532 prot &=
3531 3533 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
/* Link the page onto p->p_wprot once, marking it WP_SETPROT. */
3532 3534 if (!(pwp->wp_flags & WP_NOWATCH) &&
3533 3535 pwp->wp_prot != prot &&
3534 3536 (pwp->wp_flags & WP_SETPROT) == 0) {
3535 3537 pwp->wp_flags |= WP_SETPROT;
3536 3538 pwp->wp_list = p->p_wprot;
3537 3539 p->p_wprot = pwp;
3538 3540 }
3539 3541 pwp->wp_prot = (uchar_t)prot;
3540 3542 }
3541 3543 } else {
3542 3544 /*
3543 3545 * No watched areas remain in this page.
3544 3546 * Reset everything to normal.
3545 3547 */
3546 3548 if (pwp->wp_oprot != 0) {
3547 3549 pwp->wp_prot = pwp->wp_oprot;
3548 3550 if ((pwp->wp_flags & WP_SETPROT) == 0) {
3549 3551 pwp->wp_flags |= WP_SETPROT;
3550 3552 pwp->wp_list = p->p_wprot;
3551 3553 p->p_wprot = pwp;
3552 3554 }
3553 3555 }
3554 3556 }
3555 3557
3556 3558 pwp = AVL_NEXT(tree, pwp);
3557 3559 }
3558 3560
3559 3561 AS_LOCK_EXIT(as, &as->a_lock);
3560 3562 }
3561 3563
3562 3564 /*
3563 3565 * Return the original protections for the specified page.
3564 3566 */
3565 3567 static void
3566 3568 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3567 3569 {
3568 3570 struct watched_page *pwp;
3569 3571 struct watched_page tpw;
3570 3572
3571 3573 ASSERT(AS_LOCK_HELD(as, &as->a_lock));
3572 3574
3573 3575 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3574 3576 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3575 3577 *prot = pwp->wp_oprot;
3576 3578 }
3577 3579
3578 3580 static prpagev_t *
3579 3581 pr_pagev_create(struct seg *seg, int check_noreserve)
3580 3582 {
3581 3583 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3582 3584 size_t total_pages = seg_pages(seg);
3583 3585
3584 3586 /*
3585 3587 * Limit the size of our vectors to pagev_lim pages at a time. We need
3586 3588 * 4 or 5 bytes of storage per page, so this means we limit ourself
3587 3589 * to about a megabyte of kernel heap by default.
3588 3590 */
3589 3591 pagev->pg_npages = MIN(total_pages, pagev_lim);
3590 3592 pagev->pg_pnbase = 0;
3591 3593
3592 3594 pagev->pg_protv =
3593 3595 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3594 3596
3595 3597 if (check_noreserve)
3596 3598 pagev->pg_incore =
3597 3599 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3598 3600 else
3599 3601 pagev->pg_incore = NULL;
3600 3602
3601 3603 return (pagev);
3602 3604 }
3603 3605
3604 3606 static void
3605 3607 pr_pagev_destroy(prpagev_t *pagev)
3606 3608 {
3607 3609 if (pagev->pg_incore != NULL)
3608 3610 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3609 3611
3610 3612 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3611 3613 kmem_free(pagev, sizeof (prpagev_t));
3612 3614 }
3613 3615
/*
 * Refill the page vector for seg starting at the page containing addr,
 * covering at most pg_npages pages and not going past eaddr.
 *
 * When pg_incore is NULL the protections are fetched and addr is returned
 * unchanged.  When pg_incore is set, pages are scanned for a non-zero
 * incore entry, refilling window by window as needed; the address at
 * which the scan stopped is returned.  If addr == eaddr on entry, eaddr
 * is returned and nothing is filled.
 */
3614 3616 static caddr_t
3615 3617 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3616 3618 {
3617 3619 ulong_t lastpg = seg_page(seg, eaddr - 1);
3618 3620 ulong_t pn, pnlim;
3619 3621 caddr_t saddr;
3620 3622 size_t len;
3621 3623
3622 3624 ASSERT(addr >= seg->s_base && addr <= eaddr);
3623 3625
3624 3626 if (addr == eaddr)
3625 3627 return (eaddr);
3626 3628
3627 3629 refill:
3628 3630 ASSERT(addr < eaddr);
3629 3631 pagev->pg_pnbase = seg_page(seg, addr);
3630 3632 pnlim = pagev->pg_pnbase + pagev->pg_npages;
/* saddr remembers where this window starts; addr may advance below. */
3631 3633 saddr = addr;
3632 3634
/* Cover either the remainder up to eaddr or one full vector's worth. */
3633 3635 if (lastpg < pnlim)
3634 3636 len = (size_t)(eaddr - addr);
3635 3637 else
3636 3638 len = pagev->pg_npages * PAGESIZE;
3637 3639
3638 3640 if (pagev->pg_incore != NULL) {
3639 3641 /*
3640 3642 * INCORE cleverly has different semantics than GETPROT:
3641 3643 * it returns info on pages up to but NOT including addr + len.
3642 3644 */
3643 3645 SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3644 3646 pn = pagev->pg_pnbase;
3645 3647
3646 3648 do {
3647 3649 /*
3648 3650 * Guilty knowledge here: We know that segvn_incore
3649 3651 * returns more than just the low-order bit that
3650 3652 * indicates the page is actually in memory. If any
3651 3653 * bits are set, then the page has backing store.
3652 3654 */
3653 3655 if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3654 3656 goto out;
3655 3657
3656 3658 } while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3657 3659
3658 3660 /*
3659 3661 * If we examined all the pages in the vector but we're not
3660 3662 * at the end of the segment, take another lap.
3661 3663 */
3662 3664 if (addr < eaddr)
3663 3665 goto refill;
3664 3666 }
3665 3667
3666 3668 /*
3667 3669 * Need to take len - 1 because addr + len is the address of the
3668 3670 * first byte of the page just past the end of what we want.
3669 3671 */
3670 3672 out:
/* Protections are fetched from the window start, not the advanced addr. */
3671 3673 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3672 3674 return (addr);
3673 3675 }
3674 3676
/*
 * Find the next run of pages in seg, below eaddr, that share the same
 * protections.
 *
 * On return *saddrp holds the address where the run starts, *protp its
 * protections (with getwatchprot() applied to each page examined), and
 * the return value is the address at which the run ends.  When pg_incore
 * is set, pages with a zero incore entry are skipped before the run and
 * terminate it; if none with a non-zero entry is found before eaddr,
 * *protp is set to 0 and *saddrp to the returned address.
 */
3675 3677 static caddr_t
3676 3678 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3677 3679 caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3678 3680 {
3679 3681 /*
3680 3682 * Our starting address is either the specified address, or the base
3681 3683 * address from the start of the pagev. If the latter is greater,
3682 3684 * this means a previous call to pr_pagev_fill has already scanned
3683 3685 * further than the end of the previous mapping.
3684 3686 */
3685 3687 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3686 3688 caddr_t addr = MAX(*saddrp, base);
3687 3689 ulong_t pn = seg_page(seg, addr);
3688 3690 uint_t prot, nprot;
3689 3691
3690 3692 /*
3691 3693 * If we're dealing with noreserve pages, then advance addr to
3692 3694 * the address of the next page which has backing store.
3693 3695 */
3694 3696 if (pagev->pg_incore != NULL) {
3695 3697 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
/* Reached eaddr without finding backing: report an empty result. */
3696 3698 if ((addr += PAGESIZE) == eaddr) {
3697 3699 *saddrp = addr;
3698 3700 prot = 0;
3699 3701 goto out;
3700 3702 }
/* Ran off the end of the current vector: refill it and continue. */
3701 3703 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3702 3704 addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3703 3705 if (addr == eaddr) {
3704 3706 *saddrp = addr;
3705 3707 prot = 0;
3706 3708 goto out;
3707 3709 }
3708 3710 pn = seg_page(seg, addr);
3709 3711 }
3710 3712 }
3711 3713 }
3712 3714
3713 3715 /*
3714 3716 * Get the protections on the page corresponding to addr.
3715 3717 */
3716 3718 pn = seg_page(seg, addr);
3717 3719 ASSERT(pn >= pagev->pg_pnbase);
3718 3720 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3719 3721
3720 3722 prot = pagev->pg_protv[pn - pagev->pg_pnbase];
/* A watched page reports its saved original protections instead. */
3721 3723 getwatchprot(seg->s_as, addr, &prot);
3722 3724 *saddrp = addr;
3723 3725
3724 3726 /*
3725 3727 * Now loop until we find a backed page with different protections
3726 3728 * or we reach the end of this segment.
3727 3729 */
3728 3730 while ((addr += PAGESIZE) < eaddr) {
3729 3731 /*
3730 3732 * If pn has advanced to the page number following what we
3731 3733 * have information on, refill the page vector and reset
3732 3734 * addr and pn. If pr_pagev_fill does not return the
3733 3735 * address of the next page, we have a discontiguity and
3734 3736 * thus have reached the end of the current mapping.
3735 3737 */
3736 3738 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3737 3739 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3738 3740 if (naddr != addr)
3739 3741 goto out;
3740 3742 pn = seg_page(seg, addr);
3741 3743 }
3742 3744
3743 3745 /*
3744 3746 * The previous page's protections are in prot, and it has
3745 3747 * backing. If this page is MAP_NORESERVE and has no backing,
3746 3748 * then end this mapping and return the previous protections.
3747 3749 */
3748 3750 if (pagev->pg_incore != NULL &&
3749 3751 pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3750 3752 break;
3751 3753
3752 3754 /*
3753 3755 * Otherwise end the mapping if this page's protections (nprot)
3754 3756 * are different than those in the previous page (prot).
3755 3757 */
3756 3758 nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3757 3759 getwatchprot(seg->s_as, addr, &nprot);
3758 3760
3759 3761 if (nprot != prot)
3760 3762 break;
3761 3763 }
3762 3764
3763 3765 out:
3764 3766 *protp = prot;
3765 3767 return (addr);
3766 3768 }
3767 3769
3768 3770 size_t
3769 3771 pr_getsegsize(struct seg *seg, int reserved)
3770 3772 {
3771 3773 size_t size = seg->s_size;
3772 3774
3773 3775 /*
3774 3776 * If we're interested in the reserved space, return the size of the
3775 3777 * segment itself. Everything else in this function is a special case
3776 3778 * to determine the actual underlying size of various segment types.
3777 3779 */
3778 3780 if (reserved)
3779 3781 return (size);
3780 3782
3781 3783 /*
3782 3784 * If this is a segvn mapping of a regular file, return the smaller
3783 3785 * of the segment size and the remaining size of the file beyond
3784 3786 * the file offset corresponding to seg->s_base.
3785 3787 */
3786 3788 if (seg->s_ops == &segvn_ops) {
3787 3789 vattr_t vattr;
3788 3790 vnode_t *vp;
3789 3791
3790 3792 vattr.va_mask = AT_SIZE;
3791 3793
3792 3794 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3793 3795 vp != NULL && vp->v_type == VREG &&
3794 3796 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3795 3797
3796 3798 u_offset_t fsize = vattr.va_size;
3797 3799 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3798 3800
3799 3801 if (fsize < offset)
3800 3802 fsize = 0;
3801 3803 else
3802 3804 fsize -= offset;
3803 3805
3804 3806 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3805 3807
3806 3808 if (fsize < (u_offset_t)size)
3807 3809 size = (size_t)fsize;
3808 3810 }
3809 3811
3810 3812 return (size);
3811 3813 }
3812 3814
3813 3815 /*
3814 3816 * If this is an ISM shared segment, don't include pages that are
3815 3817 * beyond the real size of the spt segment that backs it.
3816 3818 */
3817 3819 if (seg->s_ops == &segspt_shmops)
3818 3820 return (MIN(spt_realsize(seg), size));
3819 3821
3820 3822 /*
3821 3823 * If this is segment is a mapping from /dev/null, then this is a
3822 3824 * reservation of virtual address space and has no actual size.
3823 3825 * Such segments are backed by segdev and have type set to neither
3824 3826 * MAP_SHARED nor MAP_PRIVATE.
3825 3827 */
3826 3828 if (seg->s_ops == &segdev_ops &&
3827 3829 ((SEGOP_GETTYPE(seg, seg->s_base) &
3828 3830 (MAP_SHARED | MAP_PRIVATE)) == 0))
3829 3831 return (0);
3830 3832
3831 3833 /*
3832 3834 * If this segment doesn't match one of the special types we handle,
3833 3835 * just return the size of the segment itself.
3834 3836 */
3835 3837 return (size);
3836 3838 }
3837 3839
3838 3840 uint_t
3839 3841 pr_getprot(struct seg *seg, int reserved, void **tmp,
3840 3842 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3841 3843 {
3842 3844 struct as *as = seg->s_as;
3843 3845
3844 3846 caddr_t saddr = *saddrp;
3845 3847 caddr_t naddr;
3846 3848
3847 3849 int check_noreserve;
3848 3850 uint_t prot;
3849 3851
3850 3852 union {
3851 3853 struct segvn_data *svd;
3852 3854 struct segdev_data *sdp;
3853 3855 void *data;
3854 3856 } s;
3855 3857
3856 3858 s.data = seg->s_data;
3857 3859
3858 3860 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
3859 3861 ASSERT(saddr >= seg->s_base && saddr < eaddr);
3860 3862 ASSERT(eaddr <= seg->s_base + seg->s_size);
3861 3863
3862 3864 /*
3863 3865 * Don't include MAP_NORESERVE pages in the address range
3864 3866 * unless their mappings have actually materialized.
3865 3867 * We cheat by knowing that segvn is the only segment
3866 3868 * driver that supports MAP_NORESERVE.
3867 3869 */
3868 3870 check_noreserve =
3869 3871 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3870 3872 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3871 3873 (s.svd->flags & MAP_NORESERVE));
3872 3874
3873 3875 /*
3874 3876 * Examine every page only as a last resort. We use guilty knowledge
3875 3877 * of segvn and segdev to avoid this: if there are no per-page
3876 3878 * protections present in the segment and we don't care about
3877 3879 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3878 3880 */
3879 3881 if (!check_noreserve && saddr == seg->s_base &&
3880 3882 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3881 3883 prot = s.svd->prot;
3882 3884 getwatchprot(as, saddr, &prot);
3883 3885 naddr = eaddr;
3884 3886
3885 3887 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3886 3888 s.sdp != NULL && s.sdp->pageprot == 0) {
3887 3889 prot = s.sdp->prot;
3888 3890 getwatchprot(as, saddr, &prot);
3889 3891 naddr = eaddr;
3890 3892
3891 3893 } else {
3892 3894 prpagev_t *pagev;
3893 3895
3894 3896 /*
3895 3897 * If addr is sitting at the start of the segment, then
3896 3898 * create a page vector to store protection and incore
3897 3899 * information for pages in the segment, and fill it.
3898 3900 * Otherwise, we expect *tmp to address the prpagev_t
3899 3901 * allocated by a previous call to this function.
3900 3902 */
3901 3903 if (saddr == seg->s_base) {
3902 3904 pagev = pr_pagev_create(seg, check_noreserve);
3903 3905 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3904 3906
3905 3907 ASSERT(*tmp == NULL);
3906 3908 *tmp = pagev;
3907 3909
3908 3910 ASSERT(saddr <= eaddr);
3909 3911 *saddrp = saddr;
3910 3912
3911 3913 if (saddr == eaddr) {
3912 3914 naddr = saddr;
3913 3915 prot = 0;
3914 3916 goto out;
3915 3917 }
3916 3918
3917 3919 } else {
3918 3920 ASSERT(*tmp != NULL);
3919 3921 pagev = (prpagev_t *)*tmp;
3920 3922 }
3921 3923
3922 3924 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3923 3925 ASSERT(naddr <= eaddr);
3924 3926 }
3925 3927
3926 3928 out:
3927 3929 if (naddr == eaddr)
3928 3930 pr_getprot_done(tmp);
3929 3931 *naddrp = naddr;
3930 3932 return (prot);
3931 3933 }
3932 3934
3933 3935 void
3934 3936 pr_getprot_done(void **tmp)
3935 3937 {
3936 3938 if (*tmp != NULL) {
3937 3939 pr_pagev_destroy((prpagev_t *)*tmp);
3938 3940 *tmp = NULL;
3939 3941 }
3940 3942 }
3941 3943
3942 3944 /*
3943 3945 * Return true iff the vnode is a /proc file from the object directory.
3944 3946 */
3945 3947 int
3946 3948 pr_isobject(vnode_t *vp)
3947 3949 {
3948 3950 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3949 3951 }
3950 3952
3951 3953 /*
3952 3954 * Return true iff the vnode is a /proc file opened by the process itself.
3953 3955 */
3954 3956 int
3955 3957 pr_isself(vnode_t *vp)
3956 3958 {
3957 3959 /*
3958 3960 * XXX: To retain binary compatibility with the old
3959 3961 * ioctl()-based version of /proc, we exempt self-opens
3960 3962 * of /proc/<pid> from being marked close-on-exec.
3961 3963 */
3962 3964 return (vn_matchops(vp, prvnodeops) &&
3963 3965 (VTOP(vp)->pr_flags & PR_ISSELF) &&
3964 3966 VTOP(vp)->pr_type != PR_PIDDIR);
3965 3967 }
3966 3968
3967 3969 static ssize_t
3968 3970 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3969 3971 {
3970 3972 ssize_t pagesize, hatsize;
3971 3973
3972 3974 ASSERT(AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
3973 3975 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3974 3976 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3975 3977 ASSERT(saddr < eaddr);
3976 3978
3977 3979 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3978 3980 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3979 3981 ASSERT(pagesize != 0);
3980 3982
3981 3983 if (pagesize == -1)
3982 3984 pagesize = PAGESIZE;
3983 3985
3984 3986 saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3985 3987
3986 3988 while (saddr < eaddr) {
3987 3989 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3988 3990 break;
3989 3991 ASSERT(IS_P2ALIGNED(saddr, pagesize));
3990 3992 saddr += pagesize;
3991 3993 }
3992 3994
3993 3995 *naddrp = ((saddr < eaddr) ? saddr : eaddr);
3994 3996 return (hatsize);
3995 3997 }
3996 3998
3997 3999 /*
3998 4000 * Return an array of structures with extended memory map information.
3999 4001 * We allocate here; the caller must deallocate.
4000 4002 */
4001 4003 int
4002 4004 prgetxmap(proc_t *p, list_t *iolhead)
4003 4005 {
4004 4006 struct as *as = p->p_as;
4005 4007 prxmap_t *mp;
4006 4008 struct seg *seg;
4007 4009 struct seg *brkseg, *stkseg;
4008 4010 struct vnode *vp;
4009 4011 struct vattr vattr;
4010 4012 uint_t prot;
4011 4013
4012 4014 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4013 4015
4014 4016 /*
4015 4017 * Request an initial buffer size that doesn't waste memory
4016 4018 * if the address space has only a small number of segments.
4017 4019 */
4018 4020 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4019 4021
4020 4022 if ((seg = AS_SEGFIRST(as)) == NULL)
4021 4023 return (0);
4022 4024
4023 4025 brkseg = break_seg(p);
4024 4026 stkseg = as_segat(as, prgetstackbase(p));
4025 4027
4026 4028 do {
4027 4029 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4028 4030 caddr_t saddr, naddr, baddr;
4029 4031 void *tmp = NULL;
4030 4032 ssize_t psz;
4031 4033 char *parr;
4032 4034 uint64_t npages;
4033 4035 uint64_t pagenum;
4034 4036
4035 4037 /*
4036 4038 * Segment loop part one: iterate from the base of the segment
4037 4039 * to its end, pausing at each address boundary (baddr) between
4038 4040 * ranges that have different virtual memory protections.
4039 4041 */
4040 4042 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4041 4043 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4042 4044 ASSERT(baddr >= saddr && baddr <= eaddr);
4043 4045
4044 4046 /*
4045 4047 * Segment loop part two: iterate from the current
4046 4048 * position to the end of the protection boundary,
4047 4049 * pausing at each address boundary (naddr) between
4048 4050 * ranges that have different underlying page sizes.
4049 4051 */
4050 4052 for (; saddr < baddr; saddr = naddr) {
4051 4053 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4052 4054 ASSERT(naddr >= saddr && naddr <= baddr);
4053 4055
4054 4056 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4055 4057
4056 4058 mp->pr_vaddr = (uintptr_t)saddr;
4057 4059 mp->pr_size = naddr - saddr;
4058 4060 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4059 4061 mp->pr_mflags = 0;
4060 4062 if (prot & PROT_READ)
4061 4063 mp->pr_mflags |= MA_READ;
4062 4064 if (prot & PROT_WRITE)
4063 4065 mp->pr_mflags |= MA_WRITE;
4064 4066 if (prot & PROT_EXEC)
4065 4067 mp->pr_mflags |= MA_EXEC;
4066 4068 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4067 4069 mp->pr_mflags |= MA_SHARED;
4068 4070 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4069 4071 mp->pr_mflags |= MA_NORESERVE;
4070 4072 if (seg->s_ops == &segspt_shmops ||
4071 4073 (seg->s_ops == &segvn_ops &&
4072 4074 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4073 4075 vp == NULL)))
4074 4076 mp->pr_mflags |= MA_ANON;
4075 4077 if (seg == brkseg)
4076 4078 mp->pr_mflags |= MA_BREAK;
4077 4079 else if (seg == stkseg)
4078 4080 mp->pr_mflags |= MA_STACK;
4079 4081 if (seg->s_ops == &segspt_shmops)
4080 4082 mp->pr_mflags |= MA_ISM | MA_SHM;
4081 4083
4082 4084 mp->pr_pagesize = PAGESIZE;
4083 4085 if (psz == -1) {
4084 4086 mp->pr_hatpagesize = 0;
4085 4087 } else {
4086 4088 mp->pr_hatpagesize = psz;
4087 4089 }
4088 4090
4089 4091 /*
4090 4092 * Manufacture a filename for the "object" dir.
4091 4093 */
4092 4094 mp->pr_dev = PRNODEV;
4093 4095 vattr.va_mask = AT_FSID|AT_NODEID;
4094 4096 if (seg->s_ops == &segvn_ops &&
4095 4097 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4096 4098 vp != NULL && vp->v_type == VREG &&
4097 4099 VOP_GETATTR(vp, &vattr, 0, CRED(),
4098 4100 NULL) == 0) {
4099 4101 mp->pr_dev = vattr.va_fsid;
4100 4102 mp->pr_ino = vattr.va_nodeid;
4101 4103 if (vp == p->p_exec)
4102 4104 (void) strcpy(mp->pr_mapname,
4103 4105 "a.out");
4104 4106 else
4105 4107 pr_object_name(mp->pr_mapname,
4106 4108 vp, &vattr);
4107 4109 }
4108 4110
4109 4111 /*
4110 4112 * Get the SysV shared memory id, if any.
4111 4113 */
4112 4114 if ((mp->pr_mflags & MA_SHARED) &&
4113 4115 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4114 4116 seg->s_base)) != SHMID_NONE) {
4115 4117 if (mp->pr_shmid == SHMID_FREE)
4116 4118 mp->pr_shmid = -1;
4117 4119
4118 4120 mp->pr_mflags |= MA_SHM;
4119 4121 } else {
4120 4122 mp->pr_shmid = -1;
4121 4123 }
4122 4124
4123 4125 npages = ((uintptr_t)(naddr - saddr)) >>
4124 4126 PAGESHIFT;
4125 4127 parr = kmem_zalloc(npages, KM_SLEEP);
4126 4128
4127 4129 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4128 4130
4129 4131 for (pagenum = 0; pagenum < npages; pagenum++) {
4130 4132 if (parr[pagenum] & SEG_PAGE_INCORE)
4131 4133 mp->pr_rss++;
4132 4134 if (parr[pagenum] & SEG_PAGE_ANON)
4133 4135 mp->pr_anon++;
4134 4136 if (parr[pagenum] & SEG_PAGE_LOCKED)
4135 4137 mp->pr_locked++;
4136 4138 }
4137 4139 kmem_free(parr, npages);
4138 4140 }
4139 4141 }
4140 4142 ASSERT(tmp == NULL);
4141 4143 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4142 4144
4143 4145 return (0);
4144 4146 }
4145 4147
4146 4148 /*
4147 4149 * Return the process's credentials. We don't need a 32-bit equivalent of
4148 4150 * this function because prcred_t and prcred32_t are actually the same.
4149 4151 */
4150 4152 void
4151 4153 prgetcred(proc_t *p, prcred_t *pcrp)
4152 4154 {
4153 4155 mutex_enter(&p->p_crlock);
4154 4156 cred2prcred(p->p_cred, pcrp);
4155 4157 mutex_exit(&p->p_crlock);
4156 4158 }
4157 4159
4158 4160 /*
4159 4161 * Compute actual size of the prpriv_t structure.
4160 4162 */
4161 4163
4162 4164 size_t
4163 4165 prgetprivsize(void)
4164 4166 {
4165 4167 return (priv_prgetprivsize(NULL));
4166 4168 }
4167 4169
4168 4170 /*
4169 4171 * Return the process's privileges. We don't need a 32-bit equivalent of
4170 4172 * this function because prpriv_t and prpriv32_t are actually the same.
4171 4173 */
4172 4174 void
4173 4175 prgetpriv(proc_t *p, prpriv_t *pprp)
4174 4176 {
4175 4177 mutex_enter(&p->p_crlock);
4176 4178 cred2prpriv(p->p_cred, pprp);
4177 4179 mutex_exit(&p->p_crlock);
4178 4180 }
4179 4181
4180 4182 #ifdef _SYSCALL32_IMPL
4181 4183 /*
4182 4184 * Return an array of structures with HAT memory map information.
4183 4185 * We allocate here; the caller must deallocate.
4184 4186 */
4185 4187 int
4186 4188 prgetxmap32(proc_t *p, list_t *iolhead)
4187 4189 {
4188 4190 struct as *as = p->p_as;
4189 4191 prxmap32_t *mp;
4190 4192 struct seg *seg;
4191 4193 struct seg *brkseg, *stkseg;
4192 4194 struct vnode *vp;
4193 4195 struct vattr vattr;
4194 4196 uint_t prot;
4195 4197
4196 4198 ASSERT(as != &kas && AS_WRITE_HELD(as, &as->a_lock));
4197 4199
4198 4200 /*
4199 4201 * Request an initial buffer size that doesn't waste memory
4200 4202 * if the address space has only a small number of segments.
4201 4203 */
4202 4204 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4203 4205
4204 4206 if ((seg = AS_SEGFIRST(as)) == NULL)
4205 4207 return (0);
4206 4208
4207 4209 brkseg = break_seg(p);
4208 4210 stkseg = as_segat(as, prgetstackbase(p));
4209 4211
4210 4212 do {
4211 4213 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4212 4214 caddr_t saddr, naddr, baddr;
4213 4215 void *tmp = NULL;
4214 4216 ssize_t psz;
4215 4217 char *parr;
4216 4218 uint64_t npages;
4217 4219 uint64_t pagenum;
4218 4220
4219 4221 /*
4220 4222 * Segment loop part one: iterate from the base of the segment
4221 4223 * to its end, pausing at each address boundary (baddr) between
4222 4224 * ranges that have different virtual memory protections.
4223 4225 */
4224 4226 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4225 4227 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4226 4228 ASSERT(baddr >= saddr && baddr <= eaddr);
4227 4229
4228 4230 /*
4229 4231 * Segment loop part two: iterate from the current
4230 4232 * position to the end of the protection boundary,
4231 4233 * pausing at each address boundary (naddr) between
4232 4234 * ranges that have different underlying page sizes.
4233 4235 */
4234 4236 for (; saddr < baddr; saddr = naddr) {
4235 4237 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4236 4238 ASSERT(naddr >= saddr && naddr <= baddr);
4237 4239
4238 4240 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4239 4241
4240 4242 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4241 4243 mp->pr_size = (size32_t)(naddr - saddr);
4242 4244 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4243 4245 mp->pr_mflags = 0;
4244 4246 if (prot & PROT_READ)
4245 4247 mp->pr_mflags |= MA_READ;
4246 4248 if (prot & PROT_WRITE)
4247 4249 mp->pr_mflags |= MA_WRITE;
4248 4250 if (prot & PROT_EXEC)
4249 4251 mp->pr_mflags |= MA_EXEC;
4250 4252 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4251 4253 mp->pr_mflags |= MA_SHARED;
4252 4254 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4253 4255 mp->pr_mflags |= MA_NORESERVE;
4254 4256 if (seg->s_ops == &segspt_shmops ||
4255 4257 (seg->s_ops == &segvn_ops &&
4256 4258 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4257 4259 vp == NULL)))
4258 4260 mp->pr_mflags |= MA_ANON;
4259 4261 if (seg == brkseg)
4260 4262 mp->pr_mflags |= MA_BREAK;
4261 4263 else if (seg == stkseg)
4262 4264 mp->pr_mflags |= MA_STACK;
4263 4265 if (seg->s_ops == &segspt_shmops)
4264 4266 mp->pr_mflags |= MA_ISM | MA_SHM;
4265 4267
4266 4268 mp->pr_pagesize = PAGESIZE;
4267 4269 if (psz == -1) {
4268 4270 mp->pr_hatpagesize = 0;
4269 4271 } else {
4270 4272 mp->pr_hatpagesize = psz;
4271 4273 }
4272 4274
4273 4275 /*
4274 4276 * Manufacture a filename for the "object" dir.
4275 4277 */
4276 4278 mp->pr_dev = PRNODEV32;
4277 4279 vattr.va_mask = AT_FSID|AT_NODEID;
4278 4280 if (seg->s_ops == &segvn_ops &&
4279 4281 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4280 4282 vp != NULL && vp->v_type == VREG &&
4281 4283 VOP_GETATTR(vp, &vattr, 0, CRED(),
4282 4284 NULL) == 0) {
4283 4285 (void) cmpldev(&mp->pr_dev,
4284 4286 vattr.va_fsid);
4285 4287 mp->pr_ino = vattr.va_nodeid;
4286 4288 if (vp == p->p_exec)
4287 4289 (void) strcpy(mp->pr_mapname,
4288 4290 "a.out");
4289 4291 else
4290 4292 pr_object_name(mp->pr_mapname,
4291 4293 vp, &vattr);
4292 4294 }
4293 4295
4294 4296 /*
4295 4297 * Get the SysV shared memory id, if any.
4296 4298 */
4297 4299 if ((mp->pr_mflags & MA_SHARED) &&
4298 4300 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4299 4301 seg->s_base)) != SHMID_NONE) {
4300 4302 if (mp->pr_shmid == SHMID_FREE)
4301 4303 mp->pr_shmid = -1;
4302 4304
4303 4305 mp->pr_mflags |= MA_SHM;
4304 4306 } else {
4305 4307 mp->pr_shmid = -1;
4306 4308 }
4307 4309
4308 4310 npages = ((uintptr_t)(naddr - saddr)) >>
4309 4311 PAGESHIFT;
4310 4312 parr = kmem_zalloc(npages, KM_SLEEP);
4311 4313
4312 4314 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4313 4315
4314 4316 for (pagenum = 0; pagenum < npages; pagenum++) {
4315 4317 if (parr[pagenum] & SEG_PAGE_INCORE)
4316 4318 mp->pr_rss++;
4317 4319 if (parr[pagenum] & SEG_PAGE_ANON)
4318 4320 mp->pr_anon++;
4319 4321 if (parr[pagenum] & SEG_PAGE_LOCKED)
4320 4322 mp->pr_locked++;
4321 4323 }
4322 4324 kmem_free(parr, npages);
4323 4325 }
4324 4326 }
4325 4327 ASSERT(tmp == NULL);
4326 4328 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4327 4329
4328 4330 return (0);
4329 4331 }
4330 4332 #endif /* _SYSCALL32_IMPL */
↓ open down ↓ |
3202 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX