11909 THREAD_KPRI_RELEASE does nothing of the sort
Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
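The substantive change in this file is the removal of the THREAD_KPRI_RELEASE()/THREAD_KPRI_REQUEST() pair that bracketed the loadable-syscall module read lock in loadable_syscall() (the two deleted lines near the end of this diff); the Joyent copyright notice is the only other change. That is, the pattern

    module_lock = lock_syscall(se, code);
    THREAD_KPRI_RELEASE();    /* drop priority given by rw_enter */
    ...
    THREAD_KPRI_REQUEST();    /* regain priority from read lock */
    rw_exit(module_lock);

loses its THREAD_KPRI_* calls, which, per the synopsis above, no longer do what their names promise.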
--- old/usr/src/uts/intel/ia32/os/syscall.c
+++ new/usr/src/uts/intel/ia32/os/syscall.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 + * Copyright 2019 Joyent, Inc.
24 25 */
25 26
26 27 #include <sys/param.h>
27 28 #include <sys/vmparam.h>
28 29 #include <sys/types.h>
29 30 #include <sys/sysmacros.h>
30 31 #include <sys/systm.h>
31 32 #include <sys/signal.h>
32 33 #include <sys/stack.h>
33 34 #include <sys/cred.h>
34 35 #include <sys/cmn_err.h>
35 36 #include <sys/user.h>
36 37 #include <sys/privregs.h>
37 38 #include <sys/psw.h>
38 39 #include <sys/debug.h>
39 40 #include <sys/errno.h>
40 41 #include <sys/proc.h>
41 42 #include <sys/modctl.h>
42 43 #include <sys/var.h>
43 44 #include <sys/inline.h>
44 45 #include <sys/syscall.h>
45 46 #include <sys/ucontext.h>
46 47 #include <sys/cpuvar.h>
47 48 #include <sys/siginfo.h>
48 49 #include <sys/trap.h>
49 50 #include <sys/vtrace.h>
50 51 #include <sys/sysinfo.h>
51 52 #include <sys/procfs.h>
52 53 #include <sys/prsystm.h>
53 54 #include <c2/audit.h>
54 55 #include <sys/modctl.h>
55 56 #include <sys/aio_impl.h>
56 57 #include <sys/tnf.h>
57 58 #include <sys/tnf_probe.h>
58 59 #include <sys/copyops.h>
59 60 #include <sys/priv.h>
60 61 #include <sys/msacct.h>
61 62
62 63 int syscalltrace = 0;
63 64 #ifdef SYSCALLTRACE
64 65 static kmutex_t systrace_lock; /* syscall tracing lock */
65 66 #else
66 67 #define syscalltrace 0
67 68 #endif /* SYSCALLTRACE */
68 69
69 70 typedef int64_t (*llfcn_t)(); /* function returning long long */
70 71
71 72 int pre_syscall(void);
72 73 void post_syscall(long rval1, long rval2);
73 74 static krwlock_t *lock_syscall(struct sysent *, uint_t);
74 75 void deferred_singlestep_trap(caddr_t);
75 76
76 77 #ifdef _SYSCALL32_IMPL
77 78 #define LWP_GETSYSENT(lwp) \
78 79 (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE ? sysent : sysent32)
79 80 #else
80 81 #define LWP_GETSYSENT(lwp) (sysent)
81 82 #endif
82 83
83 84 /*
84 85 * If watchpoints are active, don't let the copyin of
85 86 * system call arguments take a read watchpoint trap.
86 87 */
87 88 static int
88 89 copyin_args(struct regs *rp, long *ap, uint_t nargs)
89 90 {
90 91 greg_t *sp = 1 + (greg_t *)rp->r_sp; /* skip ret addr */
91 92
92 93 ASSERT(nargs <= MAXSYSARGS);
93 94
94 95 return (copyin_nowatch(sp, ap, nargs * sizeof (*sp)));
95 96 }
96 97
97 98 #if defined(_SYSCALL32_IMPL)
98 99 static int
99 100 copyin_args32(struct regs *rp, long *ap, uint_t nargs)
100 101 {
101 102 greg32_t *sp = 1 + (greg32_t *)rp->r_sp; /* skip ret addr */
102 103 uint32_t a32[MAXSYSARGS];
103 104 int rc;
104 105
105 106 ASSERT(nargs <= MAXSYSARGS);
106 107
107 108 if ((rc = copyin_nowatch(sp, a32, nargs * sizeof (*sp))) == 0) {
108 109 uint32_t *a32p = &a32[0];
109 110
110 111 while (nargs--)
111 112 *ap++ = (ulong_t)*a32p++;
112 113 }
113 114 return (rc);
114 115 }
115 116 #define COPYIN_ARGS32 copyin_args32
116 117 #else
117 118 #define COPYIN_ARGS32 copyin_args
118 119 #endif
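
The (ulong_t) cast in the widening loop above zero-extends each 32-bit argument; sign extension would corrupt unsigned values. A minimal sketch of the difference on an LP64 kernel:

    uint32_t a32 = 0xfffffffe;       /* arg as fetched from the ia32 stack */
    long zext = (long)(ulong_t)a32;  /* 0x00000000fffffffe (what we want) */
    long sext = (long)(int32_t)a32;  /* 0xfffffffffffffffe (wrong here) */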
119 120
120 121 /*
121 122 * Error handler for system calls where the argument copyin faults.
122 123 */
123 124 static longlong_t
124 125 syscall_err()
125 126 {
126 127 return (0);
127 128 }
128 129
129 130 /*
130 131 * Corresponding sysent entry to allow syscall_entry caller
131 132 * to invoke syscall_err.
132 133 */
133 134 static struct sysent sysent_err = {
134 135 0, SE_32RVAL1, NULL, NULL, (llfcn_t)syscall_err
135 136 };
136 137
137 138 /*
138 139 * Called from syscall() when a non-trivial 32-bit system call occurs.
139 140 * Sets up the args and returns a pointer to the handler.
140 141 */
141 142 struct sysent *
142 143 syscall_entry(kthread_t *t, long *argp)
143 144 {
144 145 klwp_t *lwp = ttolwp(t);
145 146 struct regs *rp = lwptoregs(lwp);
146 147 unsigned int code;
147 148 struct sysent *callp;
148 149 struct sysent *se = LWP_GETSYSENT(lwp);
149 150 int error = 0;
150 151 uint_t nargs;
151 152
152 153 ASSERT(t == curthread && curthread->t_schedflag & TS_DONT_SWAP);
153 154
154 155 lwp->lwp_ru.sysc++;
155 156 lwp->lwp_eosys = NORMALRETURN; /* assume this will be normal */
156 157
157 158 /*
158 159 * Set lwp_ap to point to the args, even if none are needed for this
159 160 * system call. This is for the loadable-syscall case where the
160 161 * number of args won't be known until the system call is loaded, and
161 162 * also maintains a non-NULL lwp_ap setup for get_syscall_args(). Note
162 163 * that lwp_ap MUST be set to a non-NULL value _BEFORE_ t_sysnum is
163 164 * set to non-zero; otherwise get_syscall_args(), seeing a non-zero
164 165 * t_sysnum for this thread, will charge ahead and dereference lwp_ap.
165 166 */
166 167 lwp->lwp_ap = argp; /* for get_syscall_args */
167 168
168 169 code = rp->r_r0;
169 170 t->t_sysnum = (short)code;
170 171 callp = code >= NSYSCALL ? &nosys_ent : se + code;
171 172
172 173 if ((t->t_pre_sys | syscalltrace) != 0) {
173 174 error = pre_syscall();
174 175
175 176 /*
176 177 * pre_syscall() has ensured that lwp_ap is current;
177 178 * it either points to syscall-entry-saved amd64 regs,
178 179 * or it points to lwp_arg[], which has been re-copied from
179 180 * the ia32 ustack, but either way, it's a current copy after
180 181 * /proc has possibly mucked with the syscall args.
181 182 */
182 183
183 184 if (error)
184 185 return (&sysent_err); /* use dummy handler */
185 186 }
186 187
187 188 /*
188 189 * Fetch the system call arguments to the kernel stack copy used
189 190 * for syscall handling.
190 191 * Note: for loadable system calls the number of arguments required
191 192 * may not be known at this point, and will be zero if the system call
192 193 * was never loaded. Once the system call has been loaded, the number
193 194 * of args is not allowed to be changed.
194 195 */
195 196 if ((nargs = (uint_t)callp->sy_narg) != 0 &&
196 197 COPYIN_ARGS32(rp, argp, nargs)) {
197 198 (void) set_errno(EFAULT);
198 199 return (&sysent_err); /* use dummy handler */
199 200 }
200 201
201 202 return (callp); /* return sysent entry for caller */
202 203 }
203 204
204 205 void
205 206 syscall_exit(kthread_t *t, long rval1, long rval2)
206 207 {
207 208 /*
208 209 * Handle signals and other post-call events if necessary.
209 210 */
210 211 if ((t->t_post_sys_ast | syscalltrace) == 0) {
211 212 klwp_t *lwp = ttolwp(t);
212 213 struct regs *rp = lwptoregs(lwp);
213 214
214 215 /*
215 216 * Normal return.
216 217 * Clear error indication and set return values.
217 218 */
218 219 rp->r_ps &= ~PS_C; /* reset carry bit */
219 220 rp->r_r0 = rval1;
220 221 rp->r_r1 = rval2;
221 222 lwp->lwp_state = LWP_USER;
222 223 } else {
223 224 post_syscall(rval1, rval2);
224 225 }
225 226 t->t_sysnum = 0; /* invalidate args */
226 227 }
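
A sketch of how a user-level stub would interpret the register state syscall_exit() leaves behind (illustrative only; stub_return is hypothetical, and it assumes <errno.h> plus PS_C from <sys/psw.h> along with the saved flags and r0/r1 values):

    static long
    stub_return(ulong_t ps, long r0, long r1)
    {
        (void) r1;              /* r_r1 carries rval2 (e.g. for pipe(2)) */
        if (ps & PS_C) {        /* carry set: r_r0 holds an errno value */
            errno = (int)r0;
            return (-1);
        }
        return (r0);            /* carry clear: r_r0 is rval1 */
    }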
227 228
228 229 /*
229 230 * Perform pre-system-call processing, including stopping for tracing,
230 231 * auditing, etc.
231 232 *
232 233 * This routine is called only if the t_pre_sys flag is set. Any condition
233 234 * requiring pre-syscall handling must set the t_pre_sys flag. If the
234 235 * condition is persistent, this routine will repost t_pre_sys.
235 236 */
236 237 int
237 238 pre_syscall()
238 239 {
239 240 kthread_t *t = curthread;
240 241 unsigned code = t->t_sysnum;
241 242 klwp_t *lwp = ttolwp(t);
242 243 proc_t *p = ttoproc(t);
243 244 int repost;
244 245
245 246 t->t_pre_sys = repost = 0; /* clear pre-syscall processing flag */
246 247
247 248 ASSERT(t->t_schedflag & TS_DONT_SWAP);
248 249
249 250 #if defined(DEBUG)
250 251 /*
251 252 * On the i386 kernel, lwp_ap points at the piece of the thread
252 253 * stack that we copy the user's arguments into.
253 254 *
254 255 * On the amd64 kernel, the syscall arguments in the rdi..r9
255 256 * registers should be pointed at by lwp_ap. If the args need to
256 257 * be copied so that those registers can be changed without losing
257 258 * the ability to get the args for /proc, they can be saved by
258 259 * save_syscall_args(), and lwp_ap will be restored by post_syscall().
259 260 */
260 261 if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
261 262 #if defined(_LP64)
262 263 ASSERT(lwp->lwp_ap == (long *)&lwptoregs(lwp)->r_rdi);
263 264 } else {
264 265 #endif
265 266 ASSERT((caddr_t)lwp->lwp_ap > t->t_stkbase &&
266 267 (caddr_t)lwp->lwp_ap < t->t_stk);
267 268 }
268 269 #endif /* DEBUG */
269 270
270 271 /*
271 272 * Make sure the thread is holding the latest credentials for the
272 273 * process. The credentials in the process right now apply to this
273 274 * thread for the entire system call.
274 275 */
275 276 if (t->t_cred != p->p_cred) {
276 277 cred_t *oldcred = t->t_cred;
277 278 /*
278 279 * DTrace accesses t_cred in probe context. t_cred must
279 280 * always be either NULL, or point to a valid, allocated cred
280 281 * structure.
281 282 */
282 283 t->t_cred = crgetcred();
283 284 crfree(oldcred);
284 285 }
285 286
286 287 /*
287 288 * From the proc(4) manual page:
288 289 * When entry to a system call is being traced, the traced process
289 290 * stops after having begun the call to the system but before the
290 291 * system call arguments have been fetched from the process.
291 292 */
292 293 if (PTOU(p)->u_systrap) {
293 294 if (prismember(&PTOU(p)->u_entrymask, code)) {
294 295 mutex_enter(&p->p_lock);
295 296 /*
296 297 * Recheck stop condition, now that lock is held.
297 298 */
298 299 if (PTOU(p)->u_systrap &&
299 300 prismember(&PTOU(p)->u_entrymask, code)) {
300 301 stop(PR_SYSENTRY, code);
301 302
302 303 /*
303 304 * /proc may have modified syscall args,
304 305 * either in regs for amd64 or on ustack
305 306 * for ia32. Either way, arrange to
306 307 * copy them again, both for the syscall
307 308 * handler and for other consumers in
308 309 * post_syscall (like audit). Here, we
309 310 * only do amd64, and just set lwp_ap
310 311 * back to the kernel-entry stack copy;
311 312 * the syscall ml code redoes
312 313 * move-from-regs to set up for the
313 314 * syscall handler after we return. For
314 315 * ia32, save_syscall_args() below makes
315 316 * an lwp_ap-accessible copy.
316 317 */
317 318 #if defined(_LP64)
318 319 if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
319 320 lwp->lwp_argsaved = 0;
320 321 lwp->lwp_ap =
321 322 (long *)&lwptoregs(lwp)->r_rdi;
322 323 }
323 324 #endif
324 325 }
325 326 mutex_exit(&p->p_lock);
326 327 }
327 328 repost = 1;
328 329 }
329 330
330 331 /*
331 332 * ia32 kernel, or ia32 proc on amd64 kernel: keep args in
332 333 * lwp_arg for post-syscall processing, regardless of whether
333 334 * they might have been changed in /proc above.
334 335 */
335 336 #if defined(_LP64)
336 337 if (lwp_getdatamodel(lwp) != DATAMODEL_NATIVE)
337 338 #endif
338 339 (void) save_syscall_args();
339 340
340 341 if (lwp->lwp_sysabort) {
341 342 /*
342 343 * lwp_sysabort may have been set via /proc while the process
343 344 * was stopped on PR_SYSENTRY. If so, abort the system call.
344 345 * Override any error from the copyin() of the arguments.
345 346 */
346 347 lwp->lwp_sysabort = 0;
347 348 (void) set_errno(EINTR); /* forces post_sys */
348 349 t->t_pre_sys = 1; /* repost anyway */
349 350 return (1); /* don't do system call, return EINTR */
350 351 }
351 352
352 353 /*
353 354 * begin auditing for this syscall if the c2audit module is loaded
354 355 * and auditing is enabled
355 356 */
356 357 if (audit_active == C2AUDIT_LOADED) {
357 358 uint32_t auditing = au_zone_getstate(NULL);
358 359
359 360 if (auditing & AU_AUDIT_MASK) {
360 361 int error;
361 362 if (error = audit_start(T_SYSCALL, code, auditing, \
362 363 0, lwp)) {
363 364 t->t_pre_sys = 1; /* repost anyway */
364 365 (void) set_errno(error);
365 366 return (1);
366 367 }
367 368 repost = 1;
368 369 }
369 370 }
370 371
371 372 #ifndef NPROBE
372 373 /* Kernel probe */
373 374 if (tnf_tracing_active) {
374 375 TNF_PROBE_1(syscall_start, "syscall thread", /* CSTYLED */,
375 376 tnf_sysnum, sysnum, t->t_sysnum);
376 377 t->t_post_sys = 1; /* make sure post_syscall runs */
377 378 repost = 1;
378 379 }
379 380 #endif /* NPROBE */
380 381
381 382 #ifdef SYSCALLTRACE
382 383 if (syscalltrace) {
383 384 int i;
384 385 long *ap;
385 386 char *cp;
386 387 char *sysname;
387 388 struct sysent *callp;
388 389
389 390 if (code >= NSYSCALL)
390 391 callp = &nosys_ent; /* nosys has no args */
391 392 else
392 393 callp = LWP_GETSYSENT(lwp) + code;
393 394 (void) save_syscall_args();
394 395 mutex_enter(&systrace_lock);
395 396 printf("%d: ", p->p_pid);
396 397 if (code >= NSYSCALL) {
397 398 printf("0x%x", code);
398 399 } else {
399 400 sysname = mod_getsysname(code);
400 401 printf("%s[0x%x/0x%p]", sysname == NULL ? "NULL" :
401 402 sysname, code, callp->sy_callc);
402 403 }
403 404 cp = "(";
404 405 for (i = 0, ap = lwp->lwp_ap; i < callp->sy_narg; i++, ap++) {
405 406 printf("%s%lx", cp, *ap);
406 407 cp = ", ";
407 408 }
408 409 if (i)
409 410 printf(")");
410 411 printf(" %s id=0x%p\n", PTOU(p)->u_comm, curthread);
411 412 mutex_exit(&systrace_lock);
412 413 }
413 414 #endif /* SYSCALLTRACE */
414 415
415 416 /*
416 417 * If there was a continuing reason for pre-syscall processing,
417 418 * set the t_pre_sys flag for the next system call.
418 419 */
419 420 if (repost)
420 421 t->t_pre_sys = 1;
421 422 lwp->lwp_error = 0; /* for old drivers */
422 423 lwp->lwp_badpriv = PRIV_NONE;
423 424 return (0);
424 425 }
425 426
426 427
427 428 /*
428 429 * Post-syscall processing. Perform abnormal system call completion
429 430 * actions such as /proc tracing, profiling, signals, preemption, etc.
430 431 *
431 432 * This routine is called only if t_post_sys, t_sig_check, or t_astflag is set.
432 433 * Any condition requiring pre-syscall handling must set one of these.
433 434 * If the condition is persistent, this routine will repost t_post_sys.
434 435 */
435 436 void
436 437 post_syscall(long rval1, long rval2)
437 438 {
438 439 kthread_t *t = curthread;
439 440 klwp_t *lwp = ttolwp(t);
440 441 proc_t *p = ttoproc(t);
441 442 struct regs *rp = lwptoregs(lwp);
442 443 uint_t error;
443 444 uint_t code = t->t_sysnum;
444 445 int repost = 0;
445 446 int proc_stop = 0; /* non-zero if stopping */
446 447 int sigprof = 0; /* non-zero if sending SIGPROF */
447 448
448 449 t->t_post_sys = 0;
449 450
450 451 error = lwp->lwp_errno;
451 452
452 453 /*
453 454 * Code can be zero if this is a new LWP returning after a forkall(),
454 455 * other than the one matching the LWP in the parent that called
455 456 * forkall(). In these LWPs, skip most of the post-syscall activity.
456 457 */
457 458 if (code == 0)
458 459 goto sig_check;
459 460 /*
460 461 * If the trace flag is set, mark the lwp to take a single-step trap
461 462 * on return to user level (below). The x86 lcall and sysenter
462 463 * interfaces have already done this and turned off the flag, but
463 464 * the amd64 syscall interface has not.
464 465 */
465 466 if (rp->r_ps & PS_T) {
466 467 lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
467 468 rp->r_ps &= ~PS_T;
468 469 aston(curthread);
469 470 }
470 471
471 472 /* put out audit record for this syscall */
472 473 if (AU_AUDITING()) {
473 474 rval_t rval;
474 475
475 476 /* XX64 -- truncation of 64-bit return values? */
476 477 rval.r_val1 = (int)rval1;
477 478 rval.r_val2 = (int)rval2;
478 479 audit_finish(T_SYSCALL, code, error, &rval);
479 480 repost = 1;
480 481 }
481 482
482 483 if (curthread->t_pdmsg != NULL) {
483 484 char *m = curthread->t_pdmsg;
484 485
485 486 uprintf("%s", m);
486 487 kmem_free(m, strlen(m) + 1);
487 488 curthread->t_pdmsg = NULL;
488 489 }
489 490
490 491 /*
491 492 * If we're going to stop for /proc tracing, set the flag and
492 493 * save the arguments so that the return values don't smash them.
493 494 */
494 495 if (PTOU(p)->u_systrap) {
495 496 if (prismember(&PTOU(p)->u_exitmask, code)) {
496 497 if (lwp_getdatamodel(lwp) == DATAMODEL_LP64)
497 498 (void) save_syscall_args();
498 499 proc_stop = 1;
499 500 }
500 501 repost = 1;
501 502 }
502 503
503 504 /*
504 505 * Similarly check to see if SIGPROF might be sent.
505 506 */
506 507 if (curthread->t_rprof != NULL &&
507 508 curthread->t_rprof->rp_anystate != 0) {
508 509 if (lwp_getdatamodel(lwp) == DATAMODEL_LP64)
509 510 (void) save_syscall_args();
510 511 sigprof = 1;
511 512 }
512 513
513 514 if (lwp->lwp_eosys == NORMALRETURN) {
514 515 if (error == 0) {
515 516 #ifdef SYSCALLTRACE
516 517 if (syscalltrace) {
517 518 mutex_enter(&systrace_lock);
518 519 printf(
519 520 "%d: r_val1=0x%lx, r_val2=0x%lx, id 0x%p\n",
520 521 p->p_pid, rval1, rval2, curthread);
521 522 mutex_exit(&systrace_lock);
522 523 }
523 524 #endif /* SYSCALLTRACE */
524 525 rp->r_ps &= ~PS_C;
525 526 rp->r_r0 = rval1;
526 527 rp->r_r1 = rval2;
527 528 } else {
528 529 int sig;
529 530 #ifdef SYSCALLTRACE
530 531 if (syscalltrace) {
531 532 mutex_enter(&systrace_lock);
532 533 printf("%d: error=%d, id 0x%p\n",
533 534 p->p_pid, error, curthread);
534 535 mutex_exit(&systrace_lock);
535 536 }
536 537 #endif /* SYSCALLTRACE */
537 538 if (error == EINTR && t->t_activefd.a_stale)
538 539 error = EBADF;
539 540 if (error == EINTR &&
540 541 (sig = lwp->lwp_cursig) != 0 &&
541 542 sigismember(&PTOU(p)->u_sigrestart, sig) &&
542 543 PTOU(p)->u_signal[sig - 1] != SIG_DFL &&
543 544 PTOU(p)->u_signal[sig - 1] != SIG_IGN)
544 545 error = ERESTART;
545 546 rp->r_r0 = error;
546 547 rp->r_ps |= PS_C;
547 548 }
548 549 }
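 /*
 * An illustrative user-level view of the ERESTART case above (the
 * snippet is a sketch, not code from this gate): installing a
 * handler with SA_RESTART is what makes the u_sigrestart test
 * succeed, so an interrupted call is restarted instead of failing
 * with EINTR:
 *
 *    struct sigaction sa = { 0 };
 *    sa.sa_handler = handler;
 *    sa.sa_flags = SA_RESTART;
 *    (void) sigaction(SIGUSR1, &sa, NULL);
 *    (void) read(fd, buf, 1);    restarted after SIGUSR1
 */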
549 550
550 551 /*
551 552 * From the proc(4) manual page:
552 553 * When exit from a system call is being traced, the traced process
553 554 * stops on completion of the system call just prior to checking for
554 555 * signals and returning to user level. At this point all return
555 556 * values have been stored into the traced process's saved registers.
556 557 */
557 558 if (proc_stop) {
558 559 mutex_enter(&p->p_lock);
559 560 if (PTOU(p)->u_systrap &&
560 561 prismember(&PTOU(p)->u_exitmask, code))
561 562 stop(PR_SYSEXIT, code);
562 563 mutex_exit(&p->p_lock);
563 564 }
564 565
565 566 /*
566 567 * If we are the parent returning from a successful
567 568 * vfork, wait for the child to exec or exit.
568 569 * This code must be here and not in the bowels of the system
569 570 * so that /proc can intercept exit from vfork in a timely way.
570 571 */
571 572 if (t->t_flag & T_VFPARENT) {
572 573 ASSERT(code == SYS_vfork || code == SYS_forksys);
573 574 ASSERT(rp->r_r1 == 0 && error == 0);
574 575 vfwait((pid_t)rval1);
575 576 t->t_flag &= ~T_VFPARENT;
576 577 }
577 578
578 579 /*
579 580 * If profiling is active, bill the current PC in user-land
580 581 * and keep reposting until profiling is disabled.
581 582 */
582 583 if (p->p_prof.pr_scale) {
583 584 if (lwp->lwp_oweupc)
584 585 profil_tick(rp->r_pc);
585 586 repost = 1;
586 587 }
587 588
588 589 sig_check:
589 590 /*
590 591 * Reset flag for next time.
591 592 * We must do this after stopping on PR_SYSEXIT
592 593 * because /proc uses the information in lwp_eosys.
593 594 */
594 595 lwp->lwp_eosys = NORMALRETURN;
595 596 clear_stale_fd();
596 597 t->t_flag &= ~T_FORKALL;
597 598
598 599 if (t->t_astflag | t->t_sig_check) {
599 600 /*
600 601 * Turn off the AST flag before checking all the conditions that
601 602 * may have caused an AST. This flag is on whenever a signal or
602 603 * unusual condition should be handled after the next trap or
603 604 * syscall.
604 605 */
605 606 astoff(t);
606 607 /*
607 608 * If a single-step trap occurred on a syscall (see trap())
608 609 * recognize it now. Do this before checking for signals
609 610 * because deferred_singlestep_trap() may generate a SIGTRAP to
610 611 * the LWP or may otherwise mark the LWP to call issig(FORREAL).
611 612 */
612 613 if (lwp->lwp_pcb.pcb_flags & DEBUG_PENDING)
613 614 deferred_singlestep_trap((caddr_t)rp->r_pc);
614 615
615 616 t->t_sig_check = 0;
616 617
617 618 /*
618 619 * The following check is legal for the following reasons:
619 620 * 1) The thread we are checking is ourselves, so there is
620 621 * no way the proc can go away.
621 622 * 2) The only time we need to be protected by the
622 623 * lock is if the binding is changed.
623 624 *
624 625 * Note we will still take the lock and check the binding
625 626 * if the condition was true without the lock held. This
626 627 * prevents lock contention among threads owned by the
627 628 * same proc.
628 629 */
629 630
630 631 if (curthread->t_proc_flag & TP_CHANGEBIND) {
631 632 mutex_enter(&p->p_lock);
632 633 if (curthread->t_proc_flag & TP_CHANGEBIND) {
633 634 timer_lwpbind();
634 635 curthread->t_proc_flag &= ~TP_CHANGEBIND;
635 636 }
636 637 mutex_exit(&p->p_lock);
637 638 }
638 639
639 640 /*
640 641 * for kaio requests on the special kaio poll queue,
641 642 * copyout their results to user memory.
642 643 */
643 644 if (p->p_aio)
644 645 aio_cleanup(0);
645 646 /*
646 647 * If this LWP was asked to hold, call holdlwp(), which will
647 648 * stop. holdlwps() sets this up and calls pokelwps() which
648 649 * sets the AST flag.
649 650 *
650 651 * Also check TP_EXITLWP, since this is used by fresh new LWPs
651 652 * through lwp_rtt(). That flag is set if the lwp_create(2)
652 653 * syscall failed after creating the LWP.
653 654 */
654 655 if (ISHOLD(p) || (t->t_proc_flag & TP_EXITLWP))
655 656 holdlwp();
656 657
657 658 /*
658 659 * All code that sets signals and makes ISSIG_PENDING
659 660 * evaluate true must set t_sig_check afterwards.
660 661 */
661 662 if (ISSIG_PENDING(t, lwp, p)) {
662 663 if (issig(FORREAL))
663 664 psig();
664 665 t->t_sig_check = 1; /* recheck next time */
665 666 }
666 667
667 668 if (sigprof) {
668 669 int nargs = (code > 0 && code < NSYSCALL)?
669 670 LWP_GETSYSENT(lwp)[code].sy_narg : 0;
670 671 realsigprof(code, nargs, error);
671 672 t->t_sig_check = 1; /* recheck next time */
672 673 }
673 674
674 675 /*
675 676 * If a performance counter overflow interrupt was
676 677 * delivered *during* the syscall, then re-enable the
677 678 * AST so that we take a trip through trap() to cause
678 679 * the SIGEMT to be delivered.
679 680 */
680 681 if (lwp->lwp_pcb.pcb_flags & CPC_OVERFLOW)
681 682 aston(t);
682 683
683 684 /*
684 685 * /proc can't enable/disable the trace bit itself
685 686 * because that could race with the call gate used by
686 687 * system calls via "lcall". If that happened, an
687 688 * invalid EFLAGS would result. prstep()/prnostep()
688 689 * therefore schedule an AST for the purpose.
689 690 */
690 691 if (lwp->lwp_pcb.pcb_flags & REQUEST_STEP) {
691 692 lwp->lwp_pcb.pcb_flags &= ~REQUEST_STEP;
692 693 rp->r_ps |= PS_T;
693 694 }
694 695 if (lwp->lwp_pcb.pcb_flags & REQUEST_NOSTEP) {
695 696 lwp->lwp_pcb.pcb_flags &= ~REQUEST_NOSTEP;
696 697 rp->r_ps &= ~PS_T;
697 698 }
698 699 }
699 700
700 701 lwp->lwp_errno = 0; /* clear error for next time */
701 702
702 703 #ifndef NPROBE
703 704 /* Kernel probe */
704 705 if (tnf_tracing_active) {
705 706 TNF_PROBE_3(syscall_end, "syscall thread", /* CSTYLED */,
706 707 tnf_long, rval1, rval1,
707 708 tnf_long, rval2, rval2,
708 709 tnf_long, errno, (long)error);
709 710 repost = 1;
710 711 }
711 712 #endif /* NPROBE */
712 713
713 714 /*
714 715 * Set state to LWP_USER here so preempt won't give us a kernel
715 716 * priority if it occurs after this point. Call CL_TRAPRET() to
716 717 * restore the user-level priority.
717 718 *
718 719 * It is important that no locks (other than spinlocks) be entered
719 720 * after this point before returning to user mode (unless lwp_state
720 721 * is set back to LWP_SYS).
721 722 *
722 723 * XXX Sampled times past this point are charged to the user.
723 724 */
724 725 lwp->lwp_state = LWP_USER;
725 726
726 727 if (t->t_trapret) {
727 728 t->t_trapret = 0;
728 729 thread_lock(t);
729 730 CL_TRAPRET(t);
730 731 thread_unlock(t);
731 732 }
732 733 if (CPU->cpu_runrun || t->t_schedflag & TS_ANYWAITQ)
733 734 preempt();
734 735 prunstop();
735 736
736 737 lwp->lwp_errno = 0; /* clear error for next time */
737 738
738 739 /*
739 740 * The thread lock must be held in order to clear sysnum and reset
740 741 * lwp_ap atomically with respect to other threads in the system that
741 742 * may be looking at the args via lwp_ap from get_syscall_args().
742 743 */
743 744
744 745 thread_lock(t);
745 746 t->t_sysnum = 0; /* no longer in a system call */
746 747
747 748 if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
748 749 #if defined(_LP64)
749 750 /*
750 751 * In case the args were copied to the lwp, reset the
751 752 * pointer so the next syscall will have the right
752 753 * lwp_ap pointer.
753 754 */
754 755 lwp->lwp_ap = (long *)&rp->r_rdi;
755 756 } else {
756 757 #endif
757 758 lwp->lwp_ap = NULL; /* reset on every syscall entry */
758 759 }
759 760 thread_unlock(t);
760 761
761 762 lwp->lwp_argsaved = 0;
762 763
763 764 /*
764 765 * If there was a continuing reason for post-syscall processing,
765 766 * set the t_post_sys flag for the next system call.
766 767 */
767 768 if (repost)
768 769 t->t_post_sys = 1;
769 770
770 771 /*
771 772 * If there is a ustack registered for this lwp, and the stack rlimit
772 773 * has been altered, read in the ustack. If the saved stack rlimit
773 774 * matches the bounds of the ustack, update the ustack to reflect
774 775 * the new rlimit. If the new stack rlimit is RLIM_INFINITY, disable
775 776 * stack checking by setting the size to 0.
776 777 */
777 778 if (lwp->lwp_ustack != 0 && lwp->lwp_old_stk_ctl != 0) {
778 779 rlim64_t new_size;
779 780 caddr_t top;
780 781 stack_t stk;
781 782 struct rlimit64 rl;
782 783
783 784 mutex_enter(&p->p_lock);
784 785 new_size = p->p_stk_ctl;
785 786 top = p->p_usrstack;
786 787 (void) rctl_rlimit_get(rctlproc_legacy[RLIMIT_STACK], p, &rl);
787 788 mutex_exit(&p->p_lock);
788 789
789 790 if (rl.rlim_cur == RLIM64_INFINITY)
790 791 new_size = 0;
791 792
792 793 if (copyin((stack_t *)lwp->lwp_ustack, &stk,
793 794 sizeof (stack_t)) == 0 &&
794 795 (stk.ss_size == lwp->lwp_old_stk_ctl ||
795 796 stk.ss_size == 0) &&
796 797 stk.ss_sp == top - stk.ss_size) {
797 798 stk.ss_sp = (void *)((uintptr_t)stk.ss_sp +
798 799 stk.ss_size - (uintptr_t)new_size);
799 800 stk.ss_size = new_size;
800 801
801 802 (void) copyout(&stk, (stack_t *)lwp->lwp_ustack,
802 803 sizeof (stack_t));
803 804 }
804 805
805 806 lwp->lwp_old_stk_ctl = 0;
806 807 }
807 808 }
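
A worked example of the ustack adjustment at the end of post_syscall(), with illustrative sizes (top == p_usrstack, old limit 8 MB, new limit 16 MB):

    before: stk.ss_size == 0x800000,   stk.ss_sp == top - 0x800000
    after:  stk.ss_sp  += 0x800000 - 0x1000000;   /* == top - 0x1000000 */
            stk.ss_size = 0x1000000;

The invariant ss_sp + ss_size == top is preserved; a new limit of RLIM_INFINITY instead sets ss_size to 0, disabling stack-bounds checking.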
808 809
809 810 /*
810 811 * Called from post_syscall() when a deferred singlestep is to be taken.
811 812 */
812 813 void
813 814 deferred_singlestep_trap(caddr_t pc)
814 815 {
815 816 proc_t *p = ttoproc(curthread);
816 817 klwp_t *lwp = ttolwp(curthread);
817 818 pcb_t *pcb = &lwp->lwp_pcb;
818 819 uint_t fault = 0;
819 820 k_siginfo_t siginfo;
820 821
821 822 bzero(&siginfo, sizeof (siginfo));
822 823
823 824 /*
824 825 * If both NORMAL_STEP and WATCH_STEP are in
825 826 * effect, give precedence to WATCH_STEP.
826 827 * If neither is set, user must have set the
827 828 * PS_T bit in %efl; treat this as NORMAL_STEP.
828 829 */
829 830 if ((fault = undo_watch_step(&siginfo)) == 0 &&
830 831 ((pcb->pcb_flags & NORMAL_STEP) ||
831 832 !(pcb->pcb_flags & WATCH_STEP))) {
832 833 siginfo.si_signo = SIGTRAP;
833 834 siginfo.si_code = TRAP_TRACE;
834 835 siginfo.si_addr = pc;
835 836 fault = FLTTRACE;
836 837 }
837 838 pcb->pcb_flags &= ~(DEBUG_PENDING|NORMAL_STEP|WATCH_STEP);
838 839
839 840 if (fault) {
840 841 /*
841 842 * Remember the fault and fault address
842 843 * for real-time (SIGPROF) profiling.
843 844 */
844 845 lwp->lwp_lastfault = fault;
845 846 lwp->lwp_lastfaddr = siginfo.si_addr;
846 847 /*
847 848 * If a debugger has declared this fault to be an
848 849 * event of interest, stop the lwp. Otherwise just
849 850 * deliver the associated signal.
850 851 */
851 852 if (prismember(&p->p_fltmask, fault) &&
852 853 stop_on_fault(fault, &siginfo) == 0)
853 854 siginfo.si_signo = 0;
854 855 }
855 856
856 857 if (siginfo.si_signo)
857 858 trapsig(&siginfo, 1);
858 859 }
859 860
860 861 /*
861 862 * Nonexistent system call: signal the lwp (it may want to handle it);
862 863 * flag an error if the lwp won't see the signal immediately.
863 864 */
864 865 int64_t
865 866 nosys(void)
866 867 {
867 868 tsignal(curthread, SIGSYS);
868 869 return (set_errno(ENOSYS));
869 870 }
870 871
871 872 int
872 873 nosys32(void)
873 874 {
874 875 return (nosys());
875 876 }
876 877
877 878 /*
878 879 * Execute a 32-bit system call on behalf of the current thread.
879 880 */
880 881 void
881 882 dosyscall(void)
882 883 {
883 884 /*
884 885 * Need space on the stack to store syscall arguments.
885 886 */
886 887 long syscall_args[MAXSYSARGS];
887 888 struct sysent *se;
888 889 int64_t ret;
889 890
890 891 syscall_mstate(LMS_TRAP, LMS_SYSTEM);
891 892
892 893 ASSERT(curproc->p_model == DATAMODEL_ILP32);
893 894
894 895 CPU_STATS_ENTER_K();
895 896 CPU_STATS_ADDQ(CPU, sys, syscall, 1);
896 897 CPU_STATS_EXIT_K();
897 898
898 899 se = syscall_entry(curthread, syscall_args);
899 900
900 901 /*
901 902 * syscall_entry() copied all 8 arguments into syscall_args.
902 903 */
903 904 ret = se->sy_callc(syscall_args[0], syscall_args[1], syscall_args[2],
904 905 syscall_args[3], syscall_args[4], syscall_args[5], syscall_args[6],
905 906 syscall_args[7]);
906 907
907 908 syscall_exit(curthread, (int)ret & 0xffffffffu, (int)(ret >> 32));
908 909 syscall_mstate(LMS_SYSTEM, LMS_TRAP);
909 910 }
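
The (int)ret and (int)(ret >> 32) split above assumes the rval_t packing used throughout this file; a minimal sketch of that layout on little-endian x86 (my_rval_t is a hypothetical mirror, not the real <sys/types.h> definition):

    typedef union {
        int64_t r_vals;              /* the packed 64-bit return */
        struct {
            int32_t r_val1;          /* low 32 bits: rval1 */
            int32_t r_val2;          /* high 32 bits: rval2 */
        } r_v;
    } my_rval_t;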
910 911
911 912 /*
912 913 * Get the arguments to the current system call. See comment atop
913 914 * save_syscall_args() regarding lwp_ap usage.
914 915 */
915 916
916 917 uint_t
917 918 get_syscall_args(klwp_t *lwp, long *argp, int *nargsp)
918 919 {
919 920 kthread_t *t = lwptot(lwp);
920 921 ulong_t mask = 0xfffffffful;
921 922 uint_t code;
922 923 long *ap;
923 924 int nargs;
924 925
925 926 #if defined(_LP64)
926 927 if (lwp_getdatamodel(lwp) == DATAMODEL_LP64)
927 928 mask = 0xfffffffffffffffful;
928 929 #endif
929 930
930 931 /*
931 932 * The thread lock must be held while looking at the arguments to ensure
932 933 * they don't go away via post_syscall().
933 934 * get_syscall_args() is the only routine to read them which is callable
934 935 * outside the LWP in question and hence the only one that must be
935 936 * synchronized in this manner.
936 937 */
937 938 thread_lock(t);
938 939
939 940 code = t->t_sysnum;
940 941 ap = lwp->lwp_ap;
941 942
942 943 thread_unlock(t);
943 944
944 945 if (code != 0 && code < NSYSCALL) {
945 946 nargs = LWP_GETSYSENT(lwp)[code].sy_narg;
946 947
947 948 ASSERT(nargs <= MAXSYSARGS);
948 949
949 950 *nargsp = nargs;
950 951 while (nargs-- > 0)
951 952 *argp++ = *ap++ & mask;
952 953 } else {
953 954 *nargsp = 0;
954 955 }
955 956
956 957 return (code);
957 958 }
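
A minimal usage sketch for get_syscall_args(), assuming the caller can reach the target lwp (e.g. /proc machinery):

    long args[MAXSYSARGS];
    int nargs;
    uint_t code = get_syscall_args(lwp, args, &nargs);

    /* code is t_sysnum; args[0..nargs-1] are masked to the target's
       data model; nargs is 0 when no valid syscall is in progress. */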
958 959
959 960 #ifdef _SYSCALL32_IMPL
960 961 /*
961 962 * Get the arguments to the current 32-bit system call.
962 963 */
963 964 uint_t
964 965 get_syscall32_args(klwp_t *lwp, int *argp, int *nargsp)
965 966 {
966 967 long args[MAXSYSARGS];
967 968 uint_t i, code;
968 969
969 970 code = get_syscall_args(lwp, args, nargsp);
970 971
971 972 for (i = 0; i != *nargsp; i++)
972 973 *argp++ = (int)args[i];
973 974 return (code);
974 975 }
975 976 #endif
976 977
977 978 /*
978 979 * Save the system call arguments in a safe place.
979 980 *
980 981 * On the i386 kernel:
981 982 *
982 983 * Copy the user's args prior to changing the stack or stack pointer.
983 984 * This is so /proc will be able to get a valid copy of the
984 985 * args from the user stack even after the user stack has been changed.
985 986 * Note that the kernel stack copy of the args may also have been
986 987 * changed by a system call handler which takes C-style arguments.
987 988 *
988 989 * Note that this may be called by stop() from trap(). In that case
989 990 * t_sysnum will be zero (syscall_exit clears it), so no args will be
990 991 * copied.
991 992 *
992 993 * On the amd64 kernel:
993 994 *
994 995 * For 64-bit applications, lwp->lwp_ap normally points to %rdi..%r9
995 996 * in the reg structure. If the user is going to change the argument
996 997 * registers, rax, or the stack and might want to get the args (for
997 998 * /proc tracing), it must copy the args elsewhere via save_syscall_args().
998 999 *
999 1000 * For 32-bit applications, lwp->lwp_ap normally points to a copy of
1000 1001 * the system call arguments on the kernel stack made from the user
1001 1002 * stack. Copy the args prior to changing the stack or stack pointer.
1002 1003 * This is so /proc will be able to get a valid copy of the args
1003 1004 * from the user stack even after that stack has been changed.
1004 1005 *
1005 1006 * This may be called from stop() even when we're not in a system call.
1006 1007 * Since there's no easy way to tell, this must be safe (not panic).
1007 1008 * If the copyins get data faults, return non-zero.
1008 1009 */
1009 1010 int
1010 1011 save_syscall_args()
1011 1012 {
1012 1013 kthread_t *t = curthread;
1013 1014 klwp_t *lwp = ttolwp(t);
1014 1015 uint_t code = t->t_sysnum;
1015 1016 uint_t nargs;
1016 1017
1017 1018 if (lwp->lwp_argsaved || code == 0)
1018 1019 return (0); /* args already saved or not needed */
1019 1020
1020 1021 if (code >= NSYSCALL) {
1021 1022 nargs = 0; /* illegal syscall */
1022 1023 } else {
1023 1024 struct sysent *se = LWP_GETSYSENT(lwp);
1024 1025 struct sysent *callp = se + code;
1025 1026
1026 1027 nargs = callp->sy_narg;
1027 1028 if (LOADABLE_SYSCALL(callp) && nargs == 0) {
1028 1029 krwlock_t *module_lock;
1029 1030
1030 1031 /*
1031 1032 * Find out how many arguments the system
1032 1033 * call uses.
1033 1034 *
1034 1035 * We have the property that loaded syscalls
1035 1036 * never change the number of arguments they
1036 1037 * use after they've been loaded once. This
1037 1038 * allows us to stop for /proc tracing without
1038 1039 * holding the module lock.
1039 1040 * /proc is assured that sy_narg is valid.
1040 1041 */
1041 1042 module_lock = lock_syscall(se, code);
1042 1043 nargs = callp->sy_narg;
1043 1044 rw_exit(module_lock);
1044 1045 }
1045 1046 }
1046 1047
1047 1048 /*
1048 1049 * Fetch the system call arguments.
1049 1050 */
1050 1051 if (nargs == 0)
1051 1052 goto out;
1052 1053
1053 1054 ASSERT(nargs <= MAXSYSARGS);
1054 1055
1055 1056 if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
1056 1057 #if defined(_LP64)
1057 1058 struct regs *rp = lwptoregs(lwp);
1058 1059
1059 1060 lwp->lwp_arg[0] = rp->r_rdi;
1060 1061 lwp->lwp_arg[1] = rp->r_rsi;
1061 1062 lwp->lwp_arg[2] = rp->r_rdx;
1062 1063 lwp->lwp_arg[3] = rp->r_rcx;
1063 1064 lwp->lwp_arg[4] = rp->r_r8;
1064 1065 lwp->lwp_arg[5] = rp->r_r9;
1065 1066 if (nargs > 6 && copyin_args(rp, &lwp->lwp_arg[6], nargs - 6))
1066 1067 return (-1);
1067 1068 } else {
1068 1069 #endif
1069 1070 if (COPYIN_ARGS32(lwptoregs(lwp), lwp->lwp_arg, nargs))
1070 1071 return (-1);
1071 1072 }
1072 1073 out:
1073 1074 lwp->lwp_ap = lwp->lwp_arg;
1074 1075 lwp->lwp_argsaved = 1;
1075 1076 t->t_post_sys = 1; /* so lwp_ap will be reset */
1076 1077 return (0);
1077 1078 }
1078 1079
1079 1080 void
1080 1081 reset_syscall_args(void)
1081 1082 {
1082 1083 ttolwp(curthread)->lwp_argsaved = 0;
1083 1084 }
1084 1085
1085 1086 /*
1086 1087 * Call a system call which takes a pointer to the user args struct and
1087 1088 * a pointer to the return values. This is a bit slower than the standard
1088 1089 * C arg-passing method in some cases.
1089 1090 */
1090 1091 int64_t
1091 1092 syscall_ap(void)
1092 1093 {
1093 1094 uint_t error;
1094 1095 struct sysent *callp;
1095 1096 rval_t rval;
1096 1097 kthread_t *t = curthread;
1097 1098 klwp_t *lwp = ttolwp(t);
1098 1099 struct regs *rp = lwptoregs(lwp);
1099 1100
1100 1101 callp = LWP_GETSYSENT(lwp) + t->t_sysnum;
1101 1102
1102 1103 #if defined(__amd64)
1103 1104 /*
1104 1105 * If the arguments don't fit in registers %rdi-%r9, make sure they
1105 1106 * have been copied to the lwp_arg array.
1106 1107 */
1107 1108 if (callp->sy_narg > 6 && save_syscall_args())
1108 1109 return ((int64_t)set_errno(EFAULT));
1109 1110 #endif
1110 1111
1111 1112 rval.r_val1 = 0;
1112 1113 rval.r_val2 = rp->r_r1;
1113 1114 lwp->lwp_error = 0; /* for old drivers */
1114 1115 error = (*(callp->sy_call))(lwp->lwp_ap, &rval);
1115 1116 if (error)
1116 1117 return ((longlong_t)set_errno(error));
1117 1118 return (rval.r_vals);
1118 1119 }
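
For contrast with the SE_ARGC (C-style) handlers, a sketch of the old-style handler shape that syscall_ap() dispatches to; sample_handler is hypothetical:

    static int
    sample_handler(void *uap, rval_t *rvp)
    {
        /* parse arguments through uap, store results in *rvp */
        rvp->r_val1 = 0;
        return (0);          /* nonzero return becomes errno via set_errno() */
    }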
1119 1120
1120 1121 /*
1121 1122 * Load system call module.
1122 1123 * Returns with pointer to held read lock for module.
1123 1124 */
1124 1125 static krwlock_t *
1125 1126 lock_syscall(struct sysent *table, uint_t code)
1126 1127 {
1127 1128 krwlock_t *module_lock;
1128 1129 struct modctl *modp;
1129 1130 int id;
1130 1131 struct sysent *callp;
1131 1132
1132 1133 callp = table + code;
1133 1134 module_lock = callp->sy_lock;
1134 1135
1135 1136 /*
1136 1137 * Optimization to only call modload if we don't have a loaded
1137 1138 * syscall.
1138 1139 */
1139 1140 rw_enter(module_lock, RW_READER);
1140 1141 if (LOADED_SYSCALL(callp))
1141 1142 return (module_lock);
1142 1143 rw_exit(module_lock);
1143 1144
1144 1145 for (;;) {
1145 1146 if ((id = modload("sys", syscallnames[code])) == -1)
1146 1147 break;
1147 1148
1148 1149 /*
1149 1150 * If we loaded successfully at least once, the modctl
1150 1151 * will still be valid, so we try to grab it by filename.
1151 1152 * If this call fails, it's because the mod_filename
1152 1153 * was changed after the call to modload() (mod_hold_by_name()
1153 1154 * is the likely culprit). We can safely just take
1154 1155 * another lap if this is the case; the modload() will
1155 1156 * change the mod_filename back to one by which we can
1156 1157 * find the modctl.
1157 1158 */
1158 1159 modp = mod_find_by_filename("sys", syscallnames[code]);
1159 1160
1160 1161 if (modp == NULL)
1161 1162 continue;
1162 1163
1163 1164 mutex_enter(&mod_lock);
1164 1165
1165 1166 if (!modp->mod_installed) {
1166 1167 mutex_exit(&mod_lock);
1167 1168 continue;
1168 1169 }
1169 1170 break;
1170 1171 }
1171 1172 rw_enter(module_lock, RW_READER);
1172 1173
1173 1174 if (id != -1)
1174 1175 mutex_exit(&mod_lock);
1175 1176
1176 1177 return (module_lock);
1177 1178 }
1178 1179
1179 1180 /*
1180 1181 * Loadable syscall support.
1181 1182 * If needed, load the module, then reserve it by holding a read
1182 1183 * lock for the duration of the call.
1183 1184 * Later, if the syscall is not unloadable, it could patch the vector.
1184 1185 */
1185 1186 /*ARGSUSED*/
1186 1187 int64_t
1187 1188 loadable_syscall(
1188 1189 long a0, long a1, long a2, long a3,
1189 1190 long a4, long a5, long a6, long a7)
1190 1191 {
1191 1192 klwp_t *lwp = ttolwp(curthread);
1192 1193 int64_t rval;
1193 1194 struct sysent *callp;
1194 1195 struct sysent *se = LWP_GETSYSENT(lwp);
1195 1196 krwlock_t *module_lock;
1196 1197 int code, error = 0;
1197 1198
1198 1199 code = curthread->t_sysnum;
1199 1200 callp = se + code;
1200 1201
1201 1202 /*
1202 1203 * Try to autoload the system call if necessary
1203 1204 */
1204 1205 module_lock = lock_syscall(se, code);
1205 - THREAD_KPRI_RELEASE(); /* drop priority given by rw_enter */
1206 1206
1207 1207 /*
1208 1208 * we've locked either the loaded syscall or nosys
1209 1209 */
1210 1210
1211 1211 if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
1212 1212 #if defined(_LP64)
1213 1213 if (callp->sy_flags & SE_ARGC) {
1214 1214 rval = (int64_t)(*callp->sy_call)(a0, a1, a2, a3,
1215 1215 a4, a5);
1216 1216 } else {
1217 1217 rval = syscall_ap();
1218 1218 }
1219 1219 } else {
1220 1220 #endif
1221 1221 /*
1222 1222 * Now that it's loaded, make sure enough args were copied.
1223 1223 */
1224 1224 if (COPYIN_ARGS32(lwptoregs(lwp), lwp->lwp_ap, callp->sy_narg))
1225 1225 error = EFAULT;
1226 1226 if (error) {
1227 1227 rval = set_errno(error);
1228 1228 } else if (callp->sy_flags & SE_ARGC) {
1229 1229 rval = (int64_t)(*callp->sy_call)(lwp->lwp_ap[0],
1230 1230 lwp->lwp_ap[1], lwp->lwp_ap[2], lwp->lwp_ap[3],
1231 1231 lwp->lwp_ap[4], lwp->lwp_ap[5]);
1232 1232 } else {
1233 1233 rval = syscall_ap();
1234 1234 }
1235 1235 }
1236 1236
1237 - THREAD_KPRI_REQUEST(); /* regain priority from read lock */
1238 1237 rw_exit(module_lock);
1239 1238 return (rval);
1240 1239 }
1241 1240
1242 1241 /*
1243 1242 * Indirect syscall handled in libc on x86 architectures
1244 1243 */
1245 1244 int64_t
1246 1245 indir()
1247 1246 {
1248 1247 return (nosys());
1249 1248 }
1250 1249
1251 1250 /*
1252 1251 * set_errno - set an error return from the current system call.
1253 1252 * This could be a macro.
1254 1253 * This returns the value it is passed, so that the caller can
1255 1254 * use tail-recursion-elimination and do return (set_errno(ERRNO));
1256 1255 */
1257 1256 uint_t
1258 1257 set_errno(uint_t error)
1259 1258 {
1260 1259 ASSERT(error != 0); /* must not be used to clear errno */
1261 1260
1262 1261 curthread->t_post_sys = 1; /* have post_syscall do error return */
1263 1262 return (ttolwp(curthread)->lwp_errno = error);
1264 1263 }
1265 1264
1266 1265 /*
1267 1266 * set_proc_pre_sys - Set pre-syscall processing for entire process.
1268 1267 */
1269 1268 void
1270 1269 set_proc_pre_sys(proc_t *p)
1271 1270 {
1272 1271 kthread_t *t;
1273 1272 kthread_t *first;
1274 1273
1275 1274 ASSERT(MUTEX_HELD(&p->p_lock));
1276 1275
1277 1276 t = first = p->p_tlist;
1278 1277 do {
1279 1278 t->t_pre_sys = 1;
1280 1279 } while ((t = t->t_forw) != first);
1281 1280 }
1282 1281
1283 1282 /*
1284 1283 * set_proc_post_sys - Set post-syscall processing for entire process.
1285 1284 */
1286 1285 void
1287 1286 set_proc_post_sys(proc_t *p)
1288 1287 {
1289 1288 kthread_t *t;
1290 1289 kthread_t *first;
1291 1290
1292 1291 ASSERT(MUTEX_HELD(&p->p_lock));
1293 1292
1294 1293 t = first = p->p_tlist;
1295 1294 do {
1296 1295 t->t_post_sys = 1;
1297 1296 } while ((t = t->t_forw) != first);
1298 1297 }
1299 1298
1300 1299 /*
1301 1300 * set_proc_sys - Set pre- and post-syscall processing for entire process.
1302 1301 */
1303 1302 void
1304 1303 set_proc_sys(proc_t *p)
1305 1304 {
1306 1305 kthread_t *t;
1307 1306 kthread_t *first;
1308 1307
1309 1308 ASSERT(MUTEX_HELD(&p->p_lock));
1310 1309
1311 1310 t = first = p->p_tlist;
1312 1311 do {
1313 1312 t->t_pre_sys = 1;
1314 1313 t->t_post_sys = 1;
1315 1314 } while ((t = t->t_forw) != first);
1316 1315 }
1317 1316
1318 1317 /*
1319 1318 * set_all_proc_sys - set pre- and post-syscall processing flags for all
1320 1319 * user processes.
1321 1320 *
1322 1321 * This is needed when auditing, tracing, or other facilities which affect
1323 1322 * all processes are turned on.
1324 1323 */
1325 1324 void
1326 1325 set_all_proc_sys()
1327 1326 {
1328 1327 kthread_t *t;
1329 1328 kthread_t *first;
1330 1329
1331 1330 mutex_enter(&pidlock);
1332 1331 t = first = curthread;
1333 1332 do {
1334 1333 t->t_pre_sys = 1;
1335 1334 t->t_post_sys = 1;
1336 1335 } while ((t = t->t_next) != first);
1337 1336 mutex_exit(&pidlock);
1338 1337 }
1339 1338
1340 1339 /*
1341 1340 * set_all_zone_usr_proc_sys - set pre- and post-syscall processing flags for
1342 1341 * all user processes running in the zone of the current process
1343 1342 *
1344 1343 * This is needed when auditing, tracing, or other facilities which affect
1345 1344 * all processes are turned on.
1346 1345 */
1347 1346 void
1348 1347 set_all_zone_usr_proc_sys(zoneid_t zoneid)
1349 1348 {
1350 1349 proc_t *p;
1351 1350 kthread_t *t;
1352 1351
1353 1352 mutex_enter(&pidlock);
1354 1353 for (p = practive; p != NULL; p = p->p_next) {
1355 1354 /* skip kernel and incomplete processes */
1356 1355 if (p->p_exec == NULLVP || p->p_as == &kas ||
1357 1356 p->p_stat == SIDL || p->p_stat == SZOMB ||
1358 1357 (p->p_flag & (SSYS | SEXITING | SEXITLWPS)))
1359 1358 continue;
1360 1359 /*
1361 1360 * Only processes in the given zone (eventually in
1362 1361 * all zones) are taken into account
1363 1362 */
1364 1363 if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) {
1365 1364 mutex_enter(&p->p_lock);
1366 1365 if ((t = p->p_tlist) == NULL) {
1367 1366 mutex_exit(&p->p_lock);
1368 1367 continue;
1369 1368 }
1370 1369 /*
1371 1370 * Set pre- and post-syscall processing flags
1372 1371 * for all threads of the process
1373 1372 */
1374 1373 do {
1375 1374 t->t_pre_sys = 1;
1376 1375 t->t_post_sys = 1;
1377 1376 } while (p->p_tlist != (t = t->t_forw));
1378 1377 mutex_exit(&p->p_lock);
1379 1378 }
1380 1379 }
1381 1380 mutex_exit(&pidlock);
1382 1381 }
1383 1382
1384 1383 /*
1385 1384 * set_proc_ast - Set asynchronous service trap (AST) flag for all
1386 1385 * threads in process.
1387 1386 */
1388 1387 void
1389 1388 set_proc_ast(proc_t *p)
1390 1389 {
1391 1390 kthread_t *t;
1392 1391 kthread_t *first;
1393 1392
1394 1393 ASSERT(MUTEX_HELD(&p->p_lock));
1395 1394
1396 1395 t = first = p->p_tlist;
1397 1396 do {
1398 1397 aston(t);
1399 1398 } while ((t = t->t_forw) != first);
1400 1399 }