Print this page
7029 want per-process exploit mitigation features (secflags)
7030 want basic address space layout randomization (aslr)
7031 noexec_user_stack should be a secflag
7032 want a means to forbid mappings around NULL.
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/os/exec.c
+++ new/usr/src/uts/common/os/exec.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /* Copyright (c) 1988 AT&T */
27 27 /* All Rights Reserved */
28 28 /*
29 29 * Copyright 2014, Joyent, Inc. All rights reserved.
30 30 */
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/param.h>
34 34 #include <sys/sysmacros.h>
35 35 #include <sys/systm.h>
36 36 #include <sys/signal.h>
37 37 #include <sys/cred_impl.h>
38 38 #include <sys/policy.h>
39 39 #include <sys/user.h>
40 40 #include <sys/errno.h>
41 41 #include <sys/file.h>
42 42 #include <sys/vfs.h>
43 43 #include <sys/vnode.h>
44 44 #include <sys/mman.h>
45 45 #include <sys/acct.h>
46 46 #include <sys/cpuvar.h>
47 47 #include <sys/proc.h>
48 48 #include <sys/cmn_err.h>
49 49 #include <sys/debug.h>
50 50 #include <sys/pathname.h>
51 51 #include <sys/vm.h>
52 52 #include <sys/lgrp.h>
53 53 #include <sys/vtrace.h>
54 54 #include <sys/exec.h>
55 55 #include <sys/exechdr.h>
56 56 #include <sys/kmem.h>
57 57 #include <sys/prsystm.h>
58 58 #include <sys/modctl.h>
59 59 #include <sys/vmparam.h>
60 60 #include <sys/door.h>
61 61 #include <sys/schedctl.h>
↓ open down ↓ |
61 lines elided |
↑ open up ↑ |
62 62 #include <sys/utrap.h>
63 63 #include <sys/systeminfo.h>
64 64 #include <sys/stack.h>
65 65 #include <sys/rctl.h>
66 66 #include <sys/dtrace.h>
67 67 #include <sys/lwpchan_impl.h>
68 68 #include <sys/pool.h>
69 69 #include <sys/sdt.h>
70 70 #include <sys/brand.h>
71 71 #include <sys/klpd.h>
72 +#include <sys/random.h>
72 73
73 74 #include <c2/audit.h>
74 75
75 76 #include <vm/hat.h>
76 77 #include <vm/anon.h>
77 78 #include <vm/as.h>
78 79 #include <vm/seg.h>
79 80 #include <vm/seg_vn.h>
80 81
/*
 * Flags returned by execsetid() in privflags, describing what credential
 * work gexec() must perform for the new image.
 */
#define	PRIV_RESET	0x01	/* needs to reset privs */
#define	PRIV_SETID	0x02	/* needs to change uids */
#define	PRIV_SETUGID	0x04	/* is setuid/setgid/forced privs */
#define	PRIV_INCREASE	0x08	/* child runs with more privs */
#define	MAC_FLAGS	0x10	/* need to adjust MAC flags */
#define	PRIV_FORCED	0x20	/* has forced privileges */

static int execsetid(struct vnode *, struct vattr *, uid_t *, uid_t *,
    priv_set_t *, cred_t *, const char *);
static int hold_execsw(struct execsw *);

uint_t auxv_hwcap = 0;	/* auxv AT_SUN_HWCAP value; determined on the fly */
uint_t auxv_hwcap_2 = 0;	/* AT_SUN_HWCAP2 */
#if defined(_SYSCALL32_IMPL)
uint_t auxv_hwcap32 = 0;	/* 32-bit version of auxv_hwcap */
uint_t auxv_hwcap32_2 = 0;	/* 32-bit version of auxv_hwcap2 */
#endif

/* Process flags cleared on exec and restored on error (see gexec()). */
#define	PSUIDFLAGS		(SNOCD|SUGID)

/*
 * These are consumed within the specific exec modules, but are defined here
 * because
 *
 * 1) The exec modules are unloadable, which would make this near useless.
 *
 * 2) We want them to be common across all of them, should more than ELF come
 *    to support them.
 *
 * All must be powers of 2.
 */
size_t aslr_max_brk_skew = 16 * 1024 * 1024; /* 16MB */
#pragma weak exec_stackgap = aslr_max_stack_skew /* Old, compatible name */
size_t aslr_max_stack_skew = 64 * 1024; /* 64KB */
117 +/*
102 118 * exece() - system call wrapper around exec_common()
103 119 */
104 120 int
105 121 exece(const char *fname, const char **argp, const char **envp)
106 122 {
107 123 int error;
108 124
109 125 error = exec_common(fname, argp, envp, EBA_NONE);
110 126 return (error ? (set_errno(error)) : 0);
111 127 }
112 128
/*
 * exec_common() - the guts of exec(2).
 *
 * Resolves and authorizes the executable, dispatches to the object-file
 * handler via gexec(), then rebuilds per-process state (signals, rlimits,
 * profiling, lwp directory) for the fresh image.  Returns 0 on success or
 * an errno value; on failure the original image continues to run.
 *
 * brand_action is one of EBA_NONE/EBA_NATIVE/EBA_BRAND, controlling
 * branding transitions for branded zones.
 */
int
exec_common(const char *fname, const char **argp, const char **envp,
    int brand_action)
{
	vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL;
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	struct user *up = PTOU(p);
	long execsz;		/* temporary count of exec size */
	int i;
	int error;
	char exec_file[MAXCOMLEN+1];
	struct pathname pn;
	struct pathname resolvepn;
	struct uarg args;
	struct execa ua;
	k_sigset_t savedmask;
	lwpdir_t *lwpdir = NULL;
	tidhash_t *tidhash;
	lwpdir_t *old_lwpdir = NULL;
	uint_t old_lwpdir_sz;
	tidhash_t *old_tidhash;
	uint_t old_tidhash_sz;
	ret_tidhash_t *ret_tidhash;
	lwpent_t *lep;
	boolean_t brandme = B_FALSE;

	/*
	 * exec() is not supported for the /proc agent lwp.
	 */
	if (curthread == p->p_agenttp)
		return (ENOTSUP);

	if (brand_action != EBA_NONE) {
		/*
		 * Brand actions are not supported for processes that are not
		 * running in a branded zone.
		 */
		if (!ZONE_IS_BRANDED(p->p_zone))
			return (ENOTSUP);

		if (brand_action == EBA_NATIVE) {
			/* Only branded processes can be unbranded */
			if (!PROC_IS_BRANDED(p))
				return (ENOTSUP);
		} else {
			/* Only unbranded processes can be branded */
			if (PROC_IS_BRANDED(p))
				return (ENOTSUP);
			brandme = B_TRUE;
		}
	} else {
		/*
		 * If this is a native zone, or if the process is already
		 * branded, then we don't need to do anything.  If this is
		 * a native process in a branded zone, we need to brand the
		 * process as it exec()s the new binary.
		 */
		if (ZONE_IS_BRANDED(p->p_zone) && !PROC_IS_BRANDED(p))
			brandme = B_TRUE;
	}

	/*
	 * Inform /proc that an exec() has started.
	 * Hold signals that are ignored by default so that we will
	 * not be interrupted by a signal that will be ignored after
	 * successful completion of gexec().
	 */
	mutex_enter(&p->p_lock);
	prexecstart();
	schedctl_finish_sigblock(curthread);
	savedmask = curthread->t_hold;
	sigorset(&curthread->t_hold, &ignoredefault);
	mutex_exit(&p->p_lock);

	/*
	 * Look up path name and remember last component for later.
	 * To help coreadm expand its %d token, we attempt to save
	 * the directory containing the executable in p_execdir. The
	 * first call to lookuppn() may fail and return EINVAL because
	 * dirvpp is non-NULL. In that case, we make a second call to
	 * lookuppn() with dirvpp set to NULL; p_execdir will be NULL,
	 * but coreadm is allowed to expand %d to the empty string and
	 * there are other cases in which that failure may occur.
	 */
	if ((error = pn_get((char *)fname, UIO_USERSPACE, &pn)) != 0)
		goto out;
	pn_alloc(&resolvepn);
	if ((error = lookuppn(&pn, &resolvepn, FOLLOW, &dir, &vp)) != 0) {
		pn_free(&resolvepn);
		pn_free(&pn);
		if (error != EINVAL)
			goto out;

		dir = NULL;
		if ((error = pn_get((char *)fname, UIO_USERSPACE, &pn)) != 0)
			goto out;
		pn_alloc(&resolvepn);
		if ((error = lookuppn(&pn, &resolvepn, FOLLOW, NULLVPP,
		    &vp)) != 0) {
			pn_free(&resolvepn);
			pn_free(&pn);
			goto out;
		}
	}
	if (vp == NULL) {
		if (dir != NULL)
			VN_RELE(dir);
		error = ENOENT;
		pn_free(&resolvepn);
		pn_free(&pn);
		goto out;
	}

	if ((error = secpolicy_basic_exec(CRED(), vp)) != 0) {
		if (dir != NULL)
			VN_RELE(dir);
		pn_free(&resolvepn);
		pn_free(&pn);
		VN_RELE(vp);
		goto out;
	}

	/*
	 * We do not allow executing files in attribute directories.
	 * We test this by determining whether the resolved path
	 * contains a "/" when we're in an attribute directory;
	 * only if the pathname does not contain a "/" the resolved path
	 * points to a file in the current working (attribute) directory.
	 */
	if ((p->p_user.u_cdir->v_flag & V_XATTRDIR) != 0 &&
	    strchr(resolvepn.pn_path, '/') == NULL) {
		if (dir != NULL)
			VN_RELE(dir);
		error = EACCES;
		pn_free(&resolvepn);
		pn_free(&pn);
		VN_RELE(vp);
		goto out;
	}

	bzero(exec_file, MAXCOMLEN+1);
	(void) strncpy(exec_file, pn.pn_path, MAXCOMLEN);
	bzero(&args, sizeof (args));
	args.pathname = resolvepn.pn_path;
	/* don't free resolvepn until we are done with args */
	pn_free(&pn);

	/*
	 * If we're running in a profile shell, then call pfexecd.
	 */
	if ((CR_FLAGS(p->p_cred) & PRIV_PFEXEC) != 0) {
		error = pfexec_call(p->p_cred, &resolvepn, &args.pfcred,
		    &args.scrubenv);

		/* Returning errno in case we're not allowed to execute. */
		if (error > 0) {
			if (dir != NULL)
				VN_RELE(dir);
			pn_free(&resolvepn);
			VN_RELE(vp);
			goto out;
		}

		/* Don't change the credentials when using old ptrace. */
		if (args.pfcred != NULL &&
		    (p->p_proc_flag & P_PR_PTRACE) != 0) {
			crfree(args.pfcred);
			args.pfcred = NULL;
			args.scrubenv = B_FALSE;
		}
	}

	/*
	 * Specific exec handlers, or policies determined via
	 * /etc/system may override the historical default.
	 */
	args.stk_prot = PROT_ZFOD;
	args.dat_prot = PROT_ZFOD;

	CPU_STATS_ADD_K(sys, sysexec, 1);
	DTRACE_PROC1(exec, char *, args.pathname);

	ua.fname = fname;
	ua.argp = argp;
	ua.envp = envp;

	/* If necessary, brand this process before we start the exec. */
	if (brandme)
		brand_setbrand(p);

	if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
	    exec_file, p->p_cred, brand_action)) != 0) {
		if (brandme)
			brand_clearbrand(p, B_FALSE);
		VN_RELE(vp);
		if (dir != NULL)
			VN_RELE(dir);
		pn_free(&resolvepn);
		goto fail;
	}

	/*
	 * Free floating point registers (sun4u only)
	 */
	ASSERT(lwp != NULL);
	lwp_freeregs(lwp, 1);

	/*
	 * Free thread and process context ops.
	 */
	if (curthread->t_ctx)
		freectx(curthread, 1);
	if (p->p_pctx)
		freepctx(p, 1);

	/*
	 * Remember file name for accounting; clear any cached DTrace predicate.
	 */
	up->u_acflag &= ~AFORK;
	bcopy(exec_file, up->u_comm, MAXCOMLEN+1);
	curthread->t_predcache = NULL;

	/*
	 * Clear contract template state
	 */
	lwp_ctmpl_clear(lwp);

	/*
	 * Save the directory in which we found the executable for expanding
	 * the %d token used in core file patterns.
	 */
	mutex_enter(&p->p_lock);
	tmpvp = p->p_execdir;
	p->p_execdir = dir;
	if (p->p_execdir != NULL)
		VN_HOLD(p->p_execdir);
	mutex_exit(&p->p_lock);

	if (tmpvp != NULL)
		VN_RELE(tmpvp);

	/*
	 * Reset stack state to the user stack, clear set of signals
	 * caught on the signal stack, and reset list of signals that
	 * restart system calls; the new program's environment should
	 * not be affected by detritus from the old program.  Any
	 * pending held signals remain held, so don't clear t_hold.
	 */
	mutex_enter(&p->p_lock);
	lwp->lwp_oldcontext = 0;
	lwp->lwp_ustack = 0;
	lwp->lwp_old_stk_ctl = 0;
	sigemptyset(&up->u_signodefer);
	sigemptyset(&up->u_sigonstack);
	sigemptyset(&up->u_sigresethand);
	lwp->lwp_sigaltstack.ss_sp = 0;
	lwp->lwp_sigaltstack.ss_size = 0;
	lwp->lwp_sigaltstack.ss_flags = SS_DISABLE;

	/*
	 * Make saved resource limit == current resource limit.
	 */
	for (i = 0; i < RLIM_NLIMITS; i++) {
		/*CONSTCOND*/
		if (RLIM_SAVED(i)) {
			(void) rctl_rlimit_get(rctlproc_legacy[i], p,
			    &up->u_saved_rlimit[i]);
		}
	}

	/*
	 * If the action was to catch the signal, then the action
	 * must be reset to SIG_DFL.
	 */
	sigdefault(p);
	p->p_flag &= ~(SNOWAIT|SJCTL);
	p->p_flag |= (SEXECED|SMSACCT|SMSFORK);
	up->u_signal[SIGCLD - 1] = SIG_DFL;

	/*
	 * Delete the dot4 sigqueues/signotifies.
	 */
	sigqfree(p);

	mutex_exit(&p->p_lock);

	mutex_enter(&p->p_pflock);
	p->p_prof.pr_base = NULL;
	p->p_prof.pr_size = 0;
	p->p_prof.pr_off = 0;
	p->p_prof.pr_scale = 0;
	p->p_prof.pr_samples = 0;
	mutex_exit(&p->p_pflock);

	ASSERT(curthread->t_schedctl == NULL);

#if defined(__sparc)
	if (p->p_utraps != NULL)
		utrap_free(p);
#endif	/* __sparc */

	/*
	 * Close all close-on-exec files.
	 */
	close_exec(P_FINFO(p));
	TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);

	/* Unbrand ourself if necessary. */
	if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE))
		brand_clearbrand(p, B_FALSE);

	setregs(&args);

	/* Mark this as an executable vnode */
	mutex_enter(&vp->v_lock);
	vp->v_flag |= VVMEXEC;
	mutex_exit(&vp->v_lock);

	VN_RELE(vp);
	if (dir != NULL)
		VN_RELE(dir);
	pn_free(&resolvepn);

	/*
	 * Allocate a new lwp directory and lwpid hash table if necessary.
	 * The process is single-threaded after exec, so a 2-entry
	 * directory/hash suffices; reuse the existing one when we were
	 * already lwp #1 with a 2-entry directory.
	 */
	if (curthread->t_tid != 1 || p->p_lwpdir_sz != 2) {
		lwpdir = kmem_zalloc(2 * sizeof (lwpdir_t), KM_SLEEP);
		lwpdir->ld_next = lwpdir + 1;
		tidhash = kmem_zalloc(2 * sizeof (tidhash_t), KM_SLEEP);
		if (p->p_lwpdir != NULL)
			lep = p->p_lwpdir[curthread->t_dslot].ld_entry;
		else
			lep = kmem_zalloc(sizeof (*lep), KM_SLEEP);
	}

	if (PROC_IS_BRANDED(p))
		BROP(p)->b_exec();

	mutex_enter(&p->p_lock);
	prbarrier(p);

	/*
	 * Reset lwp id to the default value of 1.
	 * This is a single-threaded process now
	 * and lwp #1 is lwp_wait()able by default.
	 * The t_unpark flag should not be inherited.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
	curthread->t_tid = 1;
	kpreempt_disable();
	ASSERT(curthread->t_lpl != NULL);
	p->p_t1_lgrpid = curthread->t_lpl->lpl_lgrpid;
	kpreempt_enable();
	if (p->p_tr_lgrpid != LGRP_NONE && p->p_tr_lgrpid != p->p_t1_lgrpid) {
		lgrp_update_trthr_migrations(1);
	}
	curthread->t_unpark = 0;
	curthread->t_proc_flag |= TP_TWAIT;
	curthread->t_proc_flag &= ~TP_DAEMON;	/* daemons shouldn't exec */
	p->p_lwpdaemon = 0;			/* but oh well ... */
	p->p_lwpid = 1;

	/*
	 * Install the newly-allocated lwp directory and lwpid hash table
	 * and insert the current thread into the new hash table.
	 */
	if (lwpdir != NULL) {
		old_lwpdir = p->p_lwpdir;
		old_lwpdir_sz = p->p_lwpdir_sz;
		old_tidhash = p->p_tidhash;
		old_tidhash_sz = p->p_tidhash_sz;
		p->p_lwpdir = p->p_lwpfree = lwpdir;
		p->p_lwpdir_sz = 2;
		lep->le_thread = curthread;
		lep->le_lwpid = curthread->t_tid;
		lep->le_start = curthread->t_start;
		lwp_hash_in(p, lep, tidhash, 2, 0);
		p->p_tidhash = tidhash;
		p->p_tidhash_sz = 2;
	}
	ret_tidhash = p->p_ret_tidhash;
	p->p_ret_tidhash = NULL;

	/*
	 * Restore the saved signal mask and
	 * inform /proc that the exec() has finished.
	 */
	curthread->t_hold = savedmask;
	prexecend();
	mutex_exit(&p->p_lock);
	/* Free the replaced lwp directory/hash outside of p_lock. */
	if (old_lwpdir) {
		kmem_free(old_lwpdir, old_lwpdir_sz * sizeof (lwpdir_t));
		kmem_free(old_tidhash, old_tidhash_sz * sizeof (tidhash_t));
	}
	while (ret_tidhash != NULL) {
		ret_tidhash_t *next = ret_tidhash->rth_next;
		kmem_free(ret_tidhash->rth_tidhash,
		    ret_tidhash->rth_tidhash_sz * sizeof (tidhash_t));
		kmem_free(ret_tidhash, sizeof (*ret_tidhash));
		ret_tidhash = next;
	}

	ASSERT(error == 0);
	DTRACE_PROC(exec__success);
	return (0);

fail:
	DTRACE_PROC1(exec__failure, int, error);
out:		/* error return */
	/* Undo the prexecstart()/t_hold changes made at the top. */
	mutex_enter(&p->p_lock);
	curthread->t_hold = savedmask;
	prexecend();
	mutex_exit(&p->p_lock);
	ASSERT(error != 0);
	return (error);
}
531 547
532 548
/*
 * Perform generic exec duties and switchout to object-file specific
 * handler.
 *
 * Validates permissions, reads the magic number, selects the execsw
 * handler, applies set-id/privilege/secflags transitions, and invokes
 * the handler's exec_func.  level > 0 indicates a recursive call from
 * an interpreter (e.g. #! scripts); credential work happens only at
 * level 0.  Returns 0 on success or an errno value; on error the
 * process's PSUIDFLAGS and effective secflags are restored.
 */
int
gexec(
	struct vnode **vpp,
	struct execa *uap,
	struct uarg *args,
	struct intpdata *idatap,
	int level,
	long *execsz,
	caddr_t exec_file,
	struct cred *cred,
	int brand_action)
{
	struct vnode *vp, *execvp = NULL;
	proc_t *pp = ttoproc(curthread);
	struct execsw *eswp;
	int error = 0;
	int suidflags = 0;
	ssize_t resid;
	uid_t uid, gid;
	struct vattr vattr;
	char magbuf[MAGIC_BYTES];
	int setid;
	cred_t *oldcred, *newcred = NULL;
	int privflags = 0;
	int setidfl;
	priv_set_t fset;
	secflagset_t old_secflags;

	/* Snapshot effective secflags so the error path can restore them. */
	secflags_copy(&old_secflags, &pp->p_secflags.psf_effective);

	/*
	 * If the SNOCD or SUGID flag is set, turn it off and remember the
	 * previous setting so we can restore it if we encounter an error.
	 */
	if (level == 0 && (pp->p_flag & PSUIDFLAGS)) {
		mutex_enter(&pp->p_lock);
		suidflags = pp->p_flag & PSUIDFLAGS;
		pp->p_flag &= ~PSUIDFLAGS;
		mutex_exit(&pp->p_lock);
	}

	if ((error = execpermissions(*vpp, &vattr, args)) != 0)
		goto bad_noclose;

	/* need to open vnode for stateful file systems */
	if ((error = VOP_OPEN(vpp, FREAD, CRED(), NULL)) != 0)
		goto bad_noclose;
	vp = *vpp;

	/*
	 * Note: to support binary compatibility with SunOS a.out
	 * executables, we read in the first four bytes, as the
	 * magic number is in bytes 2-3.
	 */
	if (error = vn_rdwr(UIO_READ, vp, magbuf, sizeof (magbuf),
	    (offset_t)0, UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid))
		goto bad;
	if (resid != 0)
		goto bad;

	if ((eswp = findexec_by_hdr(magbuf)) == NULL)
		goto bad;

	if (level == 0 &&
	    (privflags = execsetid(vp, &vattr, &uid, &gid, &fset,
	    args->pfcred == NULL ? cred : args->pfcred, args->pathname)) != 0) {

		/* Pfcred is a credential with a ref count of 1 */

		if (args->pfcred != NULL) {
			privflags |= PRIV_INCREASE|PRIV_RESET;
			newcred = cred = args->pfcred;
		} else {
			newcred = cred = crdup(cred);
		}

		/* If we can, drop the PA bit */
		if ((privflags & PRIV_RESET) != 0)
			priv_adjust_PA(cred);

		if (privflags & PRIV_SETID) {
			cred->cr_uid = uid;
			cred->cr_gid = gid;
			cred->cr_suid = uid;
			cred->cr_sgid = gid;
		}

		if (privflags & MAC_FLAGS) {
			if (!(CR_FLAGS(cred) & NET_MAC_AWARE_INHERIT))
				CR_FLAGS(cred) &= ~NET_MAC_AWARE;
			CR_FLAGS(cred) &= ~NET_MAC_AWARE_INHERIT;
		}

		/*
		 * Implement the privilege updates:
		 *
		 * Restrict with L:
		 *
		 *	I' = I & L
		 *
		 *	E' = P' = (I' + F) & A
		 *
		 * But if running under ptrace, we cap I and F with P.
		 */
		if ((privflags & (PRIV_RESET|PRIV_FORCED)) != 0) {
			if ((privflags & PRIV_INCREASE) != 0 &&
			    (pp->p_proc_flag & P_PR_PTRACE) != 0) {
				priv_intersect(&CR_OPPRIV(cred),
				    &CR_IPRIV(cred));
				priv_intersect(&CR_OPPRIV(cred), &fset);
			}
			priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred));
			CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred);
			if (privflags & PRIV_FORCED) {
				priv_set_PA(cred);
				priv_union(&fset, &CR_EPRIV(cred));
				priv_union(&fset, &CR_PPRIV(cred));
			}
			priv_adjust_PA(cred);
		}
	} else if (level == 0 && args->pfcred != NULL) {
		newcred = cred = args->pfcred;
		privflags |= PRIV_INCREASE;
		/* pfcred is not forced to adhere to these settings */
		priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred));
		CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred);
		priv_adjust_PA(cred);
	}

	/* The new image gets the inheritable secflags as its secflags */
	secflags_promote(pp);

	/* SunOS 4.x buy-back */
	if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) &&
	    (vattr.va_mode & (VSUID|VSGID))) {
		char path[MAXNAMELEN];
		refstr_t *mntpt = NULL;
		int ret = -1;

		bzero(path, sizeof (path));
		zone_hold(pp->p_zone);

		ret = vnodetopath(pp->p_zone->zone_rootvp, vp, path,
		    sizeof (path), cred);

		/* fallback to mountpoint if a path can't be found */
		if ((ret != 0) || (ret == 0 && path[0] == '\0'))
			mntpt = vfs_getmntpoint(vp->v_vfsp);

		if (mntpt == NULL)
			zcmn_err(pp->p_zone->zone_id, CE_NOTE,
			    "!uid %d: setuid execution not allowed, "
			    "file=%s", cred->cr_uid, path);
		else
			zcmn_err(pp->p_zone->zone_id, CE_NOTE,
			    "!uid %d: setuid execution not allowed, "
			    "fs=%s, file=%s", cred->cr_uid,
			    ZONE_PATH_TRANSLATE(refstr_value(mntpt),
			    pp->p_zone), exec_file);

		if (!INGLOBALZONE(pp)) {
			/* zone_rootpath always has trailing / */
			if (mntpt == NULL)
				cmn_err(CE_NOTE, "!zone: %s, uid: %d "
				    "setuid execution not allowed, file=%s%s",
				    pp->p_zone->zone_name, cred->cr_uid,
				    pp->p_zone->zone_rootpath, path + 1);
			else
				cmn_err(CE_NOTE, "!zone: %s, uid: %d "
				    "setuid execution not allowed, fs=%s, "
				    "file=%s", pp->p_zone->zone_name,
				    cred->cr_uid, refstr_value(mntpt),
				    exec_file);
		}

		if (mntpt != NULL)
			refstr_rele(mntpt);

		zone_rele(pp->p_zone);
	}

	/*
	 * execsetid() told us whether or not we had to change the
	 * credentials of the process.  In privflags, it told us
	 * whether we gained any privileges or executed a set-uid executable.
	 */
	setid = (privflags & (PRIV_SETUGID|PRIV_INCREASE|PRIV_FORCED));

	/*
	 * Use /etc/system variable to determine if the stack
	 * should be marked as executable by default.  The per-process
	 * PROC_SEC_NOEXECSTACK secflag forces the same behavior.
	 */
	if ((noexec_user_stack != 0) ||
	    secflag_enabled(pp, PROC_SEC_NOEXECSTACK))
		args->stk_prot &= ~PROT_EXEC;

	args->execswp = eswp;	/* Save execsw pointer in uarg for exec_func */
	args->ex_vp = vp;

	/*
	 * Traditionally, the setid flags told the sub processes whether
	 * the file just executed was set-uid or set-gid; this caused
	 * some confusion as the 'setid' flag did not match the SUGID
	 * process flag which is only set when the uids/gids do not match.
	 * A script set-gid/set-uid to the real uid/gid would start with
	 * /dev/fd/X but an executable would happily trust LD_LIBRARY_PATH.
	 * Now we flag those cases where the calling process cannot
	 * be trusted to influence the newly exec'ed process, either
	 * because it runs with more privileges or when the uids/gids
	 * do in fact not match.
	 * This also makes the runtime linker agree with the on exec
	 * values of SNOCD and SUGID.
	 */
	setidfl = 0;
	if (cred->cr_uid != cred->cr_ruid || (cred->cr_rgid != cred->cr_gid &&
	    !supgroupmember(cred->cr_gid, cred))) {
		setidfl |= EXECSETID_UGIDS;
	}
	if (setid & PRIV_SETUGID)
		setidfl |= EXECSETID_SETID;
	if (setid & PRIV_FORCED)
		setidfl |= EXECSETID_PRIVS;

	execvp = pp->p_exec;
	if (execvp)
		VN_HOLD(execvp);

	error = (*eswp->exec_func)(vp, uap, args, idatap, level, execsz,
	    setidfl, exec_file, cred, brand_action);
	rw_exit(eswp->exec_lock);
	if (error != 0) {
		if (execvp)
			VN_RELE(execvp);
		/*
		 * If this process's p_exec has been set to the vp of
		 * the executable by exec_func, we will return without
		 * calling VOP_CLOSE because proc_exit will close it
		 * on exit.
		 */
		if (pp->p_exec == vp)
			goto bad_noclose;
		else
			goto bad;
	}

	if (level == 0) {
		uid_t oruid;

		if (execvp != NULL) {
			/*
			 * Close the previous executable only if we are
			 * at level 0.
			 */
			(void) VOP_CLOSE(execvp, FREAD, 1, (offset_t)0,
			    cred, NULL);
		}

		mutex_enter(&pp->p_crlock);

		oruid = pp->p_cred->cr_ruid;

		if (newcred != NULL) {
			/*
			 * Free the old credentials, and set the new ones.
			 * Do this for both the process and the (single) thread.
			 */
			crfree(pp->p_cred);
			pp->p_cred = cred;	/* cred already held for proc */
			crhold(cred);		/* hold new cred for thread */
			/*
			 * DTrace accesses t_cred in probe context.  t_cred
			 * must always be either NULL, or point to a valid,
			 * allocated cred structure.
			 */
			oldcred = curthread->t_cred;
			curthread->t_cred = cred;
			crfree(oldcred);

			if (priv_basic_test >= 0 &&
			    !PRIV_ISMEMBER(&CR_IPRIV(newcred),
			    priv_basic_test)) {
				pid_t pid = pp->p_pid;
				char *fn = PTOU(pp)->u_comm;

				cmn_err(CE_WARN, "%s[%d]: exec: basic_test "
				    "privilege removed from E/I", fn, pid);
			}
		}
		/*
		 * On emerging from a successful exec(), the saved
		 * uid and gid equal the effective uid and gid.
		 */
		cred->cr_suid = cred->cr_uid;
		cred->cr_sgid = cred->cr_gid;

		/*
		 * If the real and effective ids do not match, this
		 * is a setuid process that should not dump core.
		 * The group comparison is tricky; we prevent the code
		 * from flagging SNOCD when executing with an effective gid
		 * which is a supplementary group.
		 */
		if (cred->cr_ruid != cred->cr_uid ||
		    (cred->cr_rgid != cred->cr_gid &&
		    !supgroupmember(cred->cr_gid, cred)) ||
		    (privflags & PRIV_INCREASE) != 0)
			suidflags = PSUIDFLAGS;
		else
			suidflags = 0;

		mutex_exit(&pp->p_crlock);
		if (newcred != NULL && oruid != newcred->cr_ruid) {
			/* Note that the process remains in the same zone. */
			mutex_enter(&pidlock);
			upcount_dec(oruid, crgetzoneid(newcred));
			upcount_inc(newcred->cr_ruid, crgetzoneid(newcred));
			mutex_exit(&pidlock);
		}
		if (suidflags) {
			mutex_enter(&pp->p_lock);
			pp->p_flag |= suidflags;
			mutex_exit(&pp->p_lock);
		}
		if (setid && (pp->p_proc_flag & P_PR_PTRACE) == 0) {
			/*
			 * If process is traced via /proc, arrange to
			 * invalidate the associated /proc vnode.
			 */
			if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE))
				args->traceinval = 1;
		}
		if (pp->p_proc_flag & P_PR_PTRACE)
			psignal(pp, SIGTRAP);
		if (args->traceinval)
			prinvalidate(&pp->p_user);
	}
	if (execvp)
		VN_RELE(execvp);
	return (0);

bad:
	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, cred, NULL);

bad_noclose:
	if (newcred != NULL)
		crfree(newcred);
	if (error == 0)
		error = ENOEXEC;

	mutex_enter(&pp->p_lock);
	if (suidflags) {
		pp->p_flag |= suidflags;
	}
	/*
	 * Restore the effective secflags, to maintain the invariant they
	 * never change for a given process
	 */
	secflags_copy(&pp->p_secflags.psf_effective, &old_secflags);
	mutex_exit(&pp->p_lock);

	return (error);
}
886 915
887 916 extern char *execswnames[];
888 917
889 918 struct execsw *
890 919 allocate_execsw(char *name, char *magic, size_t magic_size)
891 920 {
892 921 int i, j;
893 922 char *ename;
894 923 char *magicp;
895 924
896 925 mutex_enter(&execsw_lock);
897 926 for (i = 0; i < nexectype; i++) {
898 927 if (execswnames[i] == NULL) {
899 928 ename = kmem_alloc(strlen(name) + 1, KM_SLEEP);
900 929 (void) strcpy(ename, name);
901 930 execswnames[i] = ename;
902 931 /*
903 932 * Set the magic number last so that we
904 933 * don't need to hold the execsw_lock in
905 934 * findexectype().
906 935 */
907 936 magicp = kmem_alloc(magic_size, KM_SLEEP);
908 937 for (j = 0; j < magic_size; j++)
909 938 magicp[j] = magic[j];
910 939 execsw[i].exec_magic = magicp;
911 940 mutex_exit(&execsw_lock);
912 941 return (&execsw[i]);
913 942 }
914 943 }
915 944 mutex_exit(&execsw_lock);
916 945 return (NULL);
917 946 }
918 947
919 948 /*
920 949 * Find the exec switch table entry with the corresponding magic string.
921 950 */
922 951 struct execsw *
923 952 findexecsw(char *magic)
924 953 {
925 954 struct execsw *eswp;
926 955
927 956 for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) {
928 957 ASSERT(eswp->exec_maglen <= MAGIC_BYTES);
929 958 if (magic && eswp->exec_maglen != 0 &&
930 959 bcmp(magic, eswp->exec_magic, eswp->exec_maglen) == 0)
931 960 return (eswp);
932 961 }
933 962 return (NULL);
934 963 }
935 964
936 965 /*
937 966 * Find the execsw[] index for the given exec header string by looking for the
938 967 * magic string at a specified offset and length for each kind of executable
939 968 * file format until one matches. If no execsw[] entry is found, try to
940 969 * autoload a module for this magic string.
941 970 */
942 971 struct execsw *
943 972 findexec_by_hdr(char *header)
944 973 {
945 974 struct execsw *eswp;
946 975
947 976 for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) {
948 977 ASSERT(eswp->exec_maglen <= MAGIC_BYTES);
949 978 if (header && eswp->exec_maglen != 0 &&
950 979 bcmp(&header[eswp->exec_magoff], eswp->exec_magic,
951 980 eswp->exec_maglen) == 0) {
952 981 if (hold_execsw(eswp) != 0)
953 982 return (NULL);
954 983 return (eswp);
955 984 }
956 985 }
957 986 return (NULL); /* couldn't find the type */
958 987 }
959 988
960 989 /*
961 990 * Find the execsw[] index for the given magic string. If no execsw[] entry
962 991 * is found, try to autoload a module for this magic string.
963 992 */
964 993 struct execsw *
965 994 findexec_by_magic(char *magic)
966 995 {
967 996 struct execsw *eswp;
968 997
969 998 for (eswp = execsw; eswp < &execsw[nexectype]; eswp++) {
970 999 ASSERT(eswp->exec_maglen <= MAGIC_BYTES);
971 1000 if (magic && eswp->exec_maglen != 0 &&
972 1001 bcmp(magic, eswp->exec_magic, eswp->exec_maglen) == 0) {
973 1002 if (hold_execsw(eswp) != 0)
974 1003 return (NULL);
975 1004 return (eswp);
976 1005 }
977 1006 }
978 1007 return (NULL); /* couldn't find the type */
979 1008 }
980 1009
981 1010 static int
982 1011 hold_execsw(struct execsw *eswp)
983 1012 {
984 1013 char *name;
985 1014
986 1015 rw_enter(eswp->exec_lock, RW_READER);
987 1016 while (!LOADED_EXEC(eswp)) {
988 1017 rw_exit(eswp->exec_lock);
989 1018 name = execswnames[eswp-execsw];
990 1019 ASSERT(name);
991 1020 if (modload("exec", name) == -1)
992 1021 return (-1);
993 1022 rw_enter(eswp->exec_lock, RW_READER);
994 1023 }
995 1024 return (0);
996 1025 }
997 1026
/*
 * Compute the credential changes implied by exec'ing the file whose
 * attributes are in vattrp (set-uid/set-gid bits, forced-privilege
 * look-aside for set-uid root), without applying them.
 *
 * Returns a bitmask of actions (PRIV_RESET, PRIV_SETID, PRIV_SETUGID,
 * PRIV_FORCED, PRIV_INCREASE, MAC_FLAGS) for the caller to apply.  When
 * PRIV_SETID is set in the result, *uidp and *gidp hold the new ids;
 * fset may have been filled with forced privileges by
 * get_forced_privs().
 */
static int
execsetid(struct vnode *vp, struct vattr *vattrp, uid_t *uidp, uid_t *gidp,
    priv_set_t *fset, cred_t *cr, const char *pathname)
{
	proc_t *pp = ttoproc(curthread);
	uid_t uid, gid;
	int privflags = 0;

	/*
	 * Remember credentials.
	 */
	uid = cr->cr_uid;
	gid = cr->cr_gid;

	/* Will try to reset the PRIV_AWARE bit later. */
	if ((CR_FLAGS(cr) & (PRIV_AWARE|PRIV_AWARE_INHERIT)) == PRIV_AWARE)
		privflags |= PRIV_RESET;

	/* set-uid/set-gid bits are honored only on filesystems w/o nosuid. */
	if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) == 0) {
		/*
		 * If it's a set-uid root program we perform the
		 * forced privilege look-aside. This has three possible
		 * outcomes:
		 *	no look aside information -> treat as before
		 *	look aside in Limit set -> apply forced privs
		 *	look aside not in Limit set -> ignore set-uid root
		 *
		 * Ordinary set-uid root execution only allowed if the limit
		 * set holds all unsafe privileges.
		 */
		if (vattrp->va_mode & VSUID) {
			if (vattrp->va_uid == 0) {
				int res = get_forced_privs(cr, pathname, fset);

				switch (res) {
				case -1:
					/* No look-aside data: classic suid root. */
					if (priv_issubset(&priv_unsafe,
					    &CR_LPRIV(cr))) {
						uid = vattrp->va_uid;
						privflags |= PRIV_SETUGID;
					}
					break;
				case 0:
					/* Forced privileges apply. */
					privflags |= PRIV_FORCED|PRIV_INCREASE;
					break;
				default:
					break;
				}
			} else {
				uid = vattrp->va_uid;
				privflags |= PRIV_SETUGID;
			}
		}
		if (vattrp->va_mode & VSGID) {
			gid = vattrp->va_gid;
			privflags |= PRIV_SETUGID;
		}
	}

	/*
	 * Do we need to change our credential anyway?
	 * This is the case when E != I or P != I, as
	 * we need to do the assignments (with F empty and A full)
	 * Or when I is not a subset of L; in that case we need to
	 * enforce L.
	 *
	 *		I' = L & I
	 *
	 *		E' = P' = (I' + F) & A
	 * or
	 *		E' = P' = I'
	 */
	if (!priv_isequalset(&CR_EPRIV(cr), &CR_IPRIV(cr)) ||
	    !priv_issubset(&CR_IPRIV(cr), &CR_LPRIV(cr)) ||
	    !priv_isequalset(&CR_PPRIV(cr), &CR_IPRIV(cr)))
		privflags |= PRIV_RESET;

	/* Child has more privileges than parent */
	if (!priv_issubset(&CR_IPRIV(cr), &CR_PPRIV(cr)))
		privflags |= PRIV_INCREASE;

	/* If MAC-aware flag(s) are on, need to update cred to remove. */
	if ((CR_FLAGS(cr) & NET_MAC_AWARE) ||
	    (CR_FLAGS(cr) & NET_MAC_AWARE_INHERIT))
		privflags |= MAC_FLAGS;
	/*
	 * Set setuid/setgid protections if no ptrace() compatibility.
	 * For privileged processes, honor setuid/setgid even in
	 * the presence of ptrace() compatibility.
	 */
	if (((pp->p_proc_flag & P_PR_PTRACE) == 0 ||
	    PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, (uid == 0))) &&
	    (cr->cr_uid != uid ||
	    cr->cr_gid != gid ||
	    cr->cr_suid != uid ||
	    cr->cr_sgid != gid)) {
		*uidp = uid;
		*gidp = gid;
		privflags |= PRIV_SETID;
	}
	return (privflags);
}
1100 1129
/*
 * Verify that the current process may execute vp: fetch the file's
 * attributes into vattrp, check execute access, file type, and noexec
 * mount status, and arrange for /proc tracing interactions.  Returns 0
 * on success or an errno value.
 */
int
execpermissions(struct vnode *vp, struct vattr *vattrp, struct uarg *args)
{
	int error;
	proc_t *p = ttoproc(curthread);

	vattrp->va_mask = AT_MODE | AT_UID | AT_GID | AT_SIZE;
	if (error = VOP_GETATTR(vp, vattrp, ATTR_EXEC, p->p_cred, NULL))
		return (error);
	/*
	 * Check the access mode.
	 * If VPROC, ask /proc if the file is an object file.
	 */
	if ((error = VOP_ACCESS(vp, VEXEC, 0, p->p_cred, NULL)) != 0 ||
	    !(vp->v_type == VREG || (vp->v_type == VPROC && pr_isobject(vp))) ||
	    (vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0 ||
	    (vattrp->va_mode & (VEXEC|(VEXEC>>3)|(VEXEC>>6))) == 0) {
		/* No exec bit anywhere, wrong type, or noexec mount. */
		if (error == 0)
			error = EACCES;
		return (error);
	}

	if ((p->p_plist || (p->p_proc_flag & (P_PR_PTRACE|P_PR_TRACE))) &&
	    (error = VOP_ACCESS(vp, VREAD, 0, p->p_cred, NULL))) {
		/*
		 * If process is under ptrace(2) compatibility,
		 * fail the exec(2).
		 */
		if (p->p_proc_flag & P_PR_PTRACE)
			goto bad;
		/*
		 * Process is traced via /proc.
		 * Arrange to invalidate the /proc vnode.
		 */
		args->traceinval = 1;
	}
	return (0);
bad:
	/*
	 * NOTE(review): error is always non-zero when we reach here; the
	 * ENOEXEC fallback appears to be purely defensive.
	 */
	if (error == 0)
		error = ENOEXEC;
	return (error);
}
1143 1172
/*
 * Map a section of an executable file into the user's
 * address space.
 *
 * addr/len/offset describe the file section (not necessarily page
 * aligned; page alignment is recomputed here).  zfodlen is the trailing
 * zero-fill (bss) length, prot the desired protections, page selects
 * VOP_MAP versus read-into-anonymous-memory, and szc is a preferred
 * page size code for the zfod segment.  Returns 0 or an errno value.
 */
int
execmap(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen,
    off_t offset, int prot, int page, uint_t szc)
{
	int error = 0;
	off_t oldoffset;
	caddr_t zfodbase, oldaddr;
	size_t end, oldlen;
	size_t zfoddiff;
	label_t ljb;
	proc_t *p = ttoproc(curthread);

	/* Round addr/offset down to a page boundary, widening len to match. */
	oldaddr = addr;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if (len) {
		oldlen = len;
		len += ((size_t)oldaddr - (size_t)addr);
		oldoffset = offset;
		offset = (off_t)((uintptr_t)offset & PAGEMASK);
		if (page) {
			spgcnt_t prefltmem, availm, npages;
			int preread;
			uint_t mflag = MAP_PRIVATE | MAP_FIXED;

			if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_EXEC) {
				mflag |= MAP_TEXT;
			} else {
				mflag |= MAP_INITDATA;
			}

			if (valid_usr_range(addr, len, prot, p->p_as,
			    p->p_as->a_userlimit) != RANGE_OKAY) {
				error = ENOMEM;
				goto bad;
			}
			if (error = VOP_MAP(vp, (offset_t)offset,
			    p->p_as, &addr, len, prot, PROT_ALL,
			    mflag, CRED(), NULL))
				goto bad;

			/*
			 * If the segment can fit, then we prefault
			 * the entire segment in.  This is based on the
			 * model that says the best working set of a
			 * small program is all of its pages.
			 */
			npages = (spgcnt_t)btopr(len);
			prefltmem = freemem - desfree;
			preread =
			    (npages < prefltmem && len < PGTHRESH) ? 1 : 0;

			/*
			 * If we aren't prefaulting the segment,
			 * increment "deficit", if necessary to ensure
			 * that pages will become available when this
			 * process starts executing.
			 */
			availm = freemem - lotsfree;
			if (preread == 0 && npages > availm &&
			    deficit < lotsfree) {
				deficit += MIN((pgcnt_t)(npages - availm),
				    lotsfree - deficit);
			}

			if (preread) {
				TRACE_2(TR_FAC_PROC, TR_EXECMAP_PREREAD,
				    "execmap preread:freemem %d size %lu",
				    freemem, len);
				(void) as_fault(p->p_as->a_hat, p->p_as,
				    (caddr_t)addr, len, F_INVAL, S_READ);
			}
		} else {
			if (valid_usr_range(addr, len, prot, p->p_as,
			    p->p_as->a_userlimit) != RANGE_OKAY) {
				error = ENOMEM;
				goto bad;
			}

			if (error = as_map(p->p_as, addr, len,
			    segvn_create, zfod_argsp))
				goto bad;
			/*
			 * Read in the segment in one big chunk.
			 */
			if (error = vn_rdwr(UIO_READ, vp, (caddr_t)oldaddr,
			    oldlen, (offset_t)oldoffset, UIO_USERSPACE, 0,
			    (rlim64_t)0, CRED(), (ssize_t *)0))
				goto bad;
			/*
			 * Now set protections.
			 */
			if (prot != PROT_ZFOD) {
				(void) as_setprot(p->p_as, (caddr_t)addr,
				    len, prot);
			}
		}
	}

	if (zfodlen) {
		struct as *as = curproc->p_as;
		struct seg *seg;
		uint_t zprot = 0;

		end = (size_t)addr + len;
		zfodbase = (caddr_t)roundup(end, PAGESIZE);
		zfoddiff = (uintptr_t)zfodbase - end;
		if (zfoddiff) {
			/*
			 * Before we go to zero the remaining space on the last
			 * page, make sure we have write permission.
			 *
			 * Normal illumos binaries don't even hit the case
			 * where we have to change permission on the last page
			 * since their protection is typically either
			 *    PROT_USER | PROT_WRITE | PROT_READ
			 * or
			 *    PROT_ZFOD (same as PROT_ALL).
			 *
			 * We need to be careful how we zero-fill the last page
			 * if the segment protection does not include
			 * PROT_WRITE. Using as_setprot() can cause the VM
			 * segment code to call segvn_vpage(), which must
			 * allocate a page struct for each page in the segment.
			 * If we have a very large segment, this may fail, so
			 * we have to check for that, even though we ignore
			 * other return values from as_setprot.
			 */

			AS_LOCK_ENTER(as, RW_READER);
			seg = as_segat(curproc->p_as, (caddr_t)end);
			if (seg != NULL)
				SEGOP_GETPROT(seg, (caddr_t)end, zfoddiff - 1,
				    &zprot);
			AS_LOCK_EXIT(as);

			if (seg != NULL && (zprot & PROT_WRITE) == 0) {
				if (as_setprot(as, (caddr_t)end, zfoddiff - 1,
				    zprot | PROT_WRITE) == ENOMEM) {
					error = ENOMEM;
					goto bad;
				}
			}

			/* Zero the tail of the last page, guarding faults. */
			if (on_fault(&ljb)) {
				no_fault();
				if (seg != NULL && (zprot & PROT_WRITE) == 0)
					(void) as_setprot(as, (caddr_t)end,
					    zfoddiff - 1, zprot);
				error = EFAULT;
				goto bad;
			}
			uzero((void *)end, zfoddiff);
			no_fault();
			/* Restore the original (non-writable) protections. */
			if (seg != NULL && (zprot & PROT_WRITE) == 0)
				(void) as_setprot(as, (caddr_t)end,
				    zfoddiff - 1, zprot);
		}
		if (zfodlen > zfoddiff) {
			struct segvn_crargs crargs =
			    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

			zfodlen -= zfoddiff;
			if (valid_usr_range(zfodbase, zfodlen, prot, p->p_as,
			    p->p_as->a_userlimit) != RANGE_OKAY) {
				error = ENOMEM;
				goto bad;
			}
			if (szc > 0) {
				/*
				 * ASSERT alignment because the mapelfexec()
				 * caller for the szc > 0 case extended zfod
				 * so its end is pgsz aligned.
				 */
				size_t pgsz = page_get_pagesize(szc);
				ASSERT(IS_P2ALIGNED(zfodbase + zfodlen, pgsz));

				if (IS_P2ALIGNED(zfodbase, pgsz)) {
					crargs.szc = szc;
				} else {
					crargs.szc = AS_MAP_HEAP;
				}
			} else {
				crargs.szc = AS_MAP_NO_LPOOB;
			}
			if (error = as_map(p->p_as, (caddr_t)zfodbase,
			    zfodlen, segvn_create, &crargs))
				goto bad;
			if (prot != PROT_ZFOD) {
				(void) as_setprot(p->p_as, (caddr_t)zfodbase,
				    zfodlen, prot);
			}
		}
	}
	return (0);
bad:
	return (error);
}
1345 1374
1346 1375 void
1347 1376 setexecenv(struct execenv *ep)
1348 1377 {
1349 1378 proc_t *p = ttoproc(curthread);
1350 1379 klwp_t *lwp = ttolwp(curthread);
1351 1380 struct vnode *vp;
1352 1381
1353 1382 p->p_bssbase = ep->ex_bssbase;
1354 1383 p->p_brkbase = ep->ex_brkbase;
1355 1384 p->p_brksize = ep->ex_brksize;
1356 1385 if (p->p_exec)
1357 1386 VN_RELE(p->p_exec); /* out with the old */
1358 1387 vp = p->p_exec = ep->ex_vp;
1359 1388 if (vp != NULL)
1360 1389 VN_HOLD(vp); /* in with the new */
1361 1390
1362 1391 lwp->lwp_sigaltstack.ss_sp = 0;
1363 1392 lwp->lwp_sigaltstack.ss_size = 0;
1364 1393 lwp->lwp_sigaltstack.ss_flags = SS_DISABLE;
1365 1394 }
1366 1395
/*
 * Open the executable vnode *vpp read-only and allocate a file
 * descriptor for it.  On success *fdp holds the new descriptor and the
 * vnode carries an extra open reference; on failure *fdp is set to -1,
 * the references are undone, and an errno value is returned.
 */
int
execopen(struct vnode **vpp, int *fdp)
{
	struct vnode *vp = *vpp;
	file_t *fp;
	int error = 0;
	int filemode = FREAD;

	VN_HOLD(vp);			/* open reference */
	if (error = falloc(NULL, filemode, &fp, fdp)) {
		VN_RELE(vp);
		*fdp = -1;	/* just in case falloc changed value */
		return (error);
	}
	if (error = VOP_OPEN(&vp, filemode, CRED(), NULL)) {
		/* Undo the hold and release the allocated fd slot. */
		VN_RELE(vp);
		setf(*fdp, NULL);
		unfalloc(fp);
		*fdp = -1;
		return (error);
	}
	*vpp = vp;		/* vnode should not have changed */
	fp->f_vnode = vp;
	mutex_exit(&fp->f_tlock);	/* falloc returned fp locked */
	setf(*fdp, fp);
	return (0);
}
1394 1423
/*
 * Close a file descriptor opened by execopen().
 */
int
execclose(int fd)
{
	return (closeandsetf(fd, NULL));
}
1400 1429
1401 1430
/*
 * noexec stub function: installed in execsw[] slots whose exec module
 * is unavailable; warns and fails the exec with ENOEXEC.
 */
/*ARGSUSED*/
int
noexec(
	struct vnode *vp,
	struct execa *uap,
	struct uarg *args,
	struct intpdata *idatap,
	int level,
	long *execsz,
	int setid,
	caddr_t exec_file,
	struct cred *cred)
{
	cmn_err(CE_WARN, "missing exec capability for %s", uap->fname);
	return (ENOEXEC);
}
1421 1450
1422 1451 /*
1423 1452 * Support routines for building a user stack.
1424 1453 *
1425 1454 * execve(path, argv, envp) must construct a new stack with the specified
1426 1455 * arguments and environment variables (see exec_args() for a description
1427 1456 * of the user stack layout). To do this, we copy the arguments and
1428 1457 * environment variables from the old user address space into the kernel,
1429 1458 * free the old as, create the new as, and copy our buffered information
1430 1459 * to the new stack. Our kernel buffer has the following structure:
1431 1460 *
1432 1461 * +-----------------------+ <--- stk_base + stk_size
1433 1462 * | string offsets |
1434 1463 * +-----------------------+ <--- stk_offp
1435 1464 * | |
1436 1465 * | STK_AVAIL() space |
1437 1466 * | |
1438 1467 * +-----------------------+ <--- stk_strp
1439 1468 * | strings |
1440 1469 * +-----------------------+ <--- stk_base
1441 1470 *
1442 1471 * When we add a string, we store the string's contents (including the null
1443 1472 * terminator) at stk_strp, and we store the offset of the string relative to
 * stk_base at --stk_offp. As strings are added, stk_strp increases and
1445 1474 * stk_offp decreases. The amount of space remaining, STK_AVAIL(), is just
1446 1475 * the difference between these pointers. If we run out of space, we return
1447 1476 * an error and exec_args() starts all over again with a buffer twice as large.
1448 1477 * When we're all done, the kernel buffer looks like this:
1449 1478 *
1450 1479 * +-----------------------+ <--- stk_base + stk_size
1451 1480 * | argv[0] offset |
1452 1481 * +-----------------------+
1453 1482 * | ... |
1454 1483 * +-----------------------+
1455 1484 * | argv[argc-1] offset |
1456 1485 * +-----------------------+
1457 1486 * | envp[0] offset |
1458 1487 * +-----------------------+
1459 1488 * | ... |
1460 1489 * +-----------------------+
1461 1490 * | envp[envc-1] offset |
1462 1491 * +-----------------------+
1463 1492 * | AT_SUN_PLATFORM offset|
1464 1493 * +-----------------------+
1465 1494 * | AT_SUN_EXECNAME offset|
1466 1495 * +-----------------------+ <--- stk_offp
1467 1496 * | |
1468 1497 * | STK_AVAIL() space |
1469 1498 * | |
1470 1499 * +-----------------------+ <--- stk_strp
1471 1500 * | AT_SUN_EXECNAME offset|
1472 1501 * +-----------------------+
1473 1502 * | AT_SUN_PLATFORM offset|
1474 1503 * +-----------------------+
1475 1504 * | envp[envc-1] string |
1476 1505 * +-----------------------+
1477 1506 * | ... |
1478 1507 * +-----------------------+
1479 1508 * | envp[0] string |
1480 1509 * +-----------------------+
1481 1510 * | argv[argc-1] string |
1482 1511 * +-----------------------+
1483 1512 * | ... |
1484 1513 * +-----------------------+
1485 1514 * | argv[0] string |
1486 1515 * +-----------------------+ <--- stk_base
1487 1516 */
1488 1517
1489 1518 #define STK_AVAIL(args) ((char *)(args)->stk_offp - (args)->stk_strp)
1490 1519
1491 1520 /*
1492 1521 * Add a string to the stack.
1493 1522 */
1494 1523 static int
1495 1524 stk_add(uarg_t *args, const char *sp, enum uio_seg segflg)
1496 1525 {
1497 1526 int error;
1498 1527 size_t len;
1499 1528
1500 1529 if (STK_AVAIL(args) < sizeof (int))
1501 1530 return (E2BIG);
1502 1531 *--args->stk_offp = args->stk_strp - args->stk_base;
1503 1532
1504 1533 if (segflg == UIO_USERSPACE) {
1505 1534 error = copyinstr(sp, args->stk_strp, STK_AVAIL(args), &len);
1506 1535 if (error != 0)
1507 1536 return (error);
1508 1537 } else {
1509 1538 len = strlen(sp) + 1;
1510 1539 if (len > STK_AVAIL(args))
1511 1540 return (E2BIG);
1512 1541 bcopy(sp, args->stk_strp, len);
1513 1542 }
1514 1543
1515 1544 args->stk_strp += len;
1516 1545
1517 1546 return (0);
1518 1547 }
1519 1548
1520 1549 static int
1521 1550 stk_getptr(uarg_t *args, char *src, char **dst)
1522 1551 {
1523 1552 int error;
1524 1553
1525 1554 if (args->from_model == DATAMODEL_NATIVE) {
1526 1555 ulong_t ptr;
1527 1556 error = fulword(src, &ptr);
1528 1557 *dst = (caddr_t)ptr;
1529 1558 } else {
1530 1559 uint32_t ptr;
1531 1560 error = fuword32(src, &ptr);
1532 1561 *dst = (caddr_t)(uintptr_t)ptr;
1533 1562 }
1534 1563 return (error);
1535 1564 }
1536 1565
1537 1566 static int
1538 1567 stk_putptr(uarg_t *args, char *addr, char *value)
1539 1568 {
1540 1569 if (args->to_model == DATAMODEL_NATIVE)
1541 1570 return (sulword(addr, (ulong_t)value));
1542 1571 else
1543 1572 return (suword32(addr, (uint32_t)(uintptr_t)value));
1544 1573 }
1545 1574
/*
 * Gather the new image's argv/envp strings (plus any interpreter names,
 * interpreter arguments, and auxv strings) into the kernel stack buffer
 * described by args (see the layout diagram above), and compute the
 * final user stack size.  Returns 0 or an errno; E2BIG tells the caller
 * (exec_args()) to retry with a larger buffer.
 */
static int
stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
{
	char *sp;
	int argc, error;
	int argv_empty = 0;
	size_t ptrsize = args->from_ptrsize;
	size_t size, pad;
	char *argv = (char *)uap->argp;
	char *envp = (char *)uap->envp;

	/*
	 * Copy interpreter's name and argument to argv[0] and argv[1].
	 * In the rare case that we have nested interpreters then those names
	 * and arguments are also copied to the subsequent slots in argv.
	 */
	if (intp != NULL && intp->intp_name[0] != NULL) {
		int i;

		for (i = 0; i < INTP_MAXDEPTH; i++) {
			if (intp->intp_name[i] == NULL)
				break;
			error = stk_add(args, intp->intp_name[i], UIO_SYSSPACE);
			if (error != 0)
				return (error);
			if (intp->intp_arg[i] != NULL) {
				error = stk_add(args, intp->intp_arg[i],
				    UIO_SYSSPACE);
				if (error != 0)
					return (error);
			}
		}

		/* The script path itself becomes the next argument. */
		if (args->fname != NULL)
			error = stk_add(args, args->fname, UIO_SYSSPACE);
		else
			error = stk_add(args, uap->fname, UIO_USERSPACE);
		if (error)
			return (error);

		/*
		 * Check for an empty argv[].
		 */
		if (stk_getptr(args, argv, &sp))
			return (EFAULT);
		if (sp == NULL)
			argv_empty = 1;

		argv += ptrsize;		/* ignore original argv[0] */
	}

	if (argv_empty == 0) {
		/*
		 * Add argv[] strings to the stack.
		 */
		for (;;) {
			if (stk_getptr(args, argv, &sp))
				return (EFAULT);
			if (sp == NULL)
				break;
			if ((error = stk_add(args, sp, UIO_USERSPACE)) != 0)
				return (error);
			argv += ptrsize;
		}
	}
	/* argc = number of offset slots consumed so far. */
	argc = (int *)(args->stk_base + args->stk_size) - args->stk_offp;
	args->arglen = args->stk_strp - args->stk_base;

	/*
	 * Add environ[] strings to the stack.
	 */
	if (envp != NULL) {
		for (;;) {
			char *tmp = args->stk_strp;
			if (stk_getptr(args, envp, &sp))
				return (EFAULT);
			if (sp == NULL)
				break;
			if ((error = stk_add(args, sp, UIO_USERSPACE)) != 0)
				return (error);
			if (args->scrubenv && strncmp(tmp, "LD_", 3) == 0) {
				/* Undo the copied string */
				args->stk_strp = tmp;
				/*
				 * NOTE(review): the offset slot is an int;
				 * 0 appears to be intended here, not NULL.
				 */
				*(args->stk_offp++) = NULL;
			}
			envp += ptrsize;
		}
	}
	args->na = (int *)(args->stk_base + args->stk_size) - args->stk_offp;
	args->ne = args->na - argc;

	/*
	 * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and
	 * AT_SUN_EMULATOR strings to the stack.
	 */
	if (auxvpp != NULL && *auxvpp != NULL) {
		if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
			return (error);
		if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0)
			return (error);
		if (args->brandname != NULL &&
		    (error = stk_add(args, args->brandname, UIO_SYSSPACE)) != 0)
			return (error);
		if (args->emulator != NULL &&
		    (error = stk_add(args, args->emulator, UIO_SYSSPACE)) != 0)
			return (error);
	}

	/*
	 * Compute the size of the stack.  This includes all the pointers,
	 * the space reserved for the aux vector, and all the strings.
	 * The total number of pointers is args->na (which is argc + envc)
	 * plus four more: (1) a pointer's worth of space for argc;
	 * (2) the NULL after the last argument (i.e. argv[argc]); (3) the
	 * NULL after the last environment variable (i.e. envp[envc]); and
	 * (4) the NULL after all the strings, at the very top of the stack.
	 */
	size = (args->na + 4) * args->to_ptrsize + args->auxsize +
	    (args->stk_strp - args->stk_base);

	/*
	 * Pad the string section with zeroes to align the stack size.
	 */
	pad = P2NPHASE(size, args->stk_align);

	if (STK_AVAIL(args) < pad)
		return (E2BIG);

	args->usrstack_size = size + pad;

	while (pad-- != 0)
		*args->stk_strp++ = 0;

	args->nc = args->stk_strp - args->stk_base;

	return (0);
}
1683 1712
/*
 * Lay the new user stack out from the kernel buffer built by
 * stk_copyin(): argc, the argv[] and envp[] pointer arrays, the string
 * data, and the user addresses of the auxv strings.  Also records
 * argc/argv/envp for /proc and fills in u_psargs.  Returns 0 on
 * success, -1 on any copyout/suword fault.
 */
static int
stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
{
	size_t ptrsize = args->to_ptrsize;
	ssize_t pslen;
	char *kstrp = args->stk_base;
	/* User address where the string block will land. */
	char *ustrp = usrstack - args->nc - ptrsize;
	char *usp = usrstack - args->usrstack_size;
	/* Offset array sits at the top of the kernel buffer, read down. */
	int *offp = (int *)(args->stk_base + args->stk_size);
	int envc = args->ne;
	int argc = args->na - envc;
	int i;

	/*
	 * Record argc for /proc.
	 */
	up->u_argc = argc;

	/*
	 * Put argc on the stack.  Note that even though it's an int,
	 * it always consumes ptrsize bytes (for alignment).
	 */
	if (stk_putptr(args, usp, (char *)(uintptr_t)argc))
		return (-1);

	/*
	 * Add argc space (ptrsize) to usp and record argv for /proc.
	 */
	up->u_argv = (uintptr_t)(usp += ptrsize);

	/*
	 * Put the argv[] pointers on the stack.
	 */
	for (i = 0; i < argc; i++, usp += ptrsize)
		if (stk_putptr(args, usp, &ustrp[*--offp]))
			return (-1);

	/*
	 * Copy arguments to u_psargs.
	 */
	pslen = MIN(args->arglen, PSARGSZ) - 1;
	for (i = 0; i < pslen; i++)
		up->u_psargs[i] = (kstrp[i] == '\0' ? ' ' : kstrp[i]);
	while (i < PSARGSZ)
		up->u_psargs[i++] = '\0';

	/*
	 * Add space for argv[]'s NULL terminator (ptrsize) to usp and
	 * record envp for /proc.
	 */
	up->u_envp = (uintptr_t)(usp += ptrsize);

	/*
	 * Put the envp[] pointers on the stack.
	 */
	for (i = 0; i < envc; i++, usp += ptrsize)
		if (stk_putptr(args, usp, &ustrp[*--offp]))
			return (-1);

	/*
	 * Add space for envp[]'s NULL terminator (ptrsize) to usp and
	 * remember where the stack ends, which is also where auxv begins.
	 */
	args->stackend = usp += ptrsize;

	/*
	 * Put all the argv[], envp[], and auxv strings on the stack.
	 */
	if (copyout(args->stk_base, ustrp, args->nc))
		return (-1);

	/*
	 * Fill in the aux vector now that we know the user stack addresses
	 * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
	 * AT_SUN_EMULATOR strings.
	 */
	if (auxvpp != NULL && *auxvpp != NULL) {
		if (args->to_model == DATAMODEL_NATIVE) {
			auxv_t **a = (auxv_t **)auxvpp;
			ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp])
			ADDAUX(*a, AT_SUN_EXECNAME, (long)&ustrp[*--offp])
			if (args->brandname != NULL)
				ADDAUX(*a,
				    AT_SUN_BRANDNAME, (long)&ustrp[*--offp])
			if (args->emulator != NULL)
				ADDAUX(*a,
				    AT_SUN_EMULATOR, (long)&ustrp[*--offp])
		} else {
			auxv32_t **a = (auxv32_t **)auxvpp;
			ADDAUX(*a,
			    AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp])
			ADDAUX(*a,
			    AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp])
			if (args->brandname != NULL)
				ADDAUX(*a, AT_SUN_BRANDNAME,
				    (int)(uintptr_t)&ustrp[*--offp])
			if (args->emulator != NULL)
				ADDAUX(*a, AT_SUN_EMULATOR,
				    (int)(uintptr_t)&ustrp[*--offp])
		}
	}

	return (0);
}
1788 1817
1789 1818 /*
1819 + * Though the actual stack base is constant, slew the %sp by a random aligned
1820 + * amount in [0,aslr_max_stack_skew). Mostly, this makes life slightly more
1821 + * complicated for buffer overflows hoping to overwrite the return address.
1822 + *
1823 + * On some platforms this helps avoid cache thrashing when identical processes
1824 + * simultaneously share caches that don't provide enough associativity
1825 + * (e.g. sun4v systems). In this case stack slewing makes the same hot stack
1826 + * variables in different processes live in different cache sets increasing
1827 + * effective associativity.
1828 + */
size_t
exec_get_spslew(void)
{
#ifdef sun4v
	/* Stack-coloring state used when ASLR slewing is not in effect. */
	static uint_t sp_color_stride = 16;
	static uint_t sp_color_mask = 0x1f;
	static uint_t sp_current_color = (uint_t)-1;
#endif
	size_t off;

	/* Must be a power of two for the P2PHASE computation below. */
	ASSERT(ISP2(aslr_max_stack_skew));

	if ((aslr_max_stack_skew == 0) ||
	    !secflag_enabled(curproc, PROC_SEC_ASLR)) {
#ifdef sun4v
		/* No ASLR: still rotate stack colors to spread cache sets. */
		uint_t spcolor = atomic_inc_32_nv(&sp_current_color);
		return ((size_t)((spcolor & sp_color_mask) *
		    SA(sp_color_stride)));
#else
		return (0);
#endif
	}

	/* ASLR enabled: stack-aligned random slew in [0, aslr_max_stack_skew). */
	(void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
	return (SA(P2PHASE(off, aslr_max_stack_skew)));
}
1855 +
1856 +/*
1790 1857 * Initialize a new user stack with the specified arguments and environment.
1791 1858 * The initial user stack layout is as follows:
1792 1859 *
1793 1860 * User Stack
1794 1861 * +---------------+ <--- curproc->p_usrstack
1795 1862 * | |
1796 1863 * | slew |
1797 1864 * | |
1798 1865 * +---------------+
1799 1866 * | NULL |
1800 1867 * +---------------+
1801 1868 * | |
1802 1869 * | auxv strings |
1803 1870 * | |
1804 1871 * +---------------+
1805 1872 * | |
1806 1873 * | envp strings |
1807 1874 * | |
1808 1875 * +---------------+
1809 1876 * | |
1810 1877 * | argv strings |
1811 1878 * | |
1812 1879 * +---------------+ <--- ustrp
1813 1880 * | |
1814 1881 * | aux vector |
1815 1882 * | |
1816 1883 * +---------------+ <--- auxv
1817 1884 * | NULL |
1818 1885 * +---------------+
1819 1886 * | envp[envc-1] |
1820 1887 * +---------------+
1821 1888 * | ... |
1822 1889 * +---------------+
1823 1890 * | envp[0] |
1824 1891 * +---------------+ <--- envp[]
1825 1892 * | NULL |
1826 1893 * +---------------+
1827 1894 * | argv[argc-1] |
1828 1895 * +---------------+
1829 1896 * | ... |
1830 1897 * +---------------+
1831 1898 * | argv[0] |
1832 1899 * +---------------+ <--- argv[]
1833 1900 * | argc |
1834 1901 * +---------------+ <--- stack base
1835 1902 */
1836 1903 int
1837 1904 exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
1838 1905 {
1839 1906 size_t size;
1840 1907 int error;
1841 1908 proc_t *p = ttoproc(curthread);
1842 1909 user_t *up = PTOU(p);
1843 1910 char *usrstack;
1844 1911 rctl_entity_p_t e;
1845 1912 struct as *as;
1846 1913 extern int use_stk_lpg;
1847 1914 size_t sp_slew;
1848 1915
1849 1916 args->from_model = p->p_model;
1850 1917 if (p->p_model == DATAMODEL_NATIVE) {
1851 1918 args->from_ptrsize = sizeof (long);
1852 1919 } else {
1853 1920 args->from_ptrsize = sizeof (int32_t);
1854 1921 }
1855 1922
1856 1923 if (args->to_model == DATAMODEL_NATIVE) {
1857 1924 args->to_ptrsize = sizeof (long);
1858 1925 args->ncargs = NCARGS;
1859 1926 args->stk_align = STACK_ALIGN;
1860 1927 if (args->addr32)
1861 1928 usrstack = (char *)USRSTACK64_32;
1862 1929 else
1863 1930 usrstack = (char *)USRSTACK;
1864 1931 } else {
1865 1932 args->to_ptrsize = sizeof (int32_t);
1866 1933 args->ncargs = NCARGS32;
1867 1934 args->stk_align = STACK_ALIGN32;
1868 1935 usrstack = (char *)USRSTACK32;
1869 1936 }
1870 1937
1871 1938 ASSERT(P2PHASE((uintptr_t)usrstack, args->stk_align) == 0);
1872 1939
1873 1940 #if defined(__sparc)
1874 1941 /*
1875 1942 * Make sure user register windows are empty before
1876 1943 * attempting to make a new stack.
1877 1944 */
1878 1945 (void) flush_user_windows_to_stack(NULL);
1879 1946 #endif
1880 1947
1881 1948 for (size = PAGESIZE; ; size *= 2) {
1882 1949 args->stk_size = size;
1883 1950 args->stk_base = kmem_alloc(size, KM_SLEEP);
1884 1951 args->stk_strp = args->stk_base;
1885 1952 args->stk_offp = (int *)(args->stk_base + size);
1886 1953 error = stk_copyin(uap, args, intp, auxvpp);
1887 1954 if (error == 0)
1888 1955 break;
1889 1956 kmem_free(args->stk_base, size);
1890 1957 if (error != E2BIG && error != ENAMETOOLONG)
1891 1958 return (error);
1892 1959 if (size >= args->ncargs)
1893 1960 return (E2BIG);
1894 1961 }
1895 1962
1896 1963 size = args->usrstack_size;
1897 1964
1898 1965 ASSERT(error == 0);
1899 1966 ASSERT(P2PHASE(size, args->stk_align) == 0);
1900 1967 ASSERT((ssize_t)STK_AVAIL(args) >= 0);
1901 1968
1902 1969 if (size > args->ncargs) {
1903 1970 kmem_free(args->stk_base, args->stk_size);
1904 1971 return (E2BIG);
1905 1972 }
1906 1973
1907 1974 /*
1908 1975 * Leave only the current lwp and force the other lwps to exit.
1909 1976 * If another lwp beat us to the punch by calling exit(), bail out.
1910 1977 */
1911 1978 if ((error = exitlwps(0)) != 0) {
1912 1979 kmem_free(args->stk_base, args->stk_size);
1913 1980 return (error);
1914 1981 }
1915 1982
1916 1983 /*
1917 1984 * Revoke any doors created by the process.
1918 1985 */
1919 1986 if (p->p_door_list)
1920 1987 door_exit();
1921 1988
1922 1989 /*
1923 1990 * Release schedctl data structures.
1924 1991 */
1925 1992 if (p->p_pagep)
1926 1993 schedctl_proc_cleanup();
1927 1994
1928 1995 /*
1929 1996 * Clean up any DTrace helpers for the process.
1930 1997 */
1931 1998 if (p->p_dtrace_helpers != NULL) {
1932 1999 ASSERT(dtrace_helpers_cleanup != NULL);
1933 2000 (*dtrace_helpers_cleanup)();
1934 2001 }
1935 2002
1936 2003 mutex_enter(&p->p_lock);
1937 2004 /*
1938 2005 * Cleanup the DTrace provider associated with this process.
1939 2006 */
1940 2007 if (p->p_dtrace_probes) {
1941 2008 ASSERT(dtrace_fasttrap_exec_ptr != NULL);
1942 2009 dtrace_fasttrap_exec_ptr(p);
1943 2010 }
1944 2011 mutex_exit(&p->p_lock);
1945 2012
1946 2013 /*
1947 2014 * discard the lwpchan cache.
1948 2015 */
1949 2016 if (p->p_lcp != NULL)
1950 2017 lwpchan_destroy_cache(1);
1951 2018
1952 2019 /*
1953 2020 * Delete the POSIX timers.
1954 2021 */
1955 2022 if (p->p_itimer != NULL)
1956 2023 timer_exit();
1957 2024
1958 2025 /*
1959 2026 * Delete the ITIMER_REALPROF interval timer.
1960 2027 * The other ITIMER_* interval timers are specified
1961 2028 * to be inherited across exec().
1962 2029 */
1963 2030 delete_itimer_realprof();
1964 2031
1965 2032 if (AU_AUDITING())
1966 2033 audit_exec(args->stk_base, args->stk_base + args->arglen,
1967 2034 args->na - args->ne, args->ne, args->pfcred);
1968 2035
1969 2036 /*
1970 2037 * Ensure that we don't change resource associations while we
1971 2038 * change address spaces.
1972 2039 */
1973 2040 mutex_enter(&p->p_lock);
1974 2041 pool_barrier_enter();
1975 2042 mutex_exit(&p->p_lock);
1976 2043
1977 2044 /*
1978 2045 * Destroy the old address space and create a new one.
1979 2046 * From here on, any errors are fatal to the exec()ing process.
1980 2047 * On error we return -1, which means the caller must SIGKILL
1981 2048 * the process.
1982 2049 */
1983 2050 relvm();
1984 2051
1985 2052 mutex_enter(&p->p_lock);
1986 2053 pool_barrier_exit();
1987 2054 mutex_exit(&p->p_lock);
1988 2055
1989 2056 up->u_execsw = args->execswp;
1990 2057
1991 2058 p->p_brkbase = NULL;
1992 2059 p->p_brksize = 0;
1993 2060 p->p_brkpageszc = 0;
1994 2061 p->p_stksize = 0;
1995 2062 p->p_stkpageszc = 0;
1996 2063 p->p_model = args->to_model;
1997 2064 p->p_usrstack = usrstack;
1998 2065 p->p_stkprot = args->stk_prot;
1999 2066 p->p_datprot = args->dat_prot;
2000 2067
2001 2068 /*
2002 2069 * Reset resource controls such that all controls are again active as
2003 2070 * well as appropriate to the potentially new address model for the
2004 2071 * process.
2005 2072 */
2006 2073 e.rcep_p.proc = p;
2007 2074 e.rcep_t = RCENTITY_PROCESS;
2008 2075 rctl_set_reset(p->p_rctls, p, &e);
↓ open down ↓ |
209 lines elided |
↑ open up ↑ |
2009 2076
2010 2077 /* Too early to call map_pgsz for the heap */
2011 2078 if (use_stk_lpg) {
2012 2079 p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, p, 0, 0, 0));
2013 2080 }
2014 2081
2015 2082 mutex_enter(&p->p_lock);
2016 2083 p->p_flag |= SAUTOLPG; /* kernel controls page sizes */
2017 2084 mutex_exit(&p->p_lock);
2018 2085
2019 - /*
2020 - * Some platforms may choose to randomize real stack start by adding a
2021 - * small slew (not more than a few hundred bytes) to the top of the
2022 - * stack. This helps avoid cache thrashing when identical processes
2023 - * simultaneously share caches that don't provide enough associativity
2024 - * (e.g. sun4v systems). In this case stack slewing makes the same hot
2025 - * stack variables in different processes to live in different cache
2026 - * sets increasing effective associativity.
2027 - */
2028 2086 sp_slew = exec_get_spslew();
2029 2087 ASSERT(P2PHASE(sp_slew, args->stk_align) == 0);
2088 + /* Be certain we don't underflow */
2089 + VERIFY((curproc->p_usrstack - (size + sp_slew)) < curproc->p_usrstack);
2030 2090 exec_set_sp(size + sp_slew);
2031 2091
2032 2092 as = as_alloc();
2033 2093 p->p_as = as;
2034 2094 as->a_proc = p;
2035 2095 if (p->p_model == DATAMODEL_ILP32 || args->addr32)
2036 2096 as->a_userlimit = (caddr_t)USERLIMIT32;
2037 2097 (void) hat_setup(as->a_hat, HAT_ALLOC);
2038 2098 hat_join_srd(as->a_hat, args->ex_vp);
2039 2099
2040 2100 /*
2041 2101 * Finally, write out the contents of the new stack.
2042 2102 */
2043 2103 error = stk_copyout(args, usrstack - sp_slew, auxvpp, up);
2044 2104 kmem_free(args->stk_base, args->stk_size);
2045 2105 return (error);
2046 2106 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX