25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 /*
29 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
30 */
31
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/thread.h>
35 #include <sys/sysmacros.h>
36 #include <sys/signal.h>
37 #include <sys/cred.h>
38 #include <sys/user.h>
39 #include <sys/errno.h>
40 #include <sys/vnode.h>
41 #include <sys/mman.h>
42 #include <sys/kmem.h>
43 #include <sys/proc.h>
44 #include <sys/pathname.h>
45 #include <sys/cmn_err.h>
46 #include <sys/systm.h>
47 #include <sys/elf.h>
48 #include <sys/vmsystm.h>
49 #include <sys/debug.h>
50 #include <sys/auxv.h>
51 #include <sys/exec.h>
52 #include <sys/prsystm.h>
53 #include <vm/as.h>
54 #include <vm/rm.h>
55 #include <vm/seg.h>
56 #include <vm/seg_vn.h>
57 #include <sys/modctl.h>
58 #include <sys/systeminfo.h>
59 #include <sys/vmparam.h>
60 #include <sys/machelf.h>
61 #include <sys/shm_impl.h>
62 #include <sys/archsystm.h>
63 #include <sys/fasttrap.h>
64 #include <sys/brand.h>
65 #include "elf_impl.h"
66 #include <sys/sdt.h>
67 #include <sys/siginfo.h>
68
69 extern int at_flags;
70
71 #define ORIGIN_STR "ORIGIN"
72 #define ORIGIN_STR_SIZE 6
73
74 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
75 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
76 ssize_t *);
77 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
78 ssize_t *, caddr_t *, ssize_t *);
79 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
80 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
81 Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
82 caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
83
84 typedef enum {
85 STR_CTF,
86 STR_SYMTAB,
87 STR_DYNSYM,
88 STR_STRTAB,
89 STR_DYNSTR,
145 }
146
147 static int
148 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
149 {
150 ASSERT(phdrp->p_type == PT_SUNWDTRACE);
151
152 /*
153 * See the comment in fasttrap.h for information on how to safely
154 * update this program header.
155 */
156 if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
157 (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
158 return (-1);
159
160 args->thrptr = phdrp->p_vaddr + base;
161
162 return (0);
163 }
164
165 /*
166 * Map in the executable pointed to by vp. Returns 0 on success.
167 */
168 int
169 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
170 intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
171 caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
172 {
173 size_t len;
174 struct vattr vat;
175 caddr_t phdrbase = NULL;
176 ssize_t phdrsize;
177 int nshdrs, shstrndx, nphdrs;
178 int error = 0;
179 Phdr *uphdr = NULL;
180 Phdr *junk = NULL;
181 Phdr *dynphdr = NULL;
182 Phdr *dtrphdr = NULL;
183 uintptr_t lddata;
184 long execsz;
238 kmem_free(phdrbase, phdrsize);
239 return (error);
240 }
241
242 /*ARGSUSED*/
243 int
244 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
245 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
246 int brand_action)
247 {
248 caddr_t phdrbase = NULL;
249 caddr_t bssbase = 0;
250 caddr_t brkbase = 0;
251 size_t brksize = 0;
252 ssize_t dlnsize;
253 aux_entry_t *aux;
254 int error;
255 ssize_t resid;
256 int fd = -1;
257 intptr_t voffset;
258 Phdr *dyphdr = NULL;
259 Phdr *stphdr = NULL;
260 Phdr *uphdr = NULL;
261 Phdr *junk = NULL;
262 size_t len;
263 ssize_t phdrsize;
264 int postfixsize = 0;
265 int i, hsize;
266 Phdr *phdrp;
267 Phdr *dataphdrp = NULL;
268 Phdr *dtrphdr;
269 Phdr *capphdr = NULL;
270 Cap *cap = NULL;
271 ssize_t capsize;
272 int hasu = 0;
273 int hasauxv = 0;
274 int hasdy = 0;
275 int branded = 0;
276
277 struct proc *p = ttoproc(curthread);
278 struct user *up = PTOU(p);
279 struct bigwad {
280 Ehdr ehdr;
281 aux_entry_t elfargs[__KERN_NAUXV_IMPL];
282 char dl_name[MAXPATHLEN];
283 char pathbuf[MAXPATHLEN];
284 struct vattr vattr;
285 struct execenv exenv;
286 } *bigwad; /* kmem_alloc this behemoth so we don't blow stack */
287 Ehdr *ehdrp;
288 int nshdrs, shstrndx, nphdrs;
289 char *dlnp;
290 char *pathbufp;
291 rlim64_t limit;
292 rlim64_t roundlimit;
293
294 ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
353 */
354 if ((level <= INTP_MAXDEPTH) &&
355 (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
356 error = BROP(p)->b_elfexec(vp, uap, args,
357 idatap, level + 1, execsz, setid, exec_file, cred,
358 brand_action);
359 goto out;
360 }
361
362 /*
363 * Determine aux size now so that stack can be built
364 * in one shot (except actual copyout of aux image),
365 * determine any non-default stack protections,
366 * and still have this code be machine independent.
367 */
368 hsize = ehdrp->e_phentsize;
369 phdrp = (Phdr *)phdrbase;
370 for (i = nphdrs; i > 0; i--) {
371 switch (phdrp->p_type) {
372 case PT_INTERP:
373 hasauxv = hasdy = 1;
374 break;
375 case PT_PHDR:
376 hasu = 1;
377 break;
378 case PT_SUNWSTACK:
379 args->stk_prot = PROT_USER;
380 if (phdrp->p_flags & PF_R)
381 args->stk_prot |= PROT_READ;
382 if (phdrp->p_flags & PF_W)
383 args->stk_prot |= PROT_WRITE;
384 if (phdrp->p_flags & PF_X)
385 args->stk_prot |= PROT_EXEC;
386 break;
387 case PT_LOAD:
388 dataphdrp = phdrp;
389 break;
390 case PT_SUNWCAP:
391 capphdr = phdrp;
392 break;
393 }
394 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
395 }
396
397 if (ehdrp->e_type != ET_EXEC) {
398 dataphdrp = NULL;
399 hasauxv = 1;
400 }
401
402 /* Copy BSS permissions to args->dat_prot */
403 if (dataphdrp != NULL) {
404 args->dat_prot = PROT_USER;
405 if (dataphdrp->p_flags & PF_R)
406 args->dat_prot |= PROT_READ;
407 if (dataphdrp->p_flags & PF_W)
408 args->dat_prot |= PROT_WRITE;
409 if (dataphdrp->p_flags & PF_X)
410 args->dat_prot |= PROT_EXEC;
411 }
412
415 * it now. This may be increased by exec_args if there are
416 * ISA-specific types (included in __KERN_NAUXV_IMPL).
417 */
418 if (hasauxv) {
419 /*
420 * If an AUX vector is being built - the base AUX
421 * entries are:
422 *
423 * AT_BASE
424 * AT_FLAGS
425 * AT_PAGESZ
426 * AT_SUN_AUXFLAGS
427 * AT_SUN_HWCAP
428 * AT_SUN_HWCAP2
429 * AT_SUN_PLATFORM (added in stk_copyout)
430 * AT_SUN_EXECNAME (added in stk_copyout)
431 * AT_NULL
432 *
433 * total == 9
434 */
435 if (hasdy && hasu) {
436 /*
437 * Has PT_INTERP & PT_PHDR - the auxvectors that
438 * will be built are:
439 *
440 * AT_PHDR
441 * AT_PHENT
442 * AT_PHNUM
443 * AT_ENTRY
444 * AT_LDDATA
445 *
446 * total = 5
447 */
448 args->auxsize = (9 + 5) * sizeof (aux_entry_t);
449 } else if (hasdy) {
450 /*
451 * Has PT_INTERP but no PT_PHDR
452 *
453 * AT_EXECFD
454 * AT_LDDATA
455 *
456 * total = 2
457 */
458 args->auxsize = (9 + 2) * sizeof (aux_entry_t);
459 } else {
460 args->auxsize = 9 * sizeof (aux_entry_t);
461 }
462 } else {
463 args->auxsize = 0;
464 }
465
466 /*
467 * If this binary is using an emulator, we need to add an
468 * AT_SUN_EMULATOR aux entry.
469 */
470 if (args->emulator != NULL)
471 args->auxsize += sizeof (aux_entry_t);
472
473 if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
474 branded = 1;
475 /*
476 * We will be adding 4 entries to the aux vectors. One for
477 * the brandname and 3 for the brand specific aux vectors.
478 */
479 args->auxsize += 4 * sizeof (aux_entry_t);
480 }
481
482 /* Hardware/Software capabilities */
483 if (capphdr != NULL &&
484 (capsize = capphdr->p_filesz) > 0 &&
485 capsize <= 16 * sizeof (*cap)) {
486 int ncaps = capsize / sizeof (*cap);
487 Cap *cp;
488
489 cap = kmem_alloc(capsize, KM_SLEEP);
490 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
491 capsize, (offset_t)capphdr->p_offset,
492 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
493 uprintf("%s: Cannot read capabilities section\n",
494 exec_file);
495 goto out;
496 }
497 for (cp = cap; cp < cap + ncaps; cp++) {
498 if (cp->c_tag == CA_SUNW_SF_1 &&
499 (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
500 if (args->to_model == DATAMODEL_LP64)
501 args->addr32 = 1;
512 if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
513 if (error == -1) {
514 error = ENOEXEC;
515 goto bad;
516 }
517 goto out;
518 }
519 /* we're single threaded after this point */
520
521 /*
522 * If this is an ET_DYN executable (shared object),
523 * determine its memory size so that mapelfexec() can load it.
524 */
525 if (ehdrp->e_type == ET_DYN)
526 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
527 else
528 len = 0;
529
530 dtrphdr = NULL;
531
532 if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
533 &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
534 len, execsz, &brksize)) != 0)
535 goto bad;
536
537 if (uphdr != NULL && dyphdr == NULL)
538 goto bad;
539
540 if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
541 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
542 goto bad;
543 }
544
545 if (dyphdr != NULL) {
546 size_t len;
547 uintptr_t lddata;
548 char *p;
549 struct vnode *nvp;
550
551 dlnsize = dyphdr->p_filesz;
552
553 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
554 goto bad;
555
556 /*
557 * Read in "interpreter" pathname.
558 */
559 if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz,
560 (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
561 CRED(), &resid)) != 0) {
562 uprintf("%s: Cannot obtain interpreter pathname\n",
563 exec_file);
564 goto bad;
565 }
566
567 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
568 goto bad;
569
570 /*
571 * Search for '$ORIGIN' token in interpreter path.
572 * If found, expand it.
573 */
574 for (p = dlnp; p = strchr(p, '$'); ) {
575 uint_t len, curlen;
576 char *_ptr;
577
578 if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
579 continue;
580
758 * libraries instead of using the brand libraries that are
759 * installed in the zone. We only do this for processes
760 * which we trust because we see they are already running
761 * under pfexec (where uid != euid). This prevents a
762 * malicious user within the zone from crafting a wrapper to
763 * run native suid commands with insecure libraries interposed.
764 */
765 if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
766 (setid &= ~EXECSETID_SETID) != 0))
767 auxf &= ~AF_SUN_SETUGID;
768
769 /*
770 * Record the user addr of the auxflags aux vector entry
771 * since brands may optionally want to manipulate this field.
772 */
773 args->auxp_auxflags =
774 (char *)((char *)args->stackend +
775 ((char *)&aux->a_type -
776 (char *)bigwad->elfargs));
777 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
778 /*
779 * Hardware capability flag word (performance hints)
780 * Used for choosing faster library routines.
781 * (Potentially different between 32-bit and 64-bit ABIs)
782 */
783 #if defined(_LP64)
784 if (args->to_model == DATAMODEL_NATIVE) {
785 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
786 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
787 } else {
788 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
789 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
790 }
791 #else
792 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
793 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
794 #endif
795 if (branded) {
796 /*
797 * Reserve space for the brand-private aux vectors,
1167 kmem_free(*shstrbasep, *shstrsizep);
1168 return (err);
1169 }
1170
1171 /*
1172 * Make sure the strtab is null-terminated to make sure we
1173 * don't run off the end of the table.
1174 */
1175 (*shstrbasep)[*shstrsizep - 1] = '\0';
1176
1177 return (0);
1178 }
1179
1180 static int
1181 mapelfexec(
1182 vnode_t *vp,
1183 Ehdr *ehdr,
1184 int nphdrs,
1185 caddr_t phdrbase,
1186 Phdr **uphdr,
1187 Phdr **dyphdr,
1188 Phdr **stphdr,
1189 Phdr **dtphdr,
1190 Phdr *dataphdrp,
1191 caddr_t *bssbase,
1192 caddr_t *brkbase,
1193 intptr_t *voffset,
1194 intptr_t *minaddr,
1195 size_t len,
1196 long *execsz,
1197 size_t *brksize)
1198 {
1199 Phdr *phdr;
1200 int i, prot, error;
1201 caddr_t addr = NULL;
1202 size_t zfodsz;
1203 int ptload = 0;
1204 int page;
1205 off_t offset;
1206 int hsize = ehdr->e_phentsize;
1207 caddr_t mintmp = (caddr_t)-1;
1208 extern int use_brk_lpg;
1209
1210 if (ehdr->e_type == ET_DYN) {
1211 /*
1212 * Obtain the virtual address of a hole in the
1213 * address space to map the "interpreter".
1214 */
1215 map_addr(&addr, len, (offset_t)0, 1, 0);
1216 if (addr == NULL)
1217 return (ENOMEM);
1218 *voffset = (intptr_t)addr;
1219
1220 /*
1221 * Calculate the minimum vaddr so it can be subtracted out.
1222 * According to the ELF specification, since PT_LOAD sections
1223 * must be sorted by increasing p_vaddr values, this is
1224 * guaranteed to be the first PT_LOAD section.
1225 */
1226 phdr = (Phdr *)phdrbase;
1227 for (i = nphdrs; i > 0; i--) {
1228 if (phdr->p_type == PT_LOAD) {
1229 *voffset -= (uintptr_t)phdr->p_vaddr;
1230 break;
1231 }
1232 phdr = (Phdr *)((caddr_t)phdr + hsize);
1233 }
1234
1235 } else {
1236 *voffset = 0;
1237 }
1238 phdr = (Phdr *)phdrbase;
1239 for (i = nphdrs; i > 0; i--) {
1240 switch (phdr->p_type) {
1241 case PT_LOAD:
1242 if ((*dyphdr != NULL) && (*uphdr == NULL))
1243 return (0);
1244
1245 ptload = 1;
1246 prot = PROT_USER;
1247 if (phdr->p_flags & PF_R)
1248 prot |= PROT_READ;
1249 if (phdr->p_flags & PF_W)
1250 prot |= PROT_WRITE;
1251 if (phdr->p_flags & PF_X)
1252 prot |= PROT_EXEC;
1253
1254 addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1255
1256 /*
1257 * Keep track of the segment with the lowest starting
1258 * address.
1259 */
1260 if (addr < mintmp)
1261 mintmp = addr;
1262
1277 */
1278 if (brksize != NULL && use_brk_lpg &&
1279 zfodsz != 0 && phdr == dataphdrp &&
1280 (prot & PROT_WRITE)) {
1281 size_t tlen = P2NPHASE((uintptr_t)addr +
1282 phdr->p_filesz, PAGESIZE);
1283
1284 if (zfodsz > tlen) {
1285 curproc->p_brkpageszc =
1286 page_szc(map_pgsz(MAPPGSZ_HEAP,
1287 curproc, addr + phdr->p_filesz +
1288 tlen, zfodsz - tlen, 0));
1289 }
1290 }
1291
1292 if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1293 (prot & PROT_WRITE)) {
1294 uint_t szc = curproc->p_brkpageszc;
1295 size_t pgsz = page_get_pagesize(szc);
1296 caddr_t ebss = addr + phdr->p_memsz;
1297 size_t extra_zfodsz;
1298
1299 ASSERT(pgsz > PAGESIZE);
1300
1301 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1302
1303 if (error = execmap(vp, addr, phdr->p_filesz,
1304 zfodsz + extra_zfodsz, phdr->p_offset,
1305 prot, page, szc))
1306 goto bad;
1307 if (brksize != NULL)
1308 *brksize = extra_zfodsz;
1309 } else {
1310 if (error = execmap(vp, addr, phdr->p_filesz,
1311 zfodsz, phdr->p_offset, prot, page, 0))
1312 goto bad;
1313 }
1314
1315 if (bssbase != NULL && addr >= *bssbase &&
1316 phdr == dataphdrp) {
1317 *bssbase = addr + phdr->p_filesz;
1318 }
1319 if (brkbase != NULL && addr >= *brkbase) {
1320 *brkbase = addr + phdr->p_memsz;
1321 }
1322
1323 *execsz += btopr(phdr->p_memsz);
1324 break;
1325
1326 case PT_INTERP:
1327 if (ptload)
1328 goto bad;
1329 *dyphdr = phdr;
1330 break;
1331
1332 case PT_SHLIB:
1333 *stphdr = phdr;
1334 break;
1335
1336 case PT_PHDR:
1337 if (ptload)
1338 goto bad;
1339 *uphdr = phdr;
1340 break;
1341
1342 case PT_NULL:
1343 case PT_DYNAMIC:
1344 case PT_NOTE:
1345 break;
1346
1347 case PT_SUNWDTRACE:
1348 if (dtphdr != NULL)
1349 *dtphdr = phdr;
1350 break;
1351
1352 default:
1353 break;
1354 }
1355 phdr = (Phdr *)((caddr_t)phdr + hsize);
1356 }
1357
1358 if (minaddr != NULL) {
1359 ASSERT(mintmp != (caddr_t)-1);
1360 *minaddr = (intptr_t)mintmp;
1361 }
1362
1363 return (0);
1364 bad:
1365 if (error == 0)
1366 error = EINVAL;
1367 return (error);
1368 }
1369
1370 int
1371 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1372 rlim64_t rlimit, cred_t *credp)
1373 {
1374 Note note;
1375 int error;
1376
1377 bzero(¬e, sizeof (note));
1378 bcopy("CORE", note.name, 4);
1379 note.nhdr.n_type = type;
1380 /*
1381 * The System V ABI states that n_namesz must be the length of the
1382 * string that follows the Nhdr structure including the terminating
|
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 /*
29 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
30 */
31
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/thread.h>
35 #include <sys/sysmacros.h>
36 #include <sys/signal.h>
37 #include <sys/cred.h>
38 #include <sys/user.h>
39 #include <sys/errno.h>
40 #include <sys/vnode.h>
41 #include <sys/mman.h>
42 #include <sys/kmem.h>
43 #include <sys/proc.h>
44 #include <sys/pathname.h>
45 #include <sys/policy.h>
46 #include <sys/cmn_err.h>
47 #include <sys/systm.h>
48 #include <sys/elf.h>
49 #include <sys/vmsystm.h>
50 #include <sys/debug.h>
51 #include <sys/auxv.h>
52 #include <sys/exec.h>
53 #include <sys/prsystm.h>
54 #include <vm/as.h>
55 #include <vm/rm.h>
56 #include <vm/seg.h>
57 #include <vm/seg_vn.h>
58 #include <sys/modctl.h>
59 #include <sys/systeminfo.h>
60 #include <sys/vmparam.h>
61 #include <sys/machelf.h>
62 #include <sys/shm_impl.h>
63 #include <sys/archsystm.h>
64 #include <sys/fasttrap.h>
65 #include <sys/brand.h>
66 #include "elf_impl.h"
67 #include <sys/sdt.h>
68 #include <sys/siginfo.h>
69 #include <sys/random.h>
70
71 extern int at_flags;
72 extern volatile size_t aslr_max_brk_skew;
73
74 #define ORIGIN_STR "ORIGIN"
75 #define ORIGIN_STR_SIZE 6
76
77 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
78 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
79 ssize_t *);
80 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
81 ssize_t *, caddr_t *, ssize_t *);
82 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
83 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
84 Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
85 caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
86
87 typedef enum {
88 STR_CTF,
89 STR_SYMTAB,
90 STR_DYNSYM,
91 STR_STRTAB,
92 STR_DYNSTR,
148 }
149
150 static int
151 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
152 {
153 ASSERT(phdrp->p_type == PT_SUNWDTRACE);
154
155 /*
156 * See the comment in fasttrap.h for information on how to safely
157 * update this program header.
158 */
159 if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
160 (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
161 return (-1);
162
163 args->thrptr = phdrp->p_vaddr + base;
164
165 return (0);
166 }
167
168 static int
169 handle_secflag_dt(proc_t *p, uint_t dt, uint_t val)
170 {
171 uint_t flag;
172
173 switch (dt) {
174 case DT_SUNW_ASLR:
175 flag = PROC_SEC_ASLR;
176 break;
177 default:
178 return (EINVAL);
179 }
180
181 if (val == 0) {
182 if (secflag_isset(p->p_secflags.psf_lower, flag))
183 return (EPERM);
184 if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
185 secflag_isset(p->p_secflags.psf_inherit, flag))
186 return (EPERM);
187
188 secflag_clear(&p->p_secflags.psf_effective, flag);
189 } else {
190 if (!secflag_isset(p->p_secflags.psf_upper, flag))
191 return (EPERM);
192
193 if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
194 !secflag_isset(p->p_secflags.psf_inherit, flag))
195 return (EPERM);
196
197 secflag_set(&p->p_secflags.psf_effective, flag);
198 }
199
200 return (0);
201 }
202
203 /*
204 * Map in the executable pointed to by vp. Returns 0 on success.
205 */
206 int
207 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
208 intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
209 caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
210 {
211 size_t len;
212 struct vattr vat;
213 caddr_t phdrbase = NULL;
214 ssize_t phdrsize;
215 int nshdrs, shstrndx, nphdrs;
216 int error = 0;
217 Phdr *uphdr = NULL;
218 Phdr *junk = NULL;
219 Phdr *dynphdr = NULL;
220 Phdr *dtrphdr = NULL;
221 uintptr_t lddata;
222 long execsz;
276 kmem_free(phdrbase, phdrsize);
277 return (error);
278 }
279
280 /*ARGSUSED*/
281 int
282 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
283 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
284 int brand_action)
285 {
286 caddr_t phdrbase = NULL;
287 caddr_t bssbase = 0;
288 caddr_t brkbase = 0;
289 size_t brksize = 0;
290 ssize_t dlnsize;
291 aux_entry_t *aux;
292 int error;
293 ssize_t resid;
294 int fd = -1;
295 intptr_t voffset;
296 Phdr *intphdr = NULL;
297 Phdr *dynamicphdr = NULL;
298 Phdr *stphdr = NULL;
299 Phdr *uphdr = NULL;
300 Phdr *junk = NULL;
301 size_t len;
302 ssize_t phdrsize;
303 int postfixsize = 0;
304 int i, hsize;
305 Phdr *phdrp;
306 Phdr *dataphdrp = NULL;
307 Phdr *dtrphdr;
308 Phdr *capphdr = NULL;
309 Cap *cap = NULL;
310 ssize_t capsize;
311 Dyn *dyn = NULL;
312 int hasu = 0;
313 int hasauxv = 0;
314 int hasintp = 0;
315 int branded = 0;
316
317 struct proc *p = ttoproc(curthread);
318 struct user *up = PTOU(p);
319 struct bigwad {
320 Ehdr ehdr;
321 aux_entry_t elfargs[__KERN_NAUXV_IMPL];
322 char dl_name[MAXPATHLEN];
323 char pathbuf[MAXPATHLEN];
324 struct vattr vattr;
325 struct execenv exenv;
326 } *bigwad; /* kmem_alloc this behemoth so we don't blow stack */
327 Ehdr *ehdrp;
328 int nshdrs, shstrndx, nphdrs;
329 char *dlnp;
330 char *pathbufp;
331 rlim64_t limit;
332 rlim64_t roundlimit;
333
334 ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
393 */
394 if ((level <= INTP_MAXDEPTH) &&
395 (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
396 error = BROP(p)->b_elfexec(vp, uap, args,
397 idatap, level + 1, execsz, setid, exec_file, cred,
398 brand_action);
399 goto out;
400 }
401
402 /*
403 * Determine aux size now so that stack can be built
404 * in one shot (except actual copyout of aux image),
405 * determine any non-default stack protections,
406 * and still have this code be machine independent.
407 */
408 hsize = ehdrp->e_phentsize;
409 phdrp = (Phdr *)phdrbase;
410 for (i = nphdrs; i > 0; i--) {
411 switch (phdrp->p_type) {
412 case PT_INTERP:
413 hasauxv = hasintp = 1;
414 break;
415 case PT_PHDR:
416 hasu = 1;
417 break;
418 case PT_SUNWSTACK:
419 args->stk_prot = PROT_USER;
420 if (phdrp->p_flags & PF_R)
421 args->stk_prot |= PROT_READ;
422 if (phdrp->p_flags & PF_W)
423 args->stk_prot |= PROT_WRITE;
424 if (phdrp->p_flags & PF_X)
425 args->stk_prot |= PROT_EXEC;
426 break;
427 case PT_LOAD:
428 dataphdrp = phdrp;
429 break;
430 case PT_SUNWCAP:
431 capphdr = phdrp;
432 break;
433 case PT_DYNAMIC:
434 dynamicphdr = phdrp;
435 break;
436 }
437 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
438 }
439
440 if (ehdrp->e_type != ET_EXEC) {
441 dataphdrp = NULL;
442 hasauxv = 1;
443 }
444
445 /* Copy BSS permissions to args->dat_prot */
446 if (dataphdrp != NULL) {
447 args->dat_prot = PROT_USER;
448 if (dataphdrp->p_flags & PF_R)
449 args->dat_prot |= PROT_READ;
450 if (dataphdrp->p_flags & PF_W)
451 args->dat_prot |= PROT_WRITE;
452 if (dataphdrp->p_flags & PF_X)
453 args->dat_prot |= PROT_EXEC;
454 }
455
458 * it now. This may be increased by exec_args if there are
459 * ISA-specific types (included in __KERN_NAUXV_IMPL).
460 */
461 if (hasauxv) {
462 /*
463 * If an AUX vector is being built - the base AUX
464 * entries are:
465 *
466 * AT_BASE
467 * AT_FLAGS
468 * AT_PAGESZ
469 * AT_SUN_AUXFLAGS
470 * AT_SUN_HWCAP
471 * AT_SUN_HWCAP2
472 * AT_SUN_PLATFORM (added in stk_copyout)
473 * AT_SUN_EXECNAME (added in stk_copyout)
474 * AT_NULL
475 *
476 * total == 9
477 */
478 if (hasintp && hasu) {
479 /*
480 * Has PT_INTERP & PT_PHDR - the auxvectors that
481 * will be built are:
482 *
483 * AT_PHDR
484 * AT_PHENT
485 * AT_PHNUM
486 * AT_ENTRY
487 * AT_LDDATA
488 *
489 * total = 5
490 */
491 args->auxsize = (9 + 5) * sizeof (aux_entry_t);
492 } else if (hasintp) {
493 /*
494 * Has PT_INTERP but no PT_PHDR
495 *
496 * AT_EXECFD
497 * AT_LDDATA
498 *
499 * total = 2
500 */
501 args->auxsize = (9 + 2) * sizeof (aux_entry_t);
502 } else {
503 args->auxsize = 9 * sizeof (aux_entry_t);
504 }
505 } else {
506 args->auxsize = 0;
507 }
508
509 /*
510 * If this binary is using an emulator, we need to add an
511 * AT_SUN_EMULATOR aux entry.
512 */
513 if (args->emulator != NULL)
514 args->auxsize += sizeof (aux_entry_t);
515
516 if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
517 branded = 1;
518 /*
519 * We will be adding 4 entries to the aux vectors. One for
520 * the brandname and 3 for the brand specific aux vectors.
521 */
522 args->auxsize += 4 * sizeof (aux_entry_t);
523 }
524
525 /* If the binary has an explicit ASLR flag, it must be honoured */
526 if ((dynamicphdr != NULL) &&
527 (dynamicphdr->p_filesz > 0)) {
528 Dyn *dp;
529 off_t i = 0;
530
531 #define DYN_STRIDE 100
532 for (i = 0; i < dynamicphdr->p_filesz;
533 i += sizeof (*dyn) * DYN_STRIDE) {
534 int ndyns = (dynamicphdr->p_filesz - i) / sizeof (*dyn);
535 size_t dynsize;
536
537 ndyns = MIN(DYN_STRIDE, ndyns);
538 dynsize = ndyns * sizeof (*dyn);
539
540 dyn = kmem_alloc(dynsize, KM_SLEEP);
541
542 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)dyn,
543 dynsize, (offset_t)(dynamicphdr->p_offset + i),
544 UIO_SYSSPACE, 0, (rlim64_t)0,
545 CRED(), &resid)) != 0) {
546 uprintf("%s: cannot read .dynamic section\n",
547 exec_file);
548 goto out;
549 }
550
551 for (dp = dyn; dp < (dyn + ndyns); dp++) {
552 if (dp->d_tag == DT_SUNW_ASLR) {
553 if ((error = handle_secflag_dt(p,
554 DT_SUNW_ASLR,
555 dp->d_un.d_val)) != 0) {
556 uprintf("%s: error setting "
557 "security-flag from "
558 "DT_SUNW_ASLR: %d\n",
559 exec_file, error);
560 goto out;
561 }
562 }
563 }
564
565 kmem_free(dyn, dynsize);
566 }
567 }
568
569 /* Hardware/Software capabilities */
570 if (capphdr != NULL &&
571 (capsize = capphdr->p_filesz) > 0 &&
572 capsize <= 16 * sizeof (*cap)) {
573 int ncaps = capsize / sizeof (*cap);
574 Cap *cp;
575
576 cap = kmem_alloc(capsize, KM_SLEEP);
577 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
578 capsize, (offset_t)capphdr->p_offset,
579 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
580 uprintf("%s: Cannot read capabilities section\n",
581 exec_file);
582 goto out;
583 }
584 for (cp = cap; cp < cap + ncaps; cp++) {
585 if (cp->c_tag == CA_SUNW_SF_1 &&
586 (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
587 if (args->to_model == DATAMODEL_LP64)
588 args->addr32 = 1;
599 if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
600 if (error == -1) {
601 error = ENOEXEC;
602 goto bad;
603 }
604 goto out;
605 }
606 /* we're single threaded after this point */
607
608 /*
609 * If this is an ET_DYN executable (shared object),
610 * determine its memory size so that mapelfexec() can load it.
611 */
612 if (ehdrp->e_type == ET_DYN)
613 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
614 else
615 len = 0;
616
617 dtrphdr = NULL;
618
619 if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &intphdr,
620 &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
621 len, execsz, &brksize)) != 0)
622 goto bad;
623
624 if (uphdr != NULL && intphdr == NULL)
625 goto bad;
626
627 if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
628 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
629 goto bad;
630 }
631
632 if (intphdr != NULL) {
633 size_t len;
634 uintptr_t lddata;
635 char *p;
636 struct vnode *nvp;
637
638 dlnsize = intphdr->p_filesz;
639
640 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
641 goto bad;
642
643 /*
644 * Read in "interpreter" pathname.
645 */
646 if ((error = vn_rdwr(UIO_READ, vp, dlnp, intphdr->p_filesz,
647 (offset_t)intphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
648 CRED(), &resid)) != 0) {
649 uprintf("%s: Cannot obtain interpreter pathname\n",
650 exec_file);
651 goto bad;
652 }
653
654 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
655 goto bad;
656
657 /*
658 * Search for '$ORIGIN' token in interpreter path.
659 * If found, expand it.
660 */
661 for (p = dlnp; p = strchr(p, '$'); ) {
662 uint_t len, curlen;
663 char *_ptr;
664
665 if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
666 continue;
667
845 * libraries instead of using the brand libraries that are
846 * installed in the zone. We only do this for processes
847 * which we trust because we see they are already running
848 * under pfexec (where uid != euid). This prevents a
849 * malicious user within the zone from crafting a wrapper to
850 * run native suid commands with insecure libraries interposed.
851 */
852 if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
853 (setid &= ~EXECSETID_SETID) != 0))
854 auxf &= ~AF_SUN_SETUGID;
855
856 /*
857 * Record the user addr of the auxflags aux vector entry
858 * since brands may optionally want to manipulate this field.
859 */
860 args->auxp_auxflags =
861 (char *)((char *)args->stackend +
862 ((char *)&aux->a_type -
863 (char *)bigwad->elfargs));
864 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
865
866 /*
867 * Hardware capability flag word (performance hints)
868 * Used for choosing faster library routines.
869 * (Potentially different between 32-bit and 64-bit ABIs)
870 */
871 #if defined(_LP64)
872 if (args->to_model == DATAMODEL_NATIVE) {
873 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
874 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
875 } else {
876 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
877 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
878 }
879 #else
880 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
881 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
882 #endif
883 if (branded) {
884 /*
885 * Reserve space for the brand-private aux vectors,
1255 kmem_free(*shstrbasep, *shstrsizep);
1256 return (err);
1257 }
1258
1259 /*
1260 * Make sure the strtab is null-terminated to make sure we
1261 * don't run off the end of the table.
1262 */
1263 (*shstrbasep)[*shstrsizep - 1] = '\0';
1264
1265 return (0);
1266 }
1267
1268 static int
1269 mapelfexec(
1270 vnode_t *vp,
1271 Ehdr *ehdr,
1272 int nphdrs,
1273 caddr_t phdrbase,
1274 Phdr **uphdr,
1275 Phdr **intphdr,
1276 Phdr **stphdr,
1277 Phdr **dtphdr,
1278 Phdr *dataphdrp,
1279 caddr_t *bssbase,
1280 caddr_t *brkbase,
1281 intptr_t *voffset,
1282 intptr_t *minaddr,
1283 size_t len,
1284 long *execsz,
1285 size_t *brksize)
1286 {
1287 Phdr *phdr;
1288 int i, prot, error;
1289 caddr_t addr = NULL;
1290 size_t zfodsz;
1291 int ptload = 0;
1292 int page;
1293 off_t offset;
1294 int hsize = ehdr->e_phentsize;
1295 caddr_t mintmp = (caddr_t)-1;
1296 extern int use_brk_lpg;
1297
1298 if (ehdr->e_type == ET_DYN) {
1299 secflagset_t flags = 0;
1300 /*
1301 * Obtain the virtual address of a hole in the
1302 * address space to map the "interpreter".
1303 */
1304 if (secflag_enabled(curproc, PROC_SEC_ASLR))
1305 flags |= _MAP_RANDOMIZE;
1306
1307 map_addr(&addr, len, (offset_t)0, 1, flags);
1308 if (addr == NULL)
1309 return (ENOMEM);
1310 *voffset = (intptr_t)addr;
1311
1312 /*
1313 * Calculate the minimum vaddr so it can be subtracted out.
1314 * According to the ELF specification, since PT_LOAD sections
1315 * must be sorted by increasing p_vaddr values, this is
1316 * guaranteed to be the first PT_LOAD section.
1317 */
1318 phdr = (Phdr *)phdrbase;
1319 for (i = nphdrs; i > 0; i--) {
1320 if (phdr->p_type == PT_LOAD) {
1321 *voffset -= (uintptr_t)phdr->p_vaddr;
1322 break;
1323 }
1324 phdr = (Phdr *)((caddr_t)phdr + hsize);
1325 }
1326
1327 } else {
1328 *voffset = 0;
1329 }
1330 phdr = (Phdr *)phdrbase;
1331 for (i = nphdrs; i > 0; i--) {
1332 switch (phdr->p_type) {
1333 case PT_LOAD:
1334 if ((*intphdr != NULL) && (*uphdr == NULL))
1335 return (0);
1336
1337 ptload = 1;
1338 prot = PROT_USER;
1339 if (phdr->p_flags & PF_R)
1340 prot |= PROT_READ;
1341 if (phdr->p_flags & PF_W)
1342 prot |= PROT_WRITE;
1343 if (phdr->p_flags & PF_X)
1344 prot |= PROT_EXEC;
1345
1346 addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1347
1348 /*
1349 * Keep track of the segment with the lowest starting
1350 * address.
1351 */
1352 if (addr < mintmp)
1353 mintmp = addr;
1354
1369 */
1370 if (brksize != NULL && use_brk_lpg &&
1371 zfodsz != 0 && phdr == dataphdrp &&
1372 (prot & PROT_WRITE)) {
1373 size_t tlen = P2NPHASE((uintptr_t)addr +
1374 phdr->p_filesz, PAGESIZE);
1375
1376 if (zfodsz > tlen) {
1377 curproc->p_brkpageszc =
1378 page_szc(map_pgsz(MAPPGSZ_HEAP,
1379 curproc, addr + phdr->p_filesz +
1380 tlen, zfodsz - tlen, 0));
1381 }
1382 }
1383
1384 if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1385 (prot & PROT_WRITE)) {
1386 uint_t szc = curproc->p_brkpageszc;
1387 size_t pgsz = page_get_pagesize(szc);
1388 caddr_t ebss = addr + phdr->p_memsz;
1389 /*
1390 * If we need extra space to keep the BSS an
1391 * integral number of pages in size, some of
1392 * that space may fall beyond p_brkbase, so we
1393 * need to set p_brksize to account for it
1394 * being (logically) part of the brk.
1395 */
1396 size_t extra_zfodsz;
1397
1398 ASSERT(pgsz > PAGESIZE);
1399
1400 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1401
1402 if (error = execmap(vp, addr, phdr->p_filesz,
1403 zfodsz + extra_zfodsz, phdr->p_offset,
1404 prot, page, szc))
1405 goto bad;
1406 if (brksize != NULL)
1407 *brksize = extra_zfodsz;
1408 } else {
1409 if (error = execmap(vp, addr, phdr->p_filesz,
1410 zfodsz, phdr->p_offset, prot, page, 0))
1411 goto bad;
1412 }
1413
1414 if (bssbase != NULL && addr >= *bssbase &&
1415 phdr == dataphdrp) {
1416 *bssbase = addr + phdr->p_filesz;
1417 }
1418 if (brkbase != NULL && addr >= *brkbase) {
1419 *brkbase = addr + phdr->p_memsz;
1420 }
1421
1422 *execsz += btopr(phdr->p_memsz);
1423 break;
1424
1425 case PT_INTERP:
1426 if (ptload)
1427 goto bad;
1428 *intphdr = phdr;
1429 break;
1430
1431 case PT_SHLIB:
1432 *stphdr = phdr;
1433 break;
1434
1435 case PT_PHDR:
1436 if (ptload)
1437 goto bad;
1438 *uphdr = phdr;
1439 break;
1440
1441 case PT_NULL:
1442 case PT_DYNAMIC:
1443 case PT_NOTE:
1444 break;
1445
1446 case PT_SUNWDTRACE:
1447 if (dtphdr != NULL)
1448 *dtphdr = phdr;
1449 break;
1450
1451 default:
1452 break;
1453 }
1454 phdr = (Phdr *)((caddr_t)phdr + hsize);
1455 }
1456
1457 if (minaddr != NULL) {
1458 ASSERT(mintmp != (caddr_t)-1);
1459 *minaddr = (intptr_t)mintmp;
1460 }
1461
1462 if (brkbase != NULL && secflag_enabled(curproc, PROC_SEC_ASLR)) {
1463 size_t off;
1464 uintptr_t base = (uintptr_t)*brkbase;
1465 uintptr_t oend = base + *brksize;
1466
1467 ASSERT(ISP2(aslr_max_brk_skew));
1468
1469 (void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
1470 base += P2PHASE(off, aslr_max_brk_skew);
1471 base = P2ROUNDUP(base, PAGESIZE);
1472 *brkbase = (caddr_t)base;
1473 /*
1474 * Above, we set *brksize to account for the possibility we
1475 * had to grow the 'brk' in padding out the BSS to a page
1476 * boundary.
1477 *
1478 * We now need to adjust that based on where we now are
1479 * actually putting the brk.
1480 */
1481 if (oend > base)
1482 *brksize = oend - base;
1483 else
1484 *brksize = 0;
1485 }
1486
1487 return (0);
1488 bad:
1489 if (error == 0)
1490 error = EINVAL;
1491 return (error);
1492 }
1493
1494 int
1495 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1496 rlim64_t rlimit, cred_t *credp)
1497 {
1498 Note note;
1499 int error;
1500
1501 bzero(¬e, sizeof (note));
1502 bcopy("CORE", note.name, 4);
1503 note.nhdr.n_type = type;
1504 /*
1505 * The System V ABI states that n_namesz must be the length of the
1506 * string that follows the Nhdr structure including the terminating
|