25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 /*
29 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
30 */
31
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/thread.h>
35 #include <sys/sysmacros.h>
36 #include <sys/signal.h>
37 #include <sys/cred.h>
38 #include <sys/user.h>
39 #include <sys/errno.h>
40 #include <sys/vnode.h>
41 #include <sys/mman.h>
42 #include <sys/kmem.h>
43 #include <sys/proc.h>
44 #include <sys/pathname.h>
45 #include <sys/cmn_err.h>
46 #include <sys/systm.h>
47 #include <sys/elf.h>
48 #include <sys/vmsystm.h>
49 #include <sys/debug.h>
50 #include <sys/auxv.h>
51 #include <sys/exec.h>
52 #include <sys/prsystm.h>
53 #include <vm/as.h>
54 #include <vm/rm.h>
55 #include <vm/seg.h>
56 #include <vm/seg_vn.h>
57 #include <sys/modctl.h>
58 #include <sys/systeminfo.h>
59 #include <sys/vmparam.h>
60 #include <sys/machelf.h>
61 #include <sys/shm_impl.h>
62 #include <sys/archsystm.h>
63 #include <sys/fasttrap.h>
64 #include <sys/brand.h>
65 #include "elf_impl.h"
66 #include <sys/sdt.h>
67 #include <sys/siginfo.h>
68
69 extern int at_flags;
70
71 #define ORIGIN_STR "ORIGIN"
72 #define ORIGIN_STR_SIZE 6
73
74 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
75 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
76 ssize_t *);
77 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
78 ssize_t *, caddr_t *, ssize_t *);
79 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
80 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
81 Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
82 caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
83
84 typedef enum {
85 STR_CTF,
86 STR_SYMTAB,
87 STR_DYNSYM,
88 STR_STRTAB,
89 STR_DYNSTR,
145 }
146
147 static int
148 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
149 {
150 ASSERT(phdrp->p_type == PT_SUNWDTRACE);
151
152 /*
153 * See the comment in fasttrap.h for information on how to safely
154 * update this program header.
155 */
156 if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
157 (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
158 return (-1);
159
160 args->thrptr = phdrp->p_vaddr + base;
161
162 return (0);
163 }
164
165 /*
166 * Map in the executable pointed to by vp. Returns 0 on success.
167 */
168 int
169 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
170 intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
171 caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
172 {
173 size_t len;
174 struct vattr vat;
175 caddr_t phdrbase = NULL;
176 ssize_t phdrsize;
177 int nshdrs, shstrndx, nphdrs;
178 int error = 0;
179 Phdr *uphdr = NULL;
180 Phdr *junk = NULL;
181 Phdr *dynphdr = NULL;
182 Phdr *dtrphdr = NULL;
183 uintptr_t lddata;
184 long execsz;
238 kmem_free(phdrbase, phdrsize);
239 return (error);
240 }
241
242 /*ARGSUSED*/
243 int
244 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
245 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
246 int brand_action)
247 {
248 caddr_t phdrbase = NULL;
249 caddr_t bssbase = 0;
250 caddr_t brkbase = 0;
251 size_t brksize = 0;
252 ssize_t dlnsize;
253 aux_entry_t *aux;
254 int error;
255 ssize_t resid;
256 int fd = -1;
257 intptr_t voffset;
258 Phdr *dyphdr = NULL;
259 Phdr *stphdr = NULL;
260 Phdr *uphdr = NULL;
261 Phdr *junk = NULL;
262 size_t len;
263 ssize_t phdrsize;
264 int postfixsize = 0;
265 int i, hsize;
266 Phdr *phdrp;
267 Phdr *dataphdrp = NULL;
268 Phdr *dtrphdr;
269 Phdr *capphdr = NULL;
270 Cap *cap = NULL;
271 ssize_t capsize;
272 int hasu = 0;
273 int hasauxv = 0;
274 int hasdy = 0;
275 int branded = 0;
276
277 struct proc *p = ttoproc(curthread);
278 struct user *up = PTOU(p);
279 struct bigwad {
280 Ehdr ehdr;
281 aux_entry_t elfargs[__KERN_NAUXV_IMPL];
282 char dl_name[MAXPATHLEN];
283 char pathbuf[MAXPATHLEN];
284 struct vattr vattr;
285 struct execenv exenv;
286 } *bigwad; /* kmem_alloc this behemoth so we don't blow stack */
287 Ehdr *ehdrp;
288 int nshdrs, shstrndx, nphdrs;
289 char *dlnp;
290 char *pathbufp;
291 rlim64_t limit;
292 rlim64_t roundlimit;
293
294 ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
353 */
354 if ((level <= INTP_MAXDEPTH) &&
355 (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
356 error = BROP(p)->b_elfexec(vp, uap, args,
357 idatap, level + 1, execsz, setid, exec_file, cred,
358 brand_action);
359 goto out;
360 }
361
362 /*
363 * Determine aux size now so that stack can be built
364 * in one shot (except actual copyout of aux image),
365 * determine any non-default stack protections,
366 * and still have this code be machine independent.
367 */
368 hsize = ehdrp->e_phentsize;
369 phdrp = (Phdr *)phdrbase;
370 for (i = nphdrs; i > 0; i--) {
371 switch (phdrp->p_type) {
372 case PT_INTERP:
373 hasauxv = hasdy = 1;
374 break;
375 case PT_PHDR:
376 hasu = 1;
377 break;
378 case PT_SUNWSTACK:
379 args->stk_prot = PROT_USER;
380 if (phdrp->p_flags & PF_R)
381 args->stk_prot |= PROT_READ;
382 if (phdrp->p_flags & PF_W)
383 args->stk_prot |= PROT_WRITE;
384 if (phdrp->p_flags & PF_X)
385 args->stk_prot |= PROT_EXEC;
386 break;
387 case PT_LOAD:
388 dataphdrp = phdrp;
389 break;
390 case PT_SUNWCAP:
391 capphdr = phdrp;
392 break;
393 }
394 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
395 }
396
397 if (ehdrp->e_type != ET_EXEC) {
398 dataphdrp = NULL;
399 hasauxv = 1;
400 }
401
402 /* Copy BSS permissions to args->dat_prot */
403 if (dataphdrp != NULL) {
404 args->dat_prot = PROT_USER;
405 if (dataphdrp->p_flags & PF_R)
406 args->dat_prot |= PROT_READ;
407 if (dataphdrp->p_flags & PF_W)
408 args->dat_prot |= PROT_WRITE;
409 if (dataphdrp->p_flags & PF_X)
410 args->dat_prot |= PROT_EXEC;
411 }
412
415 * it now. This may be increased by exec_args if there are
416 * ISA-specific types (included in __KERN_NAUXV_IMPL).
417 */
418 if (hasauxv) {
419 /*
420 * If an AUX vector is being built - the base AUX
421 * entries are:
422 *
423 * AT_BASE
424 * AT_FLAGS
425 * AT_PAGESZ
426 * AT_SUN_AUXFLAGS
427 * AT_SUN_HWCAP
428 * AT_SUN_HWCAP2
429 * AT_SUN_PLATFORM (added in stk_copyout)
430 * AT_SUN_EXECNAME (added in stk_copyout)
431 * AT_NULL
432 *
433 * total == 9
434 */
435 if (hasdy && hasu) {
436 /*
437 * Has PT_INTERP & PT_PHDR - the auxvectors that
438 * will be built are:
439 *
440 * AT_PHDR
441 * AT_PHENT
442 * AT_PHNUM
443 * AT_ENTRY
444 * AT_LDDATA
445 *
446 * total = 5
447 */
448 args->auxsize = (9 + 5) * sizeof (aux_entry_t);
449 } else if (hasdy) {
450 /*
451 * Has PT_INTERP but no PT_PHDR
452 *
453 * AT_EXECFD
454 * AT_LDDATA
455 *
456 * total = 2
457 */
458 args->auxsize = (9 + 2) * sizeof (aux_entry_t);
459 } else {
460 args->auxsize = 9 * sizeof (aux_entry_t);
461 }
462 } else {
463 args->auxsize = 0;
464 }
465
466 /*
467 * If this binary is using an emulator, we need to add an
468 * AT_SUN_EMULATOR aux entry.
469 */
470 if (args->emulator != NULL)
471 args->auxsize += sizeof (aux_entry_t);
472
473 if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
474 branded = 1;
475 /*
476 * We will be adding 4 entries to the aux vectors. One for
477 * the brandname and 3 for the brand specific aux vectors.
478 */
479 args->auxsize += 4 * sizeof (aux_entry_t);
480 }
481
482 /* Hardware/Software capabilities */
483 if (capphdr != NULL &&
484 (capsize = capphdr->p_filesz) > 0 &&
485 capsize <= 16 * sizeof (*cap)) {
486 int ncaps = capsize / sizeof (*cap);
487 Cap *cp;
488
489 cap = kmem_alloc(capsize, KM_SLEEP);
490 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
491 capsize, (offset_t)capphdr->p_offset,
492 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
493 uprintf("%s: Cannot read capabilities section\n",
494 exec_file);
495 goto out;
496 }
497 for (cp = cap; cp < cap + ncaps; cp++) {
498 if (cp->c_tag == CA_SUNW_SF_1 &&
499 (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
500 if (args->to_model == DATAMODEL_LP64)
501 args->addr32 = 1;
512 if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
513 if (error == -1) {
514 error = ENOEXEC;
515 goto bad;
516 }
517 goto out;
518 }
519 /* we're single threaded after this point */
520
521 /*
522 * If this is an ET_DYN executable (shared object),
523 * determine its memory size so that mapelfexec() can load it.
524 */
525 if (ehdrp->e_type == ET_DYN)
526 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
527 else
528 len = 0;
529
530 dtrphdr = NULL;
531
532 if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
533 &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
534 len, execsz, &brksize)) != 0)
535 goto bad;
536
537 if (uphdr != NULL && dyphdr == NULL)
538 goto bad;
539
540 if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
541 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
542 goto bad;
543 }
544
545 if (dyphdr != NULL) {
546 size_t len;
547 uintptr_t lddata;
548 char *p;
549 struct vnode *nvp;
550
551 dlnsize = dyphdr->p_filesz;
552
553 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
554 goto bad;
555
556 /*
557 * Read in "interpreter" pathname.
558 */
559 if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz,
560 (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
561 CRED(), &resid)) != 0) {
562 uprintf("%s: Cannot obtain interpreter pathname\n",
563 exec_file);
564 goto bad;
565 }
566
567 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
568 goto bad;
569
570 /*
571 * Search for '$ORIGIN' token in interpreter path.
572 * If found, expand it.
573 */
574 for (p = dlnp; p = strchr(p, '$'); ) {
575 uint_t len, curlen;
576 char *_ptr;
577
578 if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
579 continue;
580
758 * libraries instead of using the brand libraries that are
759 * installed in the zone. We only do this for processes
760 * which we trust because we see they are already running
761 * under pfexec (where uid != euid). This prevents a
762 * malicious user within the zone from crafting a wrapper to
763 * run native suid commands with unsecure libraries interposed.
764 */
765 if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
766 (setid &= ~EXECSETID_SETID) != 0))
767 auxf &= ~AF_SUN_SETUGID;
768
769 /*
770 * Record the user addr of the auxflags aux vector entry
771 * since brands may optionally want to manipulate this field.
772 */
773 args->auxp_auxflags =
774 (char *)((char *)args->stackend +
775 ((char *)&aux->a_type -
776 (char *)bigwad->elfargs));
777 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
778 /*
779 * Hardware capability flag word (performance hints)
780 * Used for choosing faster library routines.
781 * (Potentially different between 32-bit and 64-bit ABIs)
782 */
783 #if defined(_LP64)
784 if (args->to_model == DATAMODEL_NATIVE) {
785 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
786 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
787 } else {
788 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
789 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
790 }
791 #else
792 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
793 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
794 #endif
795 if (branded) {
796 /*
797 * Reserve space for the brand-private aux vectors,
1167 kmem_free(*shstrbasep, *shstrsizep);
1168 return (err);
1169 }
1170
1171 /*
1172 * Make sure the strtab is null-terminated to make sure we
1173 * don't run off the end of the table.
1174 */
1175 (*shstrbasep)[*shstrsizep - 1] = '\0';
1176
1177 return (0);
1178 }
1179
1180 static int
1181 mapelfexec(
1182 vnode_t *vp,
1183 Ehdr *ehdr,
1184 int nphdrs,
1185 caddr_t phdrbase,
1186 Phdr **uphdr,
1187 Phdr **dyphdr,
1188 Phdr **stphdr,
1189 Phdr **dtphdr,
1190 Phdr *dataphdrp,
1191 caddr_t *bssbase,
1192 caddr_t *brkbase,
1193 intptr_t *voffset,
1194 intptr_t *minaddr,
1195 size_t len,
1196 long *execsz,
1197 size_t *brksize)
1198 {
1199 Phdr *phdr;
1200 int i, prot, error;
1201 caddr_t addr = NULL;
1202 size_t zfodsz;
1203 int ptload = 0;
1204 int page;
1205 off_t offset;
1206 int hsize = ehdr->e_phentsize;
1207 caddr_t mintmp = (caddr_t)-1;
1208 extern int use_brk_lpg;
1209
1210 if (ehdr->e_type == ET_DYN) {
1211 /*
1212 * Obtain the virtual address of a hole in the
1213 * address space to map the "interpreter".
1214 */
1215 map_addr(&addr, len, (offset_t)0, 1, 0);
1216 if (addr == NULL)
1217 return (ENOMEM);
1218 *voffset = (intptr_t)addr;
1219
1220 /*
1221 * Calculate the minimum vaddr so it can be subtracted out.
1222 * According to the ELF specification, since PT_LOAD sections
1223 * must be sorted by increasing p_vaddr values, this is
1224 * guaranteed to be the first PT_LOAD section.
1225 */
1226 phdr = (Phdr *)phdrbase;
1227 for (i = nphdrs; i > 0; i--) {
1228 if (phdr->p_type == PT_LOAD) {
1229 *voffset -= (uintptr_t)phdr->p_vaddr;
1230 break;
1231 }
1232 phdr = (Phdr *)((caddr_t)phdr + hsize);
1233 }
1234
1235 } else {
1236 *voffset = 0;
1237 }
1238 phdr = (Phdr *)phdrbase;
1239 for (i = nphdrs; i > 0; i--) {
1240 switch (phdr->p_type) {
1241 case PT_LOAD:
1242 if ((*dyphdr != NULL) && (*uphdr == NULL))
1243 return (0);
1244
1245 ptload = 1;
1246 prot = PROT_USER;
1247 if (phdr->p_flags & PF_R)
1248 prot |= PROT_READ;
1249 if (phdr->p_flags & PF_W)
1250 prot |= PROT_WRITE;
1251 if (phdr->p_flags & PF_X)
1252 prot |= PROT_EXEC;
1253
1254 addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1255
1256 /*
1257 * Keep track of the segment with the lowest starting
1258 * address.
1259 */
1260 if (addr < mintmp)
1261 mintmp = addr;
1262
1277 */
1278 if (brksize != NULL && use_brk_lpg &&
1279 zfodsz != 0 && phdr == dataphdrp &&
1280 (prot & PROT_WRITE)) {
1281 size_t tlen = P2NPHASE((uintptr_t)addr +
1282 phdr->p_filesz, PAGESIZE);
1283
1284 if (zfodsz > tlen) {
1285 curproc->p_brkpageszc =
1286 page_szc(map_pgsz(MAPPGSZ_HEAP,
1287 curproc, addr + phdr->p_filesz +
1288 tlen, zfodsz - tlen, 0));
1289 }
1290 }
1291
1292 if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1293 (prot & PROT_WRITE)) {
1294 uint_t szc = curproc->p_brkpageszc;
1295 size_t pgsz = page_get_pagesize(szc);
1296 caddr_t ebss = addr + phdr->p_memsz;
1297 size_t extra_zfodsz;
1298
1299 ASSERT(pgsz > PAGESIZE);
1300
1301 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1302
1303 if (error = execmap(vp, addr, phdr->p_filesz,
1304 zfodsz + extra_zfodsz, phdr->p_offset,
1305 prot, page, szc))
1306 goto bad;
1307 if (brksize != NULL)
1308 *brksize = extra_zfodsz;
1309 } else {
1310 if (error = execmap(vp, addr, phdr->p_filesz,
1311 zfodsz, phdr->p_offset, prot, page, 0))
1312 goto bad;
1313 }
1314
1315 if (bssbase != NULL && addr >= *bssbase &&
1316 phdr == dataphdrp) {
1317 *bssbase = addr + phdr->p_filesz;
1318 }
1319 if (brkbase != NULL && addr >= *brkbase) {
1320 *brkbase = addr + phdr->p_memsz;
1321 }
1322
1323 *execsz += btopr(phdr->p_memsz);
1324 break;
1325
1326 case PT_INTERP:
1327 if (ptload)
1328 goto bad;
1329 *dyphdr = phdr;
1330 break;
1331
1332 case PT_SHLIB:
1333 *stphdr = phdr;
1334 break;
1335
1336 case PT_PHDR:
1337 if (ptload)
1338 goto bad;
1339 *uphdr = phdr;
1340 break;
1341
1342 case PT_NULL:
1343 case PT_DYNAMIC:
1344 case PT_NOTE:
1345 break;
1346
1347 case PT_SUNWDTRACE:
1348 if (dtphdr != NULL)
1349 *dtphdr = phdr;
1350 break;
1351
1352 default:
1353 break;
1354 }
1355 phdr = (Phdr *)((caddr_t)phdr + hsize);
1356 }
1357
1358 if (minaddr != NULL) {
1359 ASSERT(mintmp != (caddr_t)-1);
1360 *minaddr = (intptr_t)mintmp;
1361 }
1362
1363 return (0);
1364 bad:
1365 if (error == 0)
1366 error = EINVAL;
1367 return (error);
1368 }
1369
1370 int
1371 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1372 rlim64_t rlimit, cred_t *credp)
1373 {
1374 Note note;
1375 int error;
1376
1377 bzero(¬e, sizeof (note));
1378 bcopy("CORE", note.name, 4);
1379 note.nhdr.n_type = type;
1380 /*
1381 * The System V ABI states that n_namesz must be the length of the
1382 * string that follows the Nhdr structure including the terminating
|
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 /*
29 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
30 */
31
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/thread.h>
35 #include <sys/sysmacros.h>
36 #include <sys/signal.h>
37 #include <sys/cred.h>
38 #include <sys/user.h>
39 #include <sys/errno.h>
40 #include <sys/vnode.h>
41 #include <sys/mman.h>
42 #include <sys/kmem.h>
43 #include <sys/proc.h>
44 #include <sys/pathname.h>
45 #include <sys/policy.h>
46 #include <sys/cmn_err.h>
47 #include <sys/systm.h>
48 #include <sys/elf.h>
49 #include <sys/vmsystm.h>
50 #include <sys/debug.h>
51 #include <sys/auxv.h>
52 #include <sys/exec.h>
53 #include <sys/prsystm.h>
54 #include <vm/as.h>
55 #include <vm/rm.h>
56 #include <vm/seg.h>
57 #include <vm/seg_vn.h>
58 #include <sys/modctl.h>
59 #include <sys/systeminfo.h>
60 #include <sys/vmparam.h>
61 #include <sys/machelf.h>
62 #include <sys/shm_impl.h>
63 #include <sys/archsystm.h>
64 #include <sys/fasttrap.h>
65 #include <sys/brand.h>
66 #include "elf_impl.h"
67 #include <sys/sdt.h>
68 #include <sys/siginfo.h>
69 #include <sys/random.h>
70
71 extern int at_flags;
72 extern volatile size_t aslr_max_brk_skew;
73
74 #define ORIGIN_STR "ORIGIN"
75 #define ORIGIN_STR_SIZE 6
76
77 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
78 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
79 ssize_t *);
80 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
81 ssize_t *, caddr_t *, ssize_t *);
82 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
83 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
84 Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
85 caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
86
87 typedef enum {
88 STR_CTF,
89 STR_SYMTAB,
90 STR_DYNSYM,
91 STR_STRTAB,
92 STR_DYNSTR,
148 }
149
150 static int
151 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
152 {
153 ASSERT(phdrp->p_type == PT_SUNWDTRACE);
154
155 /*
156 * See the comment in fasttrap.h for information on how to safely
157 * update this program header.
158 */
159 if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
160 (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
161 return (-1);
162
163 args->thrptr = phdrp->p_vaddr + base;
164
165 return (0);
166 }
167
168 static int
169 handle_secflag_dt(proc_t *p, uint_t dt, uint_t val)
170 {
171 uint_t flag;
172
173 switch (dt) {
174 case DT_SUNW_ASLR:
175 flag = PROC_SEC_ASLR;
176 break;
177 default:
178 return (EINVAL);
179 }
180
181 if (val == 0) {
182 if (secflag_isset(p->p_secflags.psf_lower, flag))
183 return (EPERM);
184 if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
185 secflag_isset(p->p_secflags.psf_inherit, flag))
186 return (EPERM);
187
188 secflag_clear(&p->p_secflags.psf_inherit, flag);
189 secflag_clear(&p->p_secflags.psf_effective, flag);
190 } else {
191 if (!secflag_isset(p->p_secflags.psf_upper, flag))
192 return (EPERM);
193
194 if ((secpolicy_psecflags(CRED(), p, p) != 0) &&
195 !secflag_isset(p->p_secflags.psf_inherit, flag))
196 return (EPERM);
197
198 secflag_set(&p->p_secflags.psf_inherit, flag);
199 secflag_set(&p->p_secflags.psf_effective, flag);
200 }
201
202 return (0);
203 }
204
205 /*
206 * Map in the executable pointed to by vp. Returns 0 on success.
207 */
208 int
209 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
210 intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
211 caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
212 {
213 size_t len;
214 struct vattr vat;
215 caddr_t phdrbase = NULL;
216 ssize_t phdrsize;
217 int nshdrs, shstrndx, nphdrs;
218 int error = 0;
219 Phdr *uphdr = NULL;
220 Phdr *junk = NULL;
221 Phdr *dynphdr = NULL;
222 Phdr *dtrphdr = NULL;
223 uintptr_t lddata;
224 long execsz;
278 kmem_free(phdrbase, phdrsize);
279 return (error);
280 }
281
282 /*ARGSUSED*/
283 int
284 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
285 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
286 int brand_action)
287 {
288 caddr_t phdrbase = NULL;
289 caddr_t bssbase = 0;
290 caddr_t brkbase = 0;
291 size_t brksize = 0;
292 ssize_t dlnsize;
293 aux_entry_t *aux;
294 int error;
295 ssize_t resid;
296 int fd = -1;
297 intptr_t voffset;
298 Phdr *intphdr = NULL;
299 Phdr *dynamicphdr = NULL;
300 Phdr *stphdr = NULL;
301 Phdr *uphdr = NULL;
302 Phdr *junk = NULL;
303 size_t len;
304 ssize_t phdrsize;
305 int postfixsize = 0;
306 int i, hsize;
307 Phdr *phdrp;
308 Phdr *dataphdrp = NULL;
309 Phdr *dtrphdr;
310 Phdr *capphdr = NULL;
311 Cap *cap = NULL;
312 ssize_t capsize;
313 Dyn *dyn = NULL;
314 int hasu = 0;
315 int hasauxv = 0;
316 int hasintp = 0;
317 int branded = 0;
318
319 struct proc *p = ttoproc(curthread);
320 struct user *up = PTOU(p);
321 struct bigwad {
322 Ehdr ehdr;
323 aux_entry_t elfargs[__KERN_NAUXV_IMPL];
324 char dl_name[MAXPATHLEN];
325 char pathbuf[MAXPATHLEN];
326 struct vattr vattr;
327 struct execenv exenv;
328 } *bigwad; /* kmem_alloc this behemoth so we don't blow stack */
329 Ehdr *ehdrp;
330 int nshdrs, shstrndx, nphdrs;
331 char *dlnp;
332 char *pathbufp;
333 rlim64_t limit;
334 rlim64_t roundlimit;
335
336 ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
395 */
396 if ((level <= INTP_MAXDEPTH) &&
397 (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
398 error = BROP(p)->b_elfexec(vp, uap, args,
399 idatap, level + 1, execsz, setid, exec_file, cred,
400 brand_action);
401 goto out;
402 }
403
404 /*
405 * Determine aux size now so that stack can be built
406 * in one shot (except actual copyout of aux image),
407 * determine any non-default stack protections,
408 * and still have this code be machine independent.
409 */
410 hsize = ehdrp->e_phentsize;
411 phdrp = (Phdr *)phdrbase;
412 for (i = nphdrs; i > 0; i--) {
413 switch (phdrp->p_type) {
414 case PT_INTERP:
415 hasauxv = hasintp = 1;
416 break;
417 case PT_PHDR:
418 hasu = 1;
419 break;
420 case PT_SUNWSTACK:
421 args->stk_prot = PROT_USER;
422 if (phdrp->p_flags & PF_R)
423 args->stk_prot |= PROT_READ;
424 if (phdrp->p_flags & PF_W)
425 args->stk_prot |= PROT_WRITE;
426 if (phdrp->p_flags & PF_X)
427 args->stk_prot |= PROT_EXEC;
428 break;
429 case PT_LOAD:
430 dataphdrp = phdrp;
431 break;
432 case PT_SUNWCAP:
433 capphdr = phdrp;
434 break;
435 case PT_DYNAMIC:
436 dynamicphdr = phdrp;
437 break;
438 }
439 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
440 }
441
442 if (ehdrp->e_type != ET_EXEC) {
443 dataphdrp = NULL;
444 hasauxv = 1;
445 }
446
447 /* Copy BSS permissions to args->dat_prot */
448 if (dataphdrp != NULL) {
449 args->dat_prot = PROT_USER;
450 if (dataphdrp->p_flags & PF_R)
451 args->dat_prot |= PROT_READ;
452 if (dataphdrp->p_flags & PF_W)
453 args->dat_prot |= PROT_WRITE;
454 if (dataphdrp->p_flags & PF_X)
455 args->dat_prot |= PROT_EXEC;
456 }
457
460 * it now. This may be increased by exec_args if there are
461 * ISA-specific types (included in __KERN_NAUXV_IMPL).
462 */
463 if (hasauxv) {
464 /*
465 * If an AUX vector is being built - the base AUX
466 * entries are:
467 *
468 * AT_BASE
469 * AT_FLAGS
470 * AT_PAGESZ
471 * AT_SUN_AUXFLAGS
472 * AT_SUN_HWCAP
473 * AT_SUN_HWCAP2
474 * AT_SUN_PLATFORM (added in stk_copyout)
475 * AT_SUN_EXECNAME (added in stk_copyout)
476 * AT_NULL
477 *
478 * total == 9
479 */
480 if (hasintp && hasu) {
481 /*
482 * Has PT_INTERP & PT_PHDR - the auxvectors that
483 * will be built are:
484 *
485 * AT_PHDR
486 * AT_PHENT
487 * AT_PHNUM
488 * AT_ENTRY
489 * AT_LDDATA
490 *
491 * total = 5
492 */
493 args->auxsize = (9 + 5) * sizeof (aux_entry_t);
494 } else if (hasintp) {
495 /*
496 * Has PT_INTERP but no PT_PHDR
497 *
498 * AT_EXECFD
499 * AT_LDDATA
500 *
501 * total = 2
502 */
503 args->auxsize = (9 + 2) * sizeof (aux_entry_t);
504 } else {
505 args->auxsize = 9 * sizeof (aux_entry_t);
506 }
507 } else {
508 args->auxsize = 0;
509 }
510
511 /*
512 * If this binary is using an emulator, we need to add an
513 * AT_SUN_EMULATOR aux entry.
514 */
515 if (args->emulator != NULL)
516 args->auxsize += sizeof (aux_entry_t);
517
518 if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
519 branded = 1;
520 /*
521 * We will be adding 4 entries to the aux vectors. One for
522 * the brandname and 3 for the brand specific aux vectors.
523 */
524 args->auxsize += 4 * sizeof (aux_entry_t);
525 }
526
527 /* If the binary has an explicit ASLR flag, it must be honoured */
528 if ((dynamicphdr != NULL) &&
529 (dynamicphdr->p_filesz > 0)) {
530 Dyn *dp;
531 off_t i = 0;
532
533 #define DYN_STRIDE 100
534 for (i = 0; i < dynamicphdr->p_filesz;
535 i += sizeof (*dyn) * DYN_STRIDE) {
536 int ndyns = (dynamicphdr->p_filesz - i) / sizeof (*dyn);
537 size_t dynsize;
538
539 ndyns = MIN(DYN_STRIDE, ndyns);
540 dynsize = ndyns * sizeof (*dyn);
541
542 dyn = kmem_alloc(dynsize, KM_SLEEP);
543
544 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)dyn,
545 dynsize, (offset_t)(dynamicphdr->p_offset + i),
546 UIO_SYSSPACE, 0, (rlim64_t)0,
547 CRED(), &resid)) != 0) {
548 uprintf("%s: cannot read .dynamic section\n",
549 exec_file);
550 goto out;
551 }
552
553 for (dp = dyn; dp < (dyn + ndyns); dp++) {
554 if (dp->d_tag == DT_SUNW_ASLR) {
555 if ((error = handle_secflag_dt(p,
556 DT_SUNW_ASLR,
557 dp->d_un.d_val)) != 0) {
558 uprintf("%s: error setting "
559 "security-flag from "
560 "DT_SUNW_ASLR: %d\n",
561 exec_file, error);
562 goto out;
563 }
564 }
565 }
566
567 kmem_free(dyn, dynsize);
568 }
569 }
570
571 /* Hardware/Software capabilities */
572 if (capphdr != NULL &&
573 (capsize = capphdr->p_filesz) > 0 &&
574 capsize <= 16 * sizeof (*cap)) {
575 int ncaps = capsize / sizeof (*cap);
576 Cap *cp;
577
578 cap = kmem_alloc(capsize, KM_SLEEP);
579 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
580 capsize, (offset_t)capphdr->p_offset,
581 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
582 uprintf("%s: Cannot read capabilities section\n",
583 exec_file);
584 goto out;
585 }
586 for (cp = cap; cp < cap + ncaps; cp++) {
587 if (cp->c_tag == CA_SUNW_SF_1 &&
588 (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
589 if (args->to_model == DATAMODEL_LP64)
590 args->addr32 = 1;
601 if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
602 if (error == -1) {
603 error = ENOEXEC;
604 goto bad;
605 }
606 goto out;
607 }
608 /* we're single threaded after this point */
609
610 /*
611 * If this is an ET_DYN executable (shared object),
612 * determine its memory size so that mapelfexec() can load it.
613 */
614 if (ehdrp->e_type == ET_DYN)
615 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
616 else
617 len = 0;
618
619 dtrphdr = NULL;
620
621 if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &intphdr,
622 &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
623 len, execsz, &brksize)) != 0)
624 goto bad;
625
626 if (uphdr != NULL && intphdr == NULL)
627 goto bad;
628
629 if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
630 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
631 goto bad;
632 }
633
634 if (intphdr != NULL) {
635 size_t len;
636 uintptr_t lddata;
637 char *p;
638 struct vnode *nvp;
639
640 dlnsize = intphdr->p_filesz;
641
642 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
643 goto bad;
644
645 /*
646 * Read in "interpreter" pathname.
647 */
648 if ((error = vn_rdwr(UIO_READ, vp, dlnp, intphdr->p_filesz,
649 (offset_t)intphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
650 CRED(), &resid)) != 0) {
651 uprintf("%s: Cannot obtain interpreter pathname\n",
652 exec_file);
653 goto bad;
654 }
655
656 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
657 goto bad;
658
659 /*
660 * Search for '$ORIGIN' token in interpreter path.
661 * If found, expand it.
662 */
663 for (p = dlnp; p = strchr(p, '$'); ) {
664 uint_t len, curlen;
665 char *_ptr;
666
667 if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
668 continue;
669
847 * libraries instead of using the brand libraries that are
848 * installed in the zone. We only do this for processes
849 * which we trust because we see they are already running
850 * under pfexec (where uid != euid). This prevents a
851 * malicious user within the zone from crafting a wrapper to
852 * run native suid commands with unsecure libraries interposed.
853 */
854 if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
855 (setid &= ~EXECSETID_SETID) != 0))
856 auxf &= ~AF_SUN_SETUGID;
857
858 /*
859 * Record the user addr of the auxflags aux vector entry
860 * since brands may optionally want to manipulate this field.
861 */
862 args->auxp_auxflags =
863 (char *)((char *)args->stackend +
864 ((char *)&aux->a_type -
865 (char *)bigwad->elfargs));
866 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
867
868 /*
869 * Hardware capability flag word (performance hints)
870 * Used for choosing faster library routines.
871 * (Potentially different between 32-bit and 64-bit ABIs)
872 */
873 #if defined(_LP64)
874 if (args->to_model == DATAMODEL_NATIVE) {
875 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
876 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
877 } else {
878 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
879 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
880 }
881 #else
882 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
883 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
884 #endif
885 if (branded) {
886 /*
887 * Reserve space for the brand-private aux vectors,
1257 kmem_free(*shstrbasep, *shstrsizep);
1258 return (err);
1259 }
1260
1261 /*
1262 * Make sure the strtab is null-terminated to make sure we
1263 * don't run off the end of the table.
1264 */
1265 (*shstrbasep)[*shstrsizep - 1] = '\0';
1266
1267 return (0);
1268 }
1269
1270 static int
1271 mapelfexec(
1272 vnode_t *vp,
1273 Ehdr *ehdr,
1274 int nphdrs,
1275 caddr_t phdrbase,
1276 Phdr **uphdr,
1277 Phdr **intphdr,
1278 Phdr **stphdr,
1279 Phdr **dtphdr,
1280 Phdr *dataphdrp,
1281 caddr_t *bssbase,
1282 caddr_t *brkbase,
1283 intptr_t *voffset,
1284 intptr_t *minaddr,
1285 size_t len,
1286 long *execsz,
1287 size_t *brksize)
1288 {
1289 Phdr *phdr;
1290 int i, prot, error;
1291 caddr_t addr = NULL;
1292 size_t zfodsz;
1293 int ptload = 0;
1294 int page;
1295 off_t offset;
1296 int hsize = ehdr->e_phentsize;
1297 caddr_t mintmp = (caddr_t)-1;
1298 extern int use_brk_lpg;
1299
1300 if (ehdr->e_type == ET_DYN) {
1301 secflagset_t flags = 0;
1302 /*
1303 * Obtain the virtual address of a hole in the
1304 * address space to map the "interpreter".
1305 */
1306 if (secflag_enabled(curproc, PROC_SEC_ASLR))
1307 flags |= _MAP_RANDOMIZE;
1308
1309 map_addr(&addr, len, (offset_t)0, 1, flags);
1310 if (addr == NULL)
1311 return (ENOMEM);
1312 *voffset = (intptr_t)addr;
1313
1314 /*
1315 * Calculate the minimum vaddr so it can be subtracted out.
1316 * According to the ELF specification, since PT_LOAD sections
1317 * must be sorted by increasing p_vaddr values, this is
1318 * guaranteed to be the first PT_LOAD section.
1319 */
1320 phdr = (Phdr *)phdrbase;
1321 for (i = nphdrs; i > 0; i--) {
1322 if (phdr->p_type == PT_LOAD) {
1323 *voffset -= (uintptr_t)phdr->p_vaddr;
1324 break;
1325 }
1326 phdr = (Phdr *)((caddr_t)phdr + hsize);
1327 }
1328
1329 } else {
1330 *voffset = 0;
1331 }
1332 phdr = (Phdr *)phdrbase;
1333 for (i = nphdrs; i > 0; i--) {
1334 switch (phdr->p_type) {
1335 case PT_LOAD:
1336 if ((*intphdr != NULL) && (*uphdr == NULL))
1337 return (0);
1338
1339 ptload = 1;
1340 prot = PROT_USER;
1341 if (phdr->p_flags & PF_R)
1342 prot |= PROT_READ;
1343 if (phdr->p_flags & PF_W)
1344 prot |= PROT_WRITE;
1345 if (phdr->p_flags & PF_X)
1346 prot |= PROT_EXEC;
1347
1348 addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1349
1350 /*
1351 * Keep track of the segment with the lowest starting
1352 * address.
1353 */
1354 if (addr < mintmp)
1355 mintmp = addr;
1356
1371 */
1372 if (brksize != NULL && use_brk_lpg &&
1373 zfodsz != 0 && phdr == dataphdrp &&
1374 (prot & PROT_WRITE)) {
1375 size_t tlen = P2NPHASE((uintptr_t)addr +
1376 phdr->p_filesz, PAGESIZE);
1377
1378 if (zfodsz > tlen) {
1379 curproc->p_brkpageszc =
1380 page_szc(map_pgsz(MAPPGSZ_HEAP,
1381 curproc, addr + phdr->p_filesz +
1382 tlen, zfodsz - tlen, 0));
1383 }
1384 }
1385
1386 if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1387 (prot & PROT_WRITE)) {
1388 uint_t szc = curproc->p_brkpageszc;
1389 size_t pgsz = page_get_pagesize(szc);
1390 caddr_t ebss = addr + phdr->p_memsz;
1391 /*
1392 * If we need extra space to keep the BSS an
1393 * integral number of pages in size, some of
1394 * that space may fall beyond p_brkbase, so we
1395 * need to set p_brksize to account for it
1396 * being (logically) part of the brk.
1397 */
1398 size_t extra_zfodsz;
1399
1400 ASSERT(pgsz > PAGESIZE);
1401
1402 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1403
1404 if (error = execmap(vp, addr, phdr->p_filesz,
1405 zfodsz + extra_zfodsz, phdr->p_offset,
1406 prot, page, szc))
1407 goto bad;
1408 if (brksize != NULL)
1409 *brksize = extra_zfodsz;
1410 } else {
1411 if (error = execmap(vp, addr, phdr->p_filesz,
1412 zfodsz, phdr->p_offset, prot, page, 0))
1413 goto bad;
1414 }
1415
1416 if (bssbase != NULL && addr >= *bssbase &&
1417 phdr == dataphdrp) {
1418 *bssbase = addr + phdr->p_filesz;
1419 }
1420 if (brkbase != NULL && addr >= *brkbase) {
1421 *brkbase = addr + phdr->p_memsz;
1422 }
1423
1424 *execsz += btopr(phdr->p_memsz);
1425 break;
1426
1427 case PT_INTERP:
1428 if (ptload)
1429 goto bad;
1430 *intphdr = phdr;
1431 break;
1432
1433 case PT_SHLIB:
1434 *stphdr = phdr;
1435 break;
1436
1437 case PT_PHDR:
1438 if (ptload)
1439 goto bad;
1440 *uphdr = phdr;
1441 break;
1442
1443 case PT_NULL:
1444 case PT_DYNAMIC:
1445 case PT_NOTE:
1446 break;
1447
1448 case PT_SUNWDTRACE:
1449 if (dtphdr != NULL)
1450 *dtphdr = phdr;
1451 break;
1452
1453 default:
1454 break;
1455 }
1456 phdr = (Phdr *)((caddr_t)phdr + hsize);
1457 }
1458
1459 if (minaddr != NULL) {
1460 ASSERT(mintmp != (caddr_t)-1);
1461 *minaddr = (intptr_t)mintmp;
1462 }
1463
1464 if (brkbase != NULL && secflag_enabled(curproc, PROC_SEC_ASLR)) {
1465 size_t off;
1466 uintptr_t base = (uintptr_t)*brkbase;
1467 uintptr_t oend = base + *brksize;
1468
1469 ASSERT(ISP2(aslr_max_brk_skew));
1470
1471 (void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
1472 base += P2PHASE(off, aslr_max_brk_skew);
1473 base = P2ROUNDUP(base, PAGESIZE);
1474 *brkbase = (caddr_t)base;
1475 /*
1476 * Above, we set *brksize to account for the possibility we
1477 * had to grow the 'brk' in padding out the BSS to a page
1478 * boundary.
1479 *
1480 * We now need to adjust that based on where we now are
1481 * actually putting the brk.
1482 */
1483 if (oend > base)
1484 *brksize = oend - base;
1485 else
1486 *brksize = 0;
1487 }
1488
1489 return (0);
1490 bad:
1491 if (error == 0)
1492 error = EINVAL;
1493 return (error);
1494 }
1495
1496 int
1497 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1498 rlim64_t rlimit, cred_t *credp)
1499 {
1500 Note note;
1501 int error;
1502
1503 bzero(¬e, sizeof (note));
1504 bcopy("CORE", note.name, 4);
1505 note.nhdr.n_type = type;
1506 /*
1507 * The System V ABI states that n_namesz must be the length of the
1508 * string that follows the Nhdr structure including the terminating
|