1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 /* 29 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 30 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/thread.h> 35 #include <sys/sysmacros.h> 36 #include <sys/signal.h> 37 #include <sys/cred.h> 38 #include <sys/user.h> 39 #include <sys/errno.h> 40 #include <sys/vnode.h> 41 #include <sys/mman.h> 42 #include <sys/kmem.h> 43 #include <sys/proc.h> 44 #include <sys/pathname.h> 45 #include <sys/policy.h> 46 #include <sys/cmn_err.h> 47 #include <sys/systm.h> 48 #include <sys/elf.h> 49 #include <sys/vmsystm.h> 50 #include <sys/debug.h> 51 #include <sys/auxv.h> 52 #include <sys/exec.h> 53 #include <sys/prsystm.h> 54 #include <vm/as.h> 55 #include <vm/rm.h> 56 #include <vm/seg.h> 57 #include <vm/seg_vn.h> 58 #include <sys/modctl.h> 59 #include <sys/systeminfo.h> 60 #include <sys/vmparam.h> 61 #include <sys/machelf.h> 62 #include <sys/shm_impl.h> 63 #include <sys/archsystm.h> 64 #include <sys/fasttrap.h> 65 #include <sys/brand.h> 66 #include "elf_impl.h" 67 #include <sys/sdt.h> 68 #include <sys/siginfo.h> 69 #include <sys/random.h> 70 71 extern int at_flags; 72 extern volatile size_t aslr_max_brk_skew; 73 74 #define ORIGIN_STR "ORIGIN" 75 #define ORIGIN_STR_SIZE 6 76 77 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *); 78 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *, 79 ssize_t *); 80 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *, 81 ssize_t *, caddr_t *, ssize_t *); 82 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *); 83 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t, 84 Phdr **, Phdr **, Phdr **, Phdr **, Phdr *, 85 caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *); 86 87 typedef enum { 88 STR_CTF, 89 STR_SYMTAB, 90 STR_DYNSYM, 91 STR_STRTAB, 92 STR_DYNSTR, 93 STR_SHSTRTAB, 94 STR_NUM 95 } shstrtype_t; 96 97 static const char *shstrtab_data[] = { 98 ".SUNW_ctf", 99 ".symtab", 100 ".dynsym", 101 ".strtab", 102 ".dynstr", 103 ".shstrtab" 104 }; 105 106 typedef struct shstrtab { 107 int sst_ndx[STR_NUM]; 108 int sst_cur; 109 } shstrtab_t; 110 111 static void 112 shstrtab_init(shstrtab_t *s) 113 { 114 bzero(&s->sst_ndx, sizeof (s->sst_ndx)); 115 s->sst_cur = 1; 116 } 117 118 static int 119 shstrtab_ndx(shstrtab_t *s, shstrtype_t type) 120 { 121 int ret; 122 123 if ((ret = s->sst_ndx[type]) != 0) 124 return (ret); 125 126 ret = s->sst_ndx[type] = s->sst_cur; 127 s->sst_cur += strlen(shstrtab_data[type]) + 1; 128 129 return (ret); 130 } 131 132 static size_t 133 shstrtab_size(const shstrtab_t *s) 134 { 135 return (s->sst_cur); 136 } 137 138 static void 139 shstrtab_dump(const shstrtab_t *s, char *buf) 140 { 141 int i, ndx; 142 143 *buf = '\0'; 144 for (i = 0; i < STR_NUM; i++) { 145 if ((ndx = s->sst_ndx[i]) != 0) 146 (void) strcpy(buf + ndx, shstrtab_data[i]); 147 } 148 } 149 150 static int 151 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base) 152 { 153 ASSERT(phdrp->p_type == PT_SUNWDTRACE); 154 155 /* 156 * See the comment in fasttrap.h for information on how to safely 157 * update this program header. 158 */ 159 if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE || 160 (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X)) 161 return (-1); 162 163 args->thrptr = phdrp->p_vaddr + base; 164 165 return (0); 166 } 167 168 static int 169 handle_secflag_dt(proc_t *p, uint_t dt, uint_t val) 170 { 171 uint_t flag; 172 173 switch (dt) { 174 case DT_SUNW_ASLR: 175 flag = PROC_SEC_ASLR; 176 break; 177 default: 178 return (EINVAL); 179 } 180 181 if (val == 0) { 182 if (secflag_isset(p->p_secflags.psf_lower, flag)) 183 return (EPERM); 184 if ((secpolicy_psecflags(CRED(), p, p) != 0) && 185 secflag_isset(p->p_secflags.psf_inherit, flag)) 186 return (EPERM); 187 188 secflag_clear(&p->p_secflags.psf_inherit, flag); 189 secflag_clear(&p->p_secflags.psf_effective, flag); 190 } else { 191 if (!secflag_isset(p->p_secflags.psf_upper, flag)) 192 return (EPERM); 193 194 if ((secpolicy_psecflags(CRED(), p, p) != 0) && 195 !secflag_isset(p->p_secflags.psf_inherit, flag)) 196 return (EPERM); 197 198 secflag_set(&p->p_secflags.psf_inherit, flag); 199 secflag_set(&p->p_secflags.psf_effective, flag); 200 } 201 202 return (0); 203 } 204 205 /* 206 * Map in the executable pointed to by vp. Returns 0 on success. 207 */ 208 int 209 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr, 210 intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase, 211 caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap) 212 { 213 size_t len; 214 struct vattr vat; 215 caddr_t phdrbase = NULL; 216 ssize_t phdrsize; 217 int nshdrs, shstrndx, nphdrs; 218 int error = 0; 219 Phdr *uphdr = NULL; 220 Phdr *junk = NULL; 221 Phdr *dynphdr = NULL; 222 Phdr *dtrphdr = NULL; 223 uintptr_t lddata; 224 long execsz; 225 intptr_t minaddr; 226 227 if (lddatap != NULL) 228 *lddatap = NULL; 229 230 if (error = execpermissions(vp, &vat, args)) { 231 uprintf("%s: Cannot execute %s\n", exec_file, args->pathname); 232 return (error); 233 } 234 235 if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx, 236 &nphdrs)) != 0 || 237 (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase, 238 &phdrsize)) != 0) { 239 uprintf("%s: Cannot read %s\n", exec_file, args->pathname); 240 return (error); 241 } 242 243 if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) { 244 uprintf("%s: Nothing to load in %s", exec_file, args->pathname); 245 kmem_free(phdrbase, phdrsize); 246 return (ENOEXEC); 247 } 248 if (lddatap != NULL) 249 *lddatap = lddata; 250 251 if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr, 252 &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr, 253 len, &execsz, brksize)) { 254 uprintf("%s: Cannot map %s\n", exec_file, args->pathname); 255 kmem_free(phdrbase, phdrsize); 256 return (error); 257 } 258 259 /* 260 * Inform our caller if the executable needs an interpreter. 261 */ 262 *interp = (dynphdr == NULL) ? 0 : 1; 263 264 /* 265 * If this is a statically linked executable, voffset should indicate 266 * the address of the executable itself (it normally holds the address 267 * of the interpreter). 268 */ 269 if (ehdr->e_type == ET_EXEC && *interp == 0) 270 *voffset = minaddr; 271 272 if (uphdr != NULL) { 273 *uphdr_vaddr = uphdr->p_vaddr; 274 } else { 275 *uphdr_vaddr = (Addr)-1; 276 } 277 278 kmem_free(phdrbase, phdrsize); 279 return (error); 280 } 281 282 /*ARGSUSED*/ 283 int 284 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, 285 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, 286 int brand_action) 287 { 288 caddr_t phdrbase = NULL; 289 caddr_t bssbase = 0; 290 caddr_t brkbase = 0; 291 size_t brksize = 0; 292 ssize_t dlnsize; 293 aux_entry_t *aux; 294 int error; 295 ssize_t resid; 296 int fd = -1; 297 intptr_t voffset; 298 Phdr *intphdr = NULL; 299 Phdr *dynamicphdr = NULL; 300 Phdr *stphdr = NULL; 301 Phdr *uphdr = NULL; 302 Phdr *junk = NULL; 303 size_t len; 304 ssize_t phdrsize; 305 int postfixsize = 0; 306 int i, hsize; 307 Phdr *phdrp; 308 Phdr *dataphdrp = NULL; 309 Phdr *dtrphdr; 310 Phdr *capphdr = NULL; 311 Cap *cap = NULL; 312 ssize_t capsize; 313 Dyn *dyn = NULL; 314 int hasu = 0; 315 int hasauxv = 0; 316 int hasintp = 0; 317 int branded = 0; 318 319 struct proc *p = ttoproc(curthread); 320 struct user *up = PTOU(p); 321 struct bigwad { 322 Ehdr ehdr; 323 aux_entry_t elfargs[__KERN_NAUXV_IMPL]; 324 char dl_name[MAXPATHLEN]; 325 char pathbuf[MAXPATHLEN]; 326 struct vattr vattr; 327 struct execenv exenv; 328 } *bigwad; /* kmem_alloc this behemoth so we don't blow stack */ 329 Ehdr *ehdrp; 330 int nshdrs, shstrndx, nphdrs; 331 char *dlnp; 332 char *pathbufp; 333 rlim64_t limit; 334 rlim64_t roundlimit; 335 336 ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64); 337 338 bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP); 339 ehdrp = &bigwad->ehdr; 340 dlnp = bigwad->dl_name; 341 pathbufp = bigwad->pathbuf; 342 343 /* 344 * Obtain ELF and program header information. 345 */ 346 if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx, 347 &nphdrs)) != 0 || 348 (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase, 349 &phdrsize)) != 0) 350 goto out; 351 352 /* 353 * Prevent executing an ELF file that has no entry point. 354 */ 355 if (ehdrp->e_entry == 0) { 356 uprintf("%s: Bad entry point\n", exec_file); 357 goto bad; 358 } 359 360 /* 361 * Put data model that we're exec-ing to into the args passed to 362 * exec_args(), so it will know what it is copying to on new stack. 363 * Now that we know whether we are exec-ing a 32-bit or 64-bit 364 * executable, we can set execsz with the appropriate NCARGS. 365 */ 366 #ifdef _LP64 367 if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) { 368 args->to_model = DATAMODEL_ILP32; 369 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1); 370 } else { 371 args->to_model = DATAMODEL_LP64; 372 args->stk_prot &= ~PROT_EXEC; 373 #if defined(__i386) || defined(__amd64) 374 args->dat_prot &= ~PROT_EXEC; 375 #endif 376 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1); 377 } 378 #else /* _LP64 */ 379 args->to_model = DATAMODEL_ILP32; 380 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1); 381 #endif /* _LP64 */ 382 383 /* 384 * We delay invoking the brand callback until we've figured out 385 * what kind of elf binary we're trying to run, 32-bit or 64-bit. 386 * We do this because now the brand library can just check 387 * args->to_model to see if the target is 32-bit or 64-bit without 388 * having do duplicate all the code above. 389 * 390 * The level checks associated with brand handling below are used to 391 * prevent a loop since the brand elfexec function typically comes back 392 * through this function. We must check <= here since the nested 393 * handling in the #! interpreter code will increment the level before 394 * calling gexec to run the final elfexec interpreter. 395 */ 396 if ((level <= INTP_MAXDEPTH) && 397 (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { 398 error = BROP(p)->b_elfexec(vp, uap, args, 399 idatap, level + 1, execsz, setid, exec_file, cred, 400 brand_action); 401 goto out; 402 } 403 404 /* 405 * Determine aux size now so that stack can be built 406 * in one shot (except actual copyout of aux image), 407 * determine any non-default stack protections, 408 * and still have this code be machine independent. 409 */ 410 hsize = ehdrp->e_phentsize; 411 phdrp = (Phdr *)phdrbase; 412 for (i = nphdrs; i > 0; i--) { 413 switch (phdrp->p_type) { 414 case PT_INTERP: 415 hasauxv = hasintp = 1; 416 break; 417 case PT_PHDR: 418 hasu = 1; 419 break; 420 case PT_SUNWSTACK: 421 args->stk_prot = PROT_USER; 422 if (phdrp->p_flags & PF_R) 423 args->stk_prot |= PROT_READ; 424 if (phdrp->p_flags & PF_W) 425 args->stk_prot |= PROT_WRITE; 426 if (phdrp->p_flags & PF_X) 427 args->stk_prot |= PROT_EXEC; 428 break; 429 case PT_LOAD: 430 dataphdrp = phdrp; 431 break; 432 case PT_SUNWCAP: 433 capphdr = phdrp; 434 break; 435 case PT_DYNAMIC: 436 dynamicphdr = phdrp; 437 break; 438 } 439 phdrp = (Phdr *)((caddr_t)phdrp + hsize); 440 } 441 442 if (ehdrp->e_type != ET_EXEC) { 443 dataphdrp = NULL; 444 hasauxv = 1; 445 } 446 447 /* Copy BSS permissions to args->dat_prot */ 448 if (dataphdrp != NULL) { 449 args->dat_prot = PROT_USER; 450 if (dataphdrp->p_flags & PF_R) 451 args->dat_prot |= PROT_READ; 452 if (dataphdrp->p_flags & PF_W) 453 args->dat_prot |= PROT_WRITE; 454 if (dataphdrp->p_flags & PF_X) 455 args->dat_prot |= PROT_EXEC; 456 } 457 458 /* 459 * If a auxvector will be required - reserve the space for 460 * it now. This may be increased by exec_args if there are 461 * ISA-specific types (included in __KERN_NAUXV_IMPL). 462 */ 463 if (hasauxv) { 464 /* 465 * If a AUX vector is being built - the base AUX 466 * entries are: 467 * 468 * AT_BASE 469 * AT_FLAGS 470 * AT_PAGESZ 471 * AT_SUN_AUXFLAGS 472 * AT_SUN_HWCAP 473 * AT_SUN_HWCAP2 474 * AT_SUN_PLATFORM (added in stk_copyout) 475 * AT_SUN_EXECNAME (added in stk_copyout) 476 * AT_NULL 477 * 478 * total == 9 479 */ 480 if (hasintp && hasu) { 481 /* 482 * Has PT_INTERP & PT_PHDR - the auxvectors that 483 * will be built are: 484 * 485 * AT_PHDR 486 * AT_PHENT 487 * AT_PHNUM 488 * AT_ENTRY 489 * AT_LDDATA 490 * 491 * total = 5 492 */ 493 args->auxsize = (9 + 5) * sizeof (aux_entry_t); 494 } else if (hasintp) { 495 /* 496 * Has PT_INTERP but no PT_PHDR 497 * 498 * AT_EXECFD 499 * AT_LDDATA 500 * 501 * total = 2 502 */ 503 args->auxsize = (9 + 2) * sizeof (aux_entry_t); 504 } else { 505 args->auxsize = 9 * sizeof (aux_entry_t); 506 } 507 } else { 508 args->auxsize = 0; 509 } 510 511 /* 512 * If this binary is using an emulator, we need to add an 513 * AT_SUN_EMULATOR aux entry. 514 */ 515 if (args->emulator != NULL) 516 args->auxsize += sizeof (aux_entry_t); 517 518 if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) { 519 branded = 1; 520 /* 521 * We will be adding 4 entries to the aux vectors. One for 522 * the the brandname and 3 for the brand specific aux vectors. 523 */ 524 args->auxsize += 4 * sizeof (aux_entry_t); 525 } 526 527 /* If the binary has an explicit ASLR flag, it must be honoured */ 528 if ((dynamicphdr != NULL) && 529 (dynamicphdr->p_filesz > 0)) { 530 Dyn *dp; 531 off_t i = 0; 532 533 #define DYN_STRIDE 100 534 for (i = 0; i < dynamicphdr->p_filesz; 535 i += sizeof (*dyn) * DYN_STRIDE) { 536 int ndyns = (dynamicphdr->p_filesz - i) / sizeof (*dyn); 537 size_t dynsize; 538 539 ndyns = MIN(DYN_STRIDE, ndyns); 540 dynsize = ndyns * sizeof (*dyn); 541 542 dyn = kmem_alloc(dynsize, KM_SLEEP); 543 544 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)dyn, 545 dynsize, (offset_t)(dynamicphdr->p_offset + i), 546 UIO_SYSSPACE, 0, (rlim64_t)0, 547 CRED(), &resid)) != 0) { 548 uprintf("%s: cannot read .dynamic section\n", 549 exec_file); 550 goto out; 551 } 552 553 for (dp = dyn; dp < (dyn + ndyns); dp++) { 554 if (dp->d_tag == DT_SUNW_ASLR) { 555 if ((error = handle_secflag_dt(p, 556 DT_SUNW_ASLR, 557 dp->d_un.d_val)) != 0) { 558 uprintf("%s: error setting " 559 "security-flag from " 560 "DT_SUNW_ASLR: %d\n", 561 exec_file, error); 562 goto out; 563 } 564 } 565 } 566 567 kmem_free(dyn, dynsize); 568 } 569 } 570 571 /* Hardware/Software capabilities */ 572 if (capphdr != NULL && 573 (capsize = capphdr->p_filesz) > 0 && 574 capsize <= 16 * sizeof (*cap)) { 575 int ncaps = capsize / sizeof (*cap); 576 Cap *cp; 577 578 cap = kmem_alloc(capsize, KM_SLEEP); 579 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap, 580 capsize, (offset_t)capphdr->p_offset, 581 UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) { 582 uprintf("%s: Cannot read capabilities section\n", 583 exec_file); 584 goto out; 585 } 586 for (cp = cap; cp < cap + ncaps; cp++) { 587 if (cp->c_tag == CA_SUNW_SF_1 && 588 (cp->c_un.c_val & SF1_SUNW_ADDR32)) { 589 if (args->to_model == DATAMODEL_LP64) 590 args->addr32 = 1; 591 break; 592 } 593 } 594 } 595 596 aux = bigwad->elfargs; 597 /* 598 * Move args to the user's stack. 599 * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries. 600 */ 601 if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) { 602 if (error == -1) { 603 error = ENOEXEC; 604 goto bad; 605 } 606 goto out; 607 } 608 /* we're single threaded after this point */ 609 610 /* 611 * If this is an ET_DYN executable (shared object), 612 * determine its memory size so that mapelfexec() can load it. 613 */ 614 if (ehdrp->e_type == ET_DYN) 615 len = elfsize(ehdrp, nphdrs, phdrbase, NULL); 616 else 617 len = 0; 618 619 dtrphdr = NULL; 620 621 if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &intphdr, 622 &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL, 623 len, execsz, &brksize)) != 0) 624 goto bad; 625 626 if (uphdr != NULL && intphdr == NULL) 627 goto bad; 628 629 if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) { 630 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file); 631 goto bad; 632 } 633 634 if (intphdr != NULL) { 635 size_t len; 636 uintptr_t lddata; 637 char *p; 638 struct vnode *nvp; 639 640 dlnsize = intphdr->p_filesz; 641 642 if (dlnsize > MAXPATHLEN || dlnsize <= 0) 643 goto bad; 644 645 /* 646 * Read in "interpreter" pathname. 647 */ 648 if ((error = vn_rdwr(UIO_READ, vp, dlnp, intphdr->p_filesz, 649 (offset_t)intphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0, 650 CRED(), &resid)) != 0) { 651 uprintf("%s: Cannot obtain interpreter pathname\n", 652 exec_file); 653 goto bad; 654 } 655 656 if (resid != 0 || dlnp[dlnsize - 1] != '\0') 657 goto bad; 658 659 /* 660 * Search for '$ORIGIN' token in interpreter path. 661 * If found, expand it. 662 */ 663 for (p = dlnp; p = strchr(p, '$'); ) { 664 uint_t len, curlen; 665 char *_ptr; 666 667 if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE)) 668 continue; 669 670 /* 671 * We don't support $ORIGIN on setid programs to close 672 * a potential attack vector. 673 */ 674 if ((setid & EXECSETID_SETID) != 0) { 675 error = ENOEXEC; 676 goto bad; 677 } 678 679 curlen = 0; 680 len = p - dlnp - 1; 681 if (len) { 682 bcopy(dlnp, pathbufp, len); 683 curlen += len; 684 } 685 if (_ptr = strrchr(args->pathname, '/')) { 686 len = _ptr - args->pathname; 687 if ((curlen + len) > MAXPATHLEN) 688 break; 689 690 bcopy(args->pathname, &pathbufp[curlen], len); 691 curlen += len; 692 } else { 693 /* 694 * executable is a basename found in the 695 * current directory. So - just substitue 696 * '.' for ORIGIN. 697 */ 698 pathbufp[curlen] = '.'; 699 curlen++; 700 } 701 p += ORIGIN_STR_SIZE; 702 len = strlen(p); 703 704 if ((curlen + len) > MAXPATHLEN) 705 break; 706 bcopy(p, &pathbufp[curlen], len); 707 curlen += len; 708 pathbufp[curlen++] = '\0'; 709 bcopy(pathbufp, dlnp, curlen); 710 } 711 712 /* 713 * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1 714 * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1). 715 * Just in case /usr is not mounted, change it now. 716 */ 717 if (strcmp(dlnp, USR_LIB_RTLD) == 0) 718 dlnp += 4; 719 error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp); 720 if (error && dlnp != bigwad->dl_name) { 721 /* new kernel, old user-level */ 722 error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW, 723 NULLVPP, &nvp); 724 } 725 if (error) { 726 uprintf("%s: Cannot find %s\n", exec_file, dlnp); 727 goto bad; 728 } 729 730 /* 731 * Setup the "aux" vector. 732 */ 733 if (uphdr) { 734 if (ehdrp->e_type == ET_DYN) { 735 /* don't use the first page */ 736 bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE; 737 bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE; 738 } else { 739 bigwad->exenv.ex_bssbase = bssbase; 740 bigwad->exenv.ex_brkbase = brkbase; 741 } 742 bigwad->exenv.ex_brksize = brksize; 743 bigwad->exenv.ex_magic = elfmagic; 744 bigwad->exenv.ex_vp = vp; 745 setexecenv(&bigwad->exenv); 746 747 ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset) 748 ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize) 749 ADDAUX(aux, AT_PHNUM, nphdrs) 750 ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset) 751 } else { 752 if ((error = execopen(&vp, &fd)) != 0) { 753 VN_RELE(nvp); 754 goto bad; 755 } 756 757 ADDAUX(aux, AT_EXECFD, fd) 758 } 759 760 if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) { 761 VN_RELE(nvp); 762 uprintf("%s: Cannot execute %s\n", exec_file, dlnp); 763 goto bad; 764 } 765 766 /* 767 * Now obtain the ELF header along with the entire program 768 * header contained in "nvp". 769 */ 770 kmem_free(phdrbase, phdrsize); 771 phdrbase = NULL; 772 if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs, 773 &shstrndx, &nphdrs)) != 0 || 774 (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase, 775 &phdrsize)) != 0) { 776 VN_RELE(nvp); 777 uprintf("%s: Cannot read %s\n", exec_file, dlnp); 778 goto bad; 779 } 780 781 /* 782 * Determine memory size of the "interpreter's" loadable 783 * sections. This size is then used to obtain the virtual 784 * address of a hole, in the user's address space, large 785 * enough to map the "interpreter". 786 */ 787 if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) { 788 VN_RELE(nvp); 789 uprintf("%s: Nothing to load in %s\n", exec_file, dlnp); 790 goto bad; 791 } 792 793 dtrphdr = NULL; 794 795 error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk, 796 &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len, 797 execsz, NULL); 798 if (error || junk != NULL) { 799 VN_RELE(nvp); 800 uprintf("%s: Cannot map %s\n", exec_file, dlnp); 801 goto bad; 802 } 803 804 /* 805 * We use the DTrace program header to initialize the 806 * architecture-specific user per-LWP location. The dtrace 807 * fasttrap provider requires ready access to per-LWP scratch 808 * space. We assume that there is only one such program header 809 * in the interpreter. 810 */ 811 if (dtrphdr != NULL && 812 dtrace_safe_phdr(dtrphdr, args, voffset) != 0) { 813 VN_RELE(nvp); 814 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp); 815 goto bad; 816 } 817 818 VN_RELE(nvp); 819 ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata) 820 } 821 822 if (hasauxv) { 823 int auxf = AF_SUN_HWCAPVERIFY; 824 /* 825 * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via 826 * exec_args() 827 */ 828 ADDAUX(aux, AT_BASE, voffset) 829 ADDAUX(aux, AT_FLAGS, at_flags) 830 ADDAUX(aux, AT_PAGESZ, PAGESIZE) 831 /* 832 * Linker flags. (security) 833 * p_flag not yet set at this time. 834 * We rely on gexec() to provide us with the information. 835 * If the application is set-uid but this is not reflected 836 * in a mismatch between real/effective uids/gids, then 837 * don't treat this as a set-uid exec. So we care about 838 * the EXECSETID_UGIDS flag but not the ...SETID flag. 839 */ 840 if ((setid &= ~EXECSETID_SETID) != 0) 841 auxf |= AF_SUN_SETUGID; 842 843 /* 844 * If we're running a native process from within a branded 845 * zone under pfexec then we clear the AF_SUN_SETUGID flag so 846 * that the native ld.so.1 is able to link with the native 847 * libraries instead of using the brand libraries that are 848 * installed in the zone. We only do this for processes 849 * which we trust because we see they are already running 850 * under pfexec (where uid != euid). This prevents a 851 * malicious user within the zone from crafting a wrapper to 852 * run native suid commands with unsecure libraries interposed. 853 */ 854 if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) && 855 (setid &= ~EXECSETID_SETID) != 0)) 856 auxf &= ~AF_SUN_SETUGID; 857 858 /* 859 * Record the user addr of the auxflags aux vector entry 860 * since brands may optionally want to manipulate this field. 861 */ 862 args->auxp_auxflags = 863 (char *)((char *)args->stackend + 864 ((char *)&aux->a_type - 865 (char *)bigwad->elfargs)); 866 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf); 867 868 /* 869 * Hardware capability flag word (performance hints) 870 * Used for choosing faster library routines. 871 * (Potentially different between 32-bit and 64-bit ABIs) 872 */ 873 #if defined(_LP64) 874 if (args->to_model == DATAMODEL_NATIVE) { 875 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap) 876 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2) 877 } else { 878 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32) 879 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2) 880 } 881 #else 882 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap) 883 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2) 884 #endif 885 if (branded) { 886 /* 887 * Reserve space for the brand-private aux vectors, 888 * and record the user addr of that space. 889 */ 890 args->auxp_brand = 891 (char *)((char *)args->stackend + 892 ((char *)&aux->a_type - 893 (char *)bigwad->elfargs)); 894 ADDAUX(aux, AT_SUN_BRAND_AUX1, 0) 895 ADDAUX(aux, AT_SUN_BRAND_AUX2, 0) 896 ADDAUX(aux, AT_SUN_BRAND_AUX3, 0) 897 } 898 899 ADDAUX(aux, AT_NULL, 0) 900 postfixsize = (char *)aux - (char *)bigwad->elfargs; 901 902 /* 903 * We make assumptions above when we determine how many aux 904 * vector entries we will be adding. However, if we have an 905 * invalid elf file, it is possible that mapelfexec might 906 * behave differently (but not return an error), in which case 907 * the number of aux entries we actually add will be different. 908 * We detect that now and error out. 909 */ 910 if (postfixsize != args->auxsize) { 911 DTRACE_PROBE2(elfexec_badaux, int, postfixsize, 912 int, args->auxsize); 913 goto bad; 914 } 915 ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t)); 916 } 917 918 /* 919 * For the 64-bit kernel, the limit is big enough that rounding it up 920 * to a page can overflow the 64-bit limit, so we check for btopr() 921 * overflowing here by comparing it with the unrounded limit in pages. 922 * If it hasn't overflowed, compare the exec size with the rounded up 923 * limit in pages. Otherwise, just compare with the unrounded limit. 924 */ 925 limit = btop(p->p_vmem_ctl); 926 roundlimit = btopr(p->p_vmem_ctl); 927 if ((roundlimit > limit && *execsz > roundlimit) || 928 (roundlimit < limit && *execsz > limit)) { 929 mutex_enter(&p->p_lock); 930 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p, 931 RCA_SAFE); 932 mutex_exit(&p->p_lock); 933 error = ENOMEM; 934 goto bad; 935 } 936 937 bzero(up->u_auxv, sizeof (up->u_auxv)); 938 if (postfixsize) { 939 int num_auxv; 940 941 /* 942 * Copy the aux vector to the user stack. 943 */ 944 error = execpoststack(args, bigwad->elfargs, postfixsize); 945 if (error) 946 goto bad; 947 948 /* 949 * Copy auxv to the process's user structure for use by /proc. 950 * If this is a branded process, the brand's exec routine will 951 * copy it's private entries to the user structure later. It 952 * relies on the fact that the blank entries are at the end. 953 */ 954 num_auxv = postfixsize / sizeof (aux_entry_t); 955 ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t)); 956 aux = bigwad->elfargs; 957 for (i = 0; i < num_auxv; i++) { 958 up->u_auxv[i].a_type = aux[i].a_type; 959 up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val; 960 } 961 } 962 963 /* 964 * Pass back the starting address so we can set the program counter. 965 */ 966 args->entry = (uintptr_t)(ehdrp->e_entry + voffset); 967 968 if (!uphdr) { 969 if (ehdrp->e_type == ET_DYN) { 970 /* 971 * If we are executing a shared library which doesn't 972 * have a interpreter (probably ld.so.1) then 973 * we don't set the brkbase now. Instead we 974 * delay it's setting until the first call 975 * via grow.c::brk(). This permits ld.so.1 to 976 * initialize brkbase to the tail of the executable it 977 * loads (which is where it needs to be). 978 */ 979 bigwad->exenv.ex_brkbase = (caddr_t)0; 980 bigwad->exenv.ex_bssbase = (caddr_t)0; 981 bigwad->exenv.ex_brksize = 0; 982 } else { 983 bigwad->exenv.ex_brkbase = brkbase; 984 bigwad->exenv.ex_bssbase = bssbase; 985 bigwad->exenv.ex_brksize = brksize; 986 } 987 bigwad->exenv.ex_magic = elfmagic; 988 bigwad->exenv.ex_vp = vp; 989 setexecenv(&bigwad->exenv); 990 } 991 992 ASSERT(error == 0); 993 goto out; 994 995 bad: 996 if (fd != -1) /* did we open the a.out yet */ 997 (void) execclose(fd); 998 999 psignal(p, SIGKILL); 1000 1001 if (error == 0) 1002 error = ENOEXEC; 1003 out: 1004 if (phdrbase != NULL) 1005 kmem_free(phdrbase, phdrsize); 1006 if (cap != NULL) 1007 kmem_free(cap, capsize); 1008 kmem_free(bigwad, sizeof (struct bigwad)); 1009 return (error); 1010 } 1011 1012 /* 1013 * Compute the memory size requirement for the ELF file. 1014 */ 1015 static size_t 1016 elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata) 1017 { 1018 size_t len; 1019 Phdr *phdrp = (Phdr *)phdrbase; 1020 int hsize = ehdrp->e_phentsize; 1021 int first = 1; 1022 int dfirst = 1; /* first data segment */ 1023 uintptr_t loaddr = 0; 1024 uintptr_t hiaddr = 0; 1025 uintptr_t lo, hi; 1026 int i; 1027 1028 for (i = nphdrs; i > 0; i--) { 1029 if (phdrp->p_type == PT_LOAD) { 1030 lo = phdrp->p_vaddr; 1031 hi = lo + phdrp->p_memsz; 1032 if (first) { 1033 loaddr = lo; 1034 hiaddr = hi; 1035 first = 0; 1036 } else { 1037 if (loaddr > lo) 1038 loaddr = lo; 1039 if (hiaddr < hi) 1040 hiaddr = hi; 1041 } 1042 1043 /* 1044 * save the address of the first data segment 1045 * of a object - used for the AT_SUNW_LDDATA 1046 * aux entry. 1047 */ 1048 if ((lddata != NULL) && dfirst && 1049 (phdrp->p_flags & PF_W)) { 1050 *lddata = lo; 1051 dfirst = 0; 1052 } 1053 } 1054 phdrp = (Phdr *)((caddr_t)phdrp + hsize); 1055 } 1056 1057 len = hiaddr - (loaddr & PAGEMASK); 1058 len = roundup(len, PAGESIZE); 1059 1060 return (len); 1061 } 1062 1063 /* 1064 * Read in the ELF header and program header table. 1065 * SUSV3 requires: 1066 * ENOEXEC File format is not recognized 1067 * EINVAL Format recognized but execution not supported 1068 */ 1069 static int 1070 getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx, 1071 int *nphdrs) 1072 { 1073 int error; 1074 ssize_t resid; 1075 1076 /* 1077 * We got here by the first two bytes in ident, 1078 * now read the entire ELF header. 1079 */ 1080 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr, 1081 sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0, 1082 (rlim64_t)0, credp, &resid)) != 0) 1083 return (error); 1084 1085 /* 1086 * Since a separate version is compiled for handling 32-bit and 1087 * 64-bit ELF executables on a 64-bit kernel, the 64-bit version 1088 * doesn't need to be able to deal with 32-bit ELF files. 1089 */ 1090 if (resid != 0 || 1091 ehdr->e_ident[EI_MAG2] != ELFMAG2 || 1092 ehdr->e_ident[EI_MAG3] != ELFMAG3) 1093 return (ENOEXEC); 1094 1095 if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) || 1096 #if defined(_ILP32) || defined(_ELF32_COMPAT) 1097 ehdr->e_ident[EI_CLASS] != ELFCLASS32 || 1098 #else 1099 ehdr->e_ident[EI_CLASS] != ELFCLASS64 || 1100 #endif 1101 !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine, 1102 ehdr->e_flags)) 1103 return (EINVAL); 1104 1105 *nshdrs = ehdr->e_shnum; 1106 *shstrndx = ehdr->e_shstrndx; 1107 *nphdrs = ehdr->e_phnum; 1108 1109 /* 1110 * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need 1111 * to read in the section header at index zero to acces the true 1112 * values for those fields. 1113 */ 1114 if ((*nshdrs == 0 && ehdr->e_shoff != 0) || 1115 *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) { 1116 Shdr shdr; 1117 1118 if (ehdr->e_shoff == 0) 1119 return (EINVAL); 1120 1121 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr, 1122 sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, 1123 (rlim64_t)0, credp, &resid)) != 0) 1124 return (error); 1125 1126 if (*nshdrs == 0) 1127 *nshdrs = shdr.sh_size; 1128 if (*shstrndx == SHN_XINDEX) 1129 *shstrndx = shdr.sh_link; 1130 if (*nphdrs == PN_XNUM && shdr.sh_info != 0) 1131 *nphdrs = shdr.sh_info; 1132 } 1133 1134 return (0); 1135 } 1136 1137 #ifdef _ELF32_COMPAT 1138 extern size_t elf_nphdr_max; 1139 #else 1140 size_t elf_nphdr_max = 1000; 1141 #endif 1142 1143 static int 1144 getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs, 1145 caddr_t *phbasep, ssize_t *phsizep) 1146 { 1147 ssize_t resid, minsize; 1148 int err; 1149 1150 /* 1151 * Since we're going to be using e_phentsize to iterate down the 1152 * array of program headers, it must be 8-byte aligned or else 1153 * a we might cause a misaligned access. We use all members through 1154 * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so 1155 * e_phentsize must be at least large enough to include those 1156 * members. 1157 */ 1158 #if !defined(_LP64) || defined(_ELF32_COMPAT) 1159 minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags); 1160 #else 1161 minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz); 1162 #endif 1163 if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3)) 1164 return (EINVAL); 1165 1166 *phsizep = nphdrs * ehdr->e_phentsize; 1167 1168 if (*phsizep > sizeof (Phdr) * elf_nphdr_max) { 1169 if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL) 1170 return (ENOMEM); 1171 } else { 1172 *phbasep = kmem_alloc(*phsizep, KM_SLEEP); 1173 } 1174 1175 if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep, 1176 (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0, 1177 credp, &resid)) != 0) { 1178 kmem_free(*phbasep, *phsizep); 1179 *phbasep = NULL; 1180 return (err); 1181 } 1182 1183 return (0); 1184 } 1185 1186 #ifdef _ELF32_COMPAT 1187 extern size_t elf_nshdr_max; 1188 extern size_t elf_shstrtab_max; 1189 #else 1190 size_t elf_nshdr_max = 10000; 1191 size_t elf_shstrtab_max = 100 * 1024; 1192 #endif 1193 1194 1195 static int 1196 getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, 1197 int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep, 1198 char **shstrbasep, ssize_t *shstrsizep) 1199 { 1200 ssize_t resid, minsize; 1201 int err; 1202 Shdr *shdr; 1203 1204 /* 1205 * Since we're going to be using e_shentsize to iterate down the 1206 * array of section headers, it must be 8-byte aligned or else 1207 * a we might cause a misaligned access. We use all members through 1208 * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize 1209 * must be at least large enough to include that member. The index 1210 * of the string table section must also be valid. 1211 */ 1212 minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize); 1213 if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) || 1214 shstrndx >= nshdrs) 1215 return (EINVAL); 1216 1217 *shsizep = nshdrs * ehdr->e_shentsize; 1218 1219 if (*shsizep > sizeof (Shdr) * elf_nshdr_max) { 1220 if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL) 1221 return (ENOMEM); 1222 } else { 1223 *shbasep = kmem_alloc(*shsizep, KM_SLEEP); 1224 } 1225 1226 if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep, 1227 (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0, 1228 credp, &resid)) != 0) { 1229 kmem_free(*shbasep, *shsizep); 1230 return (err); 1231 } 1232 1233 /* 1234 * Pull the section string table out of the vnode; fail if the size 1235 * is zero. 1236 */ 1237 shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize); 1238 if ((*shstrsizep = shdr->sh_size) == 0) { 1239 kmem_free(*shbasep, *shsizep); 1240 return (EINVAL); 1241 } 1242 1243 if (*shstrsizep > elf_shstrtab_max) { 1244 if ((*shstrbasep = kmem_alloc(*shstrsizep, 1245 KM_NOSLEEP)) == NULL) { 1246 kmem_free(*shbasep, *shsizep); 1247 return (ENOMEM); 1248 } 1249 } else { 1250 *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP); 1251 } 1252 1253 if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep, 1254 (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0, 1255 credp, &resid)) != 0) { 1256 kmem_free(*shbasep, *shsizep); 1257 kmem_free(*shstrbasep, *shstrsizep); 1258 return (err); 1259 } 1260 1261 /* 1262 * Make sure the strtab is null-terminated to make sure we 1263 * don't run off the end of the table. 1264 */ 1265 (*shstrbasep)[*shstrsizep - 1] = '\0'; 1266 1267 return (0); 1268 } 1269 1270 static int 1271 mapelfexec( 1272 vnode_t *vp, 1273 Ehdr *ehdr, 1274 int nphdrs, 1275 caddr_t phdrbase, 1276 Phdr **uphdr, 1277 Phdr **intphdr, 1278 Phdr **stphdr, 1279 Phdr **dtphdr, 1280 Phdr *dataphdrp, 1281 caddr_t *bssbase, 1282 caddr_t *brkbase, 1283 intptr_t *voffset, 1284 intptr_t *minaddr, 1285 size_t len, 1286 long *execsz, 1287 size_t *brksize) 1288 { 1289 Phdr *phdr; 1290 int i, prot, error; 1291 caddr_t addr = NULL; 1292 size_t zfodsz; 1293 int ptload = 0; 1294 int page; 1295 off_t offset; 1296 int hsize = ehdr->e_phentsize; 1297 caddr_t mintmp = (caddr_t)-1; 1298 extern int use_brk_lpg; 1299 1300 if (ehdr->e_type == ET_DYN) { 1301 secflagset_t flags = 0; 1302 /* 1303 * Obtain the virtual address of a hole in the 1304 * address space to map the "interpreter". 1305 */ 1306 if (secflag_enabled(curproc, PROC_SEC_ASLR)) 1307 flags |= _MAP_RANDOMIZE; 1308 1309 map_addr(&addr, len, (offset_t)0, 1, flags); 1310 if (addr == NULL) 1311 return (ENOMEM); 1312 *voffset = (intptr_t)addr; 1313 1314 /* 1315 * Calculate the minimum vaddr so it can be subtracted out. 1316 * According to the ELF specification, since PT_LOAD sections 1317 * must be sorted by increasing p_vaddr values, this is 1318 * guaranteed to be the first PT_LOAD section. 1319 */ 1320 phdr = (Phdr *)phdrbase; 1321 for (i = nphdrs; i > 0; i--) { 1322 if (phdr->p_type == PT_LOAD) { 1323 *voffset -= (uintptr_t)phdr->p_vaddr; 1324 break; 1325 } 1326 phdr = (Phdr *)((caddr_t)phdr + hsize); 1327 } 1328 1329 } else { 1330 *voffset = 0; 1331 } 1332 phdr = (Phdr *)phdrbase; 1333 for (i = nphdrs; i > 0; i--) { 1334 switch (phdr->p_type) { 1335 case PT_LOAD: 1336 if ((*intphdr != NULL) && (*uphdr == NULL)) 1337 return (0); 1338 1339 ptload = 1; 1340 prot = PROT_USER; 1341 if (phdr->p_flags & PF_R) 1342 prot |= PROT_READ; 1343 if (phdr->p_flags & PF_W) 1344 prot |= PROT_WRITE; 1345 if (phdr->p_flags & PF_X) 1346 prot |= PROT_EXEC; 1347 1348 addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset); 1349 1350 /* 1351 * Keep track of the segment with the lowest starting 1352 * address. 1353 */ 1354 if (addr < mintmp) 1355 mintmp = addr; 1356 1357 zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz; 1358 1359 offset = phdr->p_offset; 1360 if (((uintptr_t)offset & PAGEOFFSET) == 1361 ((uintptr_t)addr & PAGEOFFSET) && 1362 (!(vp->v_flag & VNOMAP))) { 1363 page = 1; 1364 } else { 1365 page = 0; 1366 } 1367 1368 /* 1369 * Set the heap pagesize for OOB when the bss size 1370 * is known and use_brk_lpg is not 0. 1371 */ 1372 if (brksize != NULL && use_brk_lpg && 1373 zfodsz != 0 && phdr == dataphdrp && 1374 (prot & PROT_WRITE)) { 1375 size_t tlen = P2NPHASE((uintptr_t)addr + 1376 phdr->p_filesz, PAGESIZE); 1377 1378 if (zfodsz > tlen) { 1379 curproc->p_brkpageszc = 1380 page_szc(map_pgsz(MAPPGSZ_HEAP, 1381 curproc, addr + phdr->p_filesz + 1382 tlen, zfodsz - tlen, 0)); 1383 } 1384 } 1385 1386 if (curproc->p_brkpageszc != 0 && phdr == dataphdrp && 1387 (prot & PROT_WRITE)) { 1388 uint_t szc = curproc->p_brkpageszc; 1389 size_t pgsz = page_get_pagesize(szc); 1390 caddr_t ebss = addr + phdr->p_memsz; 1391 /* 1392 * If we need extra space to keep the BSS an 1393 * integral number of pages in size, some of 1394 * that space may fall beyond p_brkbase, so we 1395 * need to set p_brksize to account for it 1396 * being (logically) part of the brk. 1397 */ 1398 size_t extra_zfodsz; 1399 1400 ASSERT(pgsz > PAGESIZE); 1401 1402 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz); 1403 1404 if (error = execmap(vp, addr, phdr->p_filesz, 1405 zfodsz + extra_zfodsz, phdr->p_offset, 1406 prot, page, szc)) 1407 goto bad; 1408 if (brksize != NULL) 1409 *brksize = extra_zfodsz; 1410 } else { 1411 if (error = execmap(vp, addr, phdr->p_filesz, 1412 zfodsz, phdr->p_offset, prot, page, 0)) 1413 goto bad; 1414 } 1415 1416 if (bssbase != NULL && addr >= *bssbase && 1417 phdr == dataphdrp) { 1418 *bssbase = addr + phdr->p_filesz; 1419 } 1420 if (brkbase != NULL && addr >= *brkbase) { 1421 *brkbase = addr + phdr->p_memsz; 1422 } 1423 1424 *execsz += btopr(phdr->p_memsz); 1425 break; 1426 1427 case PT_INTERP: 1428 if (ptload) 1429 goto bad; 1430 *intphdr = phdr; 1431 break; 1432 1433 case PT_SHLIB: 1434 *stphdr = phdr; 1435 break; 1436 1437 case PT_PHDR: 1438 if (ptload) 1439 goto bad; 1440 *uphdr = phdr; 1441 break; 1442 1443 case PT_NULL: 1444 case PT_DYNAMIC: 1445 case PT_NOTE: 1446 break; 1447 1448 case PT_SUNWDTRACE: 1449 if (dtphdr != NULL) 1450 *dtphdr = phdr; 1451 break; 1452 1453 default: 1454 break; 1455 } 1456 phdr = (Phdr *)((caddr_t)phdr + hsize); 1457 } 1458 1459 if (minaddr != NULL) { 1460 ASSERT(mintmp != (caddr_t)-1); 1461 *minaddr = (intptr_t)mintmp; 1462 } 1463 1464 if (brkbase != NULL && secflag_enabled(curproc, PROC_SEC_ASLR)) { 1465 size_t off; 1466 uintptr_t base = (uintptr_t)*brkbase; 1467 uintptr_t oend = base + *brksize; 1468 1469 ASSERT(ISP2(aslr_max_brk_skew)); 1470 1471 (void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off)); 1472 base += P2PHASE(off, aslr_max_brk_skew); 1473 base = P2ROUNDUP(base, PAGESIZE); 1474 *brkbase = (caddr_t)base; 1475 /* 1476 * Above, we set *brksize to account for the possibility we 1477 * had to grow the 'brk' in padding out the BSS to a page 1478 * boundary. 1479 * 1480 * We now need to adjust that based on where we now are 1481 * actually putting the brk. 1482 */ 1483 if (oend > base) 1484 *brksize = oend - base; 1485 else 1486 *brksize = 0; 1487 } 1488 1489 return (0); 1490 bad: 1491 if (error == 0) 1492 error = EINVAL; 1493 return (error); 1494 } 1495 1496 int 1497 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc, 1498 rlim64_t rlimit, cred_t *credp) 1499 { 1500 Note note; 1501 int error; 1502 1503 bzero(¬e, sizeof (note)); 1504 bcopy("CORE", note.name, 4); 1505 note.nhdr.n_type = type; 1506 /* 1507 * The System V ABI states that n_namesz must be the length of the 1508 * string that follows the Nhdr structure including the terminating 1509 * null. The ABI also specifies that sufficient padding should be 1510 * included so that the description that follows the name string 1511 * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries 1512 * respectively. However, since this change was not made correctly 1513 * at the time of the 64-bit port, both 32- and 64-bit binaries 1514 * descriptions are only guaranteed to begin on a 4-byte boundary. 1515 */ 1516 note.nhdr.n_namesz = 5; 1517 note.nhdr.n_descsz = roundup(descsz, sizeof (Word)); 1518 1519 if (error = core_write(vp, UIO_SYSSPACE, *offsetp, ¬e, 1520 sizeof (note), rlimit, credp)) 1521 return (error); 1522 1523 *offsetp += sizeof (note); 1524 1525 if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc, 1526 note.nhdr.n_descsz, rlimit, credp)) 1527 return (error); 1528 1529 *offsetp += note.nhdr.n_descsz; 1530 return (0); 1531 } 1532 1533 /* 1534 * Copy the section data from one vnode to the section of another vnode. 1535 */ 1536 static void 1537 copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset, 1538 void *buf, size_t size, cred_t *credp, rlim64_t rlimit) 1539 { 1540 ssize_t resid; 1541 size_t len, n = src->sh_size; 1542 offset_t off = 0; 1543 1544 while (n != 0) { 1545 len = MIN(size, n); 1546 if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off, 1547 UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 || 1548 resid >= len || 1549 core_write(dst_vp, UIO_SYSSPACE, *doffset + off, 1550 buf, len - resid, rlimit, credp) != 0) { 1551 dst->sh_size = 0; 1552 dst->sh_offset = 0; 1553 return; 1554 } 1555 1556 ASSERT(n >= len - resid); 1557 1558 n -= len - resid; 1559 off += len - resid; 1560 } 1561 1562 *doffset += src->sh_size; 1563 } 1564 1565 #ifdef _ELF32_COMPAT 1566 extern size_t elf_datasz_max; 1567 #else 1568 size_t elf_datasz_max = 1 * 1024 * 1024; 1569 #endif 1570 1571 /* 1572 * This function processes mappings that correspond to load objects to 1573 * examine their respective sections for elfcore(). It's called once with 1574 * v set to NULL to count the number of sections that we're going to need 1575 * and then again with v set to some allocated buffer that we fill in with 1576 * all the section data. 1577 */ 1578 static int 1579 process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp, 1580 Shdr *v, int nv, rlim64_t rlimit, Off *doffsetp, int *nshdrsp) 1581 { 1582 vnode_t *lastvp = NULL; 1583 struct seg *seg; 1584 int i, j; 1585 void *data = NULL; 1586 size_t datasz = 0; 1587 shstrtab_t shstrtab; 1588 struct as *as = p->p_as; 1589 int error = 0; 1590 1591 if (v != NULL) 1592 shstrtab_init(&shstrtab); 1593 1594 i = 1; 1595 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 1596 uint_t prot; 1597 vnode_t *mvp; 1598 void *tmp = NULL; 1599 caddr_t saddr = seg->s_base; 1600 caddr_t naddr; 1601 caddr_t eaddr; 1602 size_t segsize; 1603 1604 Ehdr ehdr; 1605 int nshdrs, shstrndx, nphdrs; 1606 caddr_t shbase; 1607 ssize_t shsize; 1608 char *shstrbase; 1609 ssize_t shstrsize; 1610 1611 Shdr *shdr; 1612 const char *name; 1613 size_t sz; 1614 uintptr_t off; 1615 1616 int ctf_ndx = 0; 1617 int symtab_ndx = 0; 1618 1619 /* 1620 * Since we're just looking for text segments of load 1621 * objects, we only care about the protection bits; we don't 1622 * care about the actual size of the segment so we use the 1623 * reserved size. If the segment's size is zero, there's 1624 * something fishy going on so we ignore this segment. 1625 */ 1626 if (seg->s_ops != &segvn_ops || 1627 SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 || 1628 mvp == lastvp || mvp == NULL || mvp->v_type != VREG || 1629 (segsize = pr_getsegsize(seg, 1)) == 0) 1630 continue; 1631 1632 eaddr = saddr + segsize; 1633 prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr); 1634 pr_getprot_done(&tmp); 1635 1636 /* 1637 * Skip this segment unless the protection bits look like 1638 * what we'd expect for a text segment. 1639 */ 1640 if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC) 1641 continue; 1642 1643 if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx, 1644 &nphdrs) != 0 || 1645 getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx, 1646 &shbase, &shsize, &shstrbase, &shstrsize) != 0) 1647 continue; 1648 1649 off = ehdr.e_shentsize; 1650 for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) { 1651 Shdr *symtab = NULL, *strtab; 1652 1653 shdr = (Shdr *)(shbase + off); 1654 1655 if (shdr->sh_name >= shstrsize) 1656 continue; 1657 1658 name = shstrbase + shdr->sh_name; 1659 1660 if (strcmp(name, shstrtab_data[STR_CTF]) == 0) { 1661 if ((content & CC_CONTENT_CTF) == 0 || 1662 ctf_ndx != 0) 1663 continue; 1664 1665 if (shdr->sh_link > 0 && 1666 shdr->sh_link < nshdrs) { 1667 symtab = (Shdr *)(shbase + 1668 shdr->sh_link * ehdr.e_shentsize); 1669 } 1670 1671 if (v != NULL && i < nv - 1) { 1672 if (shdr->sh_size > datasz && 1673 shdr->sh_size <= elf_datasz_max) { 1674 if (data != NULL) 1675 kmem_free(data, datasz); 1676 1677 datasz = shdr->sh_size; 1678 data = kmem_alloc(datasz, 1679 KM_SLEEP); 1680 } 1681 1682 v[i].sh_name = shstrtab_ndx(&shstrtab, 1683 STR_CTF); 1684 v[i].sh_addr = (Addr)(uintptr_t)saddr; 1685 v[i].sh_type = SHT_PROGBITS; 1686 v[i].sh_addralign = 4; 1687 *doffsetp = roundup(*doffsetp, 1688 v[i].sh_addralign); 1689 v[i].sh_offset = *doffsetp; 1690 v[i].sh_size = shdr->sh_size; 1691 if (symtab == NULL) { 1692 v[i].sh_link = 0; 1693 } else if (symtab->sh_type == 1694 SHT_SYMTAB && 1695 symtab_ndx != 0) { 1696 v[i].sh_link = 1697 symtab_ndx; 1698 } else { 1699 v[i].sh_link = i + 1; 1700 } 1701 1702 copy_scn(shdr, mvp, &v[i], vp, 1703 doffsetp, data, datasz, credp, 1704 rlimit); 1705 } 1706 1707 ctf_ndx = i++; 1708 1709 /* 1710 * We've already dumped the symtab. 1711 */ 1712 if (symtab != NULL && 1713 symtab->sh_type == SHT_SYMTAB && 1714 symtab_ndx != 0) 1715 continue; 1716 1717 } else if (strcmp(name, 1718 shstrtab_data[STR_SYMTAB]) == 0) { 1719 if ((content & CC_CONTENT_SYMTAB) == 0 || 1720 symtab != 0) 1721 continue; 1722 1723 symtab = shdr; 1724 } 1725 1726 if (symtab != NULL) { 1727 if ((symtab->sh_type != SHT_DYNSYM && 1728 symtab->sh_type != SHT_SYMTAB) || 1729 symtab->sh_link == 0 || 1730 symtab->sh_link >= nshdrs) 1731 continue; 1732 1733 strtab = (Shdr *)(shbase + 1734 symtab->sh_link * ehdr.e_shentsize); 1735 1736 if (strtab->sh_type != SHT_STRTAB) 1737 continue; 1738 1739 if (v != NULL && i < nv - 2) { 1740 sz = MAX(symtab->sh_size, 1741 strtab->sh_size); 1742 if (sz > datasz && 1743 sz <= elf_datasz_max) { 1744 if (data != NULL) 1745 kmem_free(data, datasz); 1746 1747 datasz = sz; 1748 data = kmem_alloc(datasz, 1749 KM_SLEEP); 1750 } 1751 1752 if (symtab->sh_type == SHT_DYNSYM) { 1753 v[i].sh_name = shstrtab_ndx( 1754 &shstrtab, STR_DYNSYM); 1755 v[i + 1].sh_name = shstrtab_ndx( 1756 &shstrtab, STR_DYNSTR); 1757 } else { 1758 v[i].sh_name = shstrtab_ndx( 1759 &shstrtab, STR_SYMTAB); 1760 v[i + 1].sh_name = shstrtab_ndx( 1761 &shstrtab, STR_STRTAB); 1762 } 1763 1764 v[i].sh_type = symtab->sh_type; 1765 v[i].sh_addr = symtab->sh_addr; 1766 if (ehdr.e_type == ET_DYN || 1767 v[i].sh_addr == 0) 1768 v[i].sh_addr += 1769 (Addr)(uintptr_t)saddr; 1770 v[i].sh_addralign = 1771 symtab->sh_addralign; 1772 *doffsetp = roundup(*doffsetp, 1773 v[i].sh_addralign); 1774 v[i].sh_offset = *doffsetp; 1775 v[i].sh_size = symtab->sh_size; 1776 v[i].sh_link = i + 1; 1777 v[i].sh_entsize = symtab->sh_entsize; 1778 v[i].sh_info = symtab->sh_info; 1779 1780 copy_scn(symtab, mvp, &v[i], vp, 1781 doffsetp, data, datasz, credp, 1782 rlimit); 1783 1784 v[i + 1].sh_type = SHT_STRTAB; 1785 v[i + 1].sh_flags = SHF_STRINGS; 1786 v[i + 1].sh_addr = symtab->sh_addr; 1787 if (ehdr.e_type == ET_DYN || 1788 v[i + 1].sh_addr == 0) 1789 v[i + 1].sh_addr += 1790 (Addr)(uintptr_t)saddr; 1791 v[i + 1].sh_addralign = 1792 strtab->sh_addralign; 1793 *doffsetp = roundup(*doffsetp, 1794 v[i + 1].sh_addralign); 1795 v[i + 1].sh_offset = *doffsetp; 1796 v[i + 1].sh_size = strtab->sh_size; 1797 1798 copy_scn(strtab, mvp, &v[i + 1], vp, 1799 doffsetp, data, datasz, credp, 1800 rlimit); 1801 } 1802 1803 if (symtab->sh_type == SHT_SYMTAB) 1804 symtab_ndx = i; 1805 i += 2; 1806 } 1807 } 1808 1809 kmem_free(shstrbase, shstrsize); 1810 kmem_free(shbase, shsize); 1811 1812 lastvp = mvp; 1813 } 1814 1815 if (v == NULL) { 1816 if (i == 1) 1817 *nshdrsp = 0; 1818 else 1819 *nshdrsp = i + 1; 1820 goto done; 1821 } 1822 1823 if (i != nv - 1) { 1824 cmn_err(CE_WARN, "elfcore: core dump failed for " 1825 "process %d; address space is changing", p->p_pid); 1826 error = EIO; 1827 goto done; 1828 } 1829 1830 v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB); 1831 v[i].sh_size = shstrtab_size(&shstrtab); 1832 v[i].sh_addralign = 1; 1833 *doffsetp = roundup(*doffsetp, v[i].sh_addralign); 1834 v[i].sh_offset = *doffsetp; 1835 v[i].sh_flags = SHF_STRINGS; 1836 v[i].sh_type = SHT_STRTAB; 1837 1838 if (v[i].sh_size > datasz) { 1839 if (data != NULL) 1840 kmem_free(data, datasz); 1841 1842 datasz = v[i].sh_size; 1843 data = kmem_alloc(datasz, 1844 KM_SLEEP); 1845 } 1846 1847 shstrtab_dump(&shstrtab, data); 1848 1849 if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp, 1850 data, v[i].sh_size, rlimit, credp)) != 0) 1851 goto done; 1852 1853 *doffsetp += v[i].sh_size; 1854 1855 done: 1856 if (data != NULL) 1857 kmem_free(data, datasz); 1858 1859 return (error); 1860 } 1861 1862 int 1863 elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig, 1864 core_content_t content) 1865 { 1866 offset_t poffset, soffset; 1867 Off doffset; 1868 int error, i, nphdrs, nshdrs; 1869 int overflow = 0; 1870 struct seg *seg; 1871 struct as *as = p->p_as; 1872 union { 1873 Ehdr ehdr; 1874 Phdr phdr[1]; 1875 Shdr shdr[1]; 1876 } *bigwad; 1877 size_t bigsize; 1878 size_t phdrsz, shdrsz; 1879 Ehdr *ehdr; 1880 Phdr *v; 1881 caddr_t brkbase; 1882 size_t brksize; 1883 caddr_t stkbase; 1884 size_t stksize; 1885 int ntries = 0; 1886 klwp_t *lwp = ttolwp(curthread); 1887 1888 top: 1889 /* 1890 * Make sure we have everything we need (registers, etc.). 1891 * All other lwps have already stopped and are in an orderly state. 1892 */ 1893 ASSERT(p == ttoproc(curthread)); 1894 prstop(0, 0); 1895 1896 AS_LOCK_ENTER(as, RW_WRITER); 1897 nphdrs = prnsegs(as, 0) + 2; /* two CORE note sections */ 1898 1899 /* 1900 * Count the number of section headers we're going to need. 1901 */ 1902 nshdrs = 0; 1903 if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) { 1904 (void) process_scns(content, p, credp, NULL, NULL, NULL, 0, 1905 NULL, &nshdrs); 1906 } 1907 AS_LOCK_EXIT(as); 1908 1909 ASSERT(nshdrs == 0 || nshdrs > 1); 1910 1911 /* 1912 * The core file contents may required zero section headers, but if 1913 * we overflow the 16 bits allotted to the program header count in 1914 * the ELF header, we'll need that program header at index zero. 1915 */ 1916 if (nshdrs == 0 && nphdrs >= PN_XNUM) 1917 nshdrs = 1; 1918 1919 phdrsz = nphdrs * sizeof (Phdr); 1920 shdrsz = nshdrs * sizeof (Shdr); 1921 1922 bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz)); 1923 bigwad = kmem_alloc(bigsize, KM_SLEEP); 1924 1925 ehdr = &bigwad->ehdr; 1926 bzero(ehdr, sizeof (*ehdr)); 1927 1928 ehdr->e_ident[EI_MAG0] = ELFMAG0; 1929 ehdr->e_ident[EI_MAG1] = ELFMAG1; 1930 ehdr->e_ident[EI_MAG2] = ELFMAG2; 1931 ehdr->e_ident[EI_MAG3] = ELFMAG3; 1932 ehdr->e_ident[EI_CLASS] = ELFCLASS; 1933 ehdr->e_type = ET_CORE; 1934 1935 #if !defined(_LP64) || defined(_ELF32_COMPAT) 1936 1937 #if defined(__sparc) 1938 ehdr->e_ident[EI_DATA] = ELFDATA2MSB; 1939 ehdr->e_machine = EM_SPARC; 1940 #elif defined(__i386) || defined(__i386_COMPAT) 1941 ehdr->e_ident[EI_DATA] = ELFDATA2LSB; 1942 ehdr->e_machine = EM_386; 1943 #else 1944 #error "no recognized machine type is defined" 1945 #endif 1946 1947 #else /* !defined(_LP64) || defined(_ELF32_COMPAT) */ 1948 1949 #if defined(__sparc) 1950 ehdr->e_ident[EI_DATA] = ELFDATA2MSB; 1951 ehdr->e_machine = EM_SPARCV9; 1952 #elif defined(__amd64) 1953 ehdr->e_ident[EI_DATA] = ELFDATA2LSB; 1954 ehdr->e_machine = EM_AMD64; 1955 #else 1956 #error "no recognized 64-bit machine type is defined" 1957 #endif 1958 1959 #endif /* !defined(_LP64) || defined(_ELF32_COMPAT) */ 1960 1961 /* 1962 * If the count of program headers or section headers or the index 1963 * of the section string table can't fit in the mere 16 bits 1964 * shortsightedly allotted to them in the ELF header, we use the 1965 * extended formats and put the real values in the section header 1966 * as index 0. 1967 */ 1968 ehdr->e_version = EV_CURRENT; 1969 ehdr->e_ehsize = sizeof (Ehdr); 1970 1971 if (nphdrs >= PN_XNUM) 1972 ehdr->e_phnum = PN_XNUM; 1973 else 1974 ehdr->e_phnum = (unsigned short)nphdrs; 1975 1976 ehdr->e_phoff = sizeof (Ehdr); 1977 ehdr->e_phentsize = sizeof (Phdr); 1978 1979 if (nshdrs > 0) { 1980 if (nshdrs >= SHN_LORESERVE) 1981 ehdr->e_shnum = 0; 1982 else 1983 ehdr->e_shnum = (unsigned short)nshdrs; 1984 1985 if (nshdrs - 1 >= SHN_LORESERVE) 1986 ehdr->e_shstrndx = SHN_XINDEX; 1987 else 1988 ehdr->e_shstrndx = (unsigned short)(nshdrs - 1); 1989 1990 ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs; 1991 ehdr->e_shentsize = sizeof (Shdr); 1992 } 1993 1994 if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr, 1995 sizeof (Ehdr), rlimit, credp)) 1996 goto done; 1997 1998 poffset = sizeof (Ehdr); 1999 soffset = sizeof (Ehdr) + phdrsz; 2000 doffset = sizeof (Ehdr) + phdrsz + shdrsz; 2001 2002 v = &bigwad->phdr[0]; 2003 bzero(v, phdrsz); 2004 2005 setup_old_note_header(&v[0], p); 2006 v[0].p_offset = doffset = roundup(doffset, sizeof (Word)); 2007 doffset += v[0].p_filesz; 2008 2009 setup_note_header(&v[1], p); 2010 v[1].p_offset = doffset = roundup(doffset, sizeof (Word)); 2011 doffset += v[1].p_filesz; 2012 2013 mutex_enter(&p->p_lock); 2014 2015 brkbase = p->p_brkbase; 2016 brksize = p->p_brksize; 2017 2018 stkbase = p->p_usrstack - p->p_stksize; 2019 stksize = p->p_stksize; 2020 2021 mutex_exit(&p->p_lock); 2022 2023 AS_LOCK_ENTER(as, RW_WRITER); 2024 i = 2; 2025 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 2026 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); 2027 caddr_t saddr, naddr; 2028 void *tmp = NULL; 2029 extern struct seg_ops segspt_shmops; 2030 2031 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { 2032 uint_t prot; 2033 size_t size; 2034 int type; 2035 vnode_t *mvp; 2036 2037 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); 2038 prot &= PROT_READ | PROT_WRITE | PROT_EXEC; 2039 if ((size = (size_t)(naddr - saddr)) == 0) 2040 continue; 2041 if (i == nphdrs) { 2042 overflow++; 2043 continue; 2044 } 2045 v[i].p_type = PT_LOAD; 2046 v[i].p_vaddr = (Addr)(uintptr_t)saddr; 2047 v[i].p_memsz = size; 2048 if (prot & PROT_READ) 2049 v[i].p_flags |= PF_R; 2050 if (prot & PROT_WRITE) 2051 v[i].p_flags |= PF_W; 2052 if (prot & PROT_EXEC) 2053 v[i].p_flags |= PF_X; 2054 2055 /* 2056 * Figure out which mappings to include in the core. 2057 */ 2058 type = SEGOP_GETTYPE(seg, saddr); 2059 2060 if (saddr == stkbase && size == stksize) { 2061 if (!(content & CC_CONTENT_STACK)) 2062 goto exclude; 2063 2064 } else if (saddr == brkbase && size == brksize) { 2065 if (!(content & CC_CONTENT_HEAP)) 2066 goto exclude; 2067 2068 } else if (seg->s_ops == &segspt_shmops) { 2069 if (type & MAP_NORESERVE) { 2070 if (!(content & CC_CONTENT_DISM)) 2071 goto exclude; 2072 } else { 2073 if (!(content & CC_CONTENT_ISM)) 2074 goto exclude; 2075 } 2076 2077 } else if (seg->s_ops != &segvn_ops) { 2078 goto exclude; 2079 2080 } else if (type & MAP_SHARED) { 2081 if (shmgetid(p, saddr) != SHMID_NONE) { 2082 if (!(content & CC_CONTENT_SHM)) 2083 goto exclude; 2084 2085 } else if (SEGOP_GETVP(seg, seg->s_base, 2086 &mvp) != 0 || mvp == NULL || 2087 mvp->v_type != VREG) { 2088 if (!(content & CC_CONTENT_SHANON)) 2089 goto exclude; 2090 2091 } else { 2092 if (!(content & CC_CONTENT_SHFILE)) 2093 goto exclude; 2094 } 2095 2096 } else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 || 2097 mvp == NULL || mvp->v_type != VREG) { 2098 if (!(content & CC_CONTENT_ANON)) 2099 goto exclude; 2100 2101 } else if (prot == (PROT_READ | PROT_EXEC)) { 2102 if (!(content & CC_CONTENT_TEXT)) 2103 goto exclude; 2104 2105 } else if (prot == PROT_READ) { 2106 if (!(content & CC_CONTENT_RODATA)) 2107 goto exclude; 2108 2109 } else { 2110 if (!(content & CC_CONTENT_DATA)) 2111 goto exclude; 2112 } 2113 2114 doffset = roundup(doffset, sizeof (Word)); 2115 v[i].p_offset = doffset; 2116 v[i].p_filesz = size; 2117 doffset += size; 2118 exclude: 2119 i++; 2120 } 2121 ASSERT(tmp == NULL); 2122 } 2123 AS_LOCK_EXIT(as); 2124 2125 if (overflow || i != nphdrs) { 2126 if (ntries++ == 0) { 2127 kmem_free(bigwad, bigsize); 2128 overflow = 0; 2129 goto top; 2130 } 2131 cmn_err(CE_WARN, "elfcore: core dump failed for " 2132 "process %d; address space is changing", p->p_pid); 2133 error = EIO; 2134 goto done; 2135 } 2136 2137 if ((error = core_write(vp, UIO_SYSSPACE, poffset, 2138 v, phdrsz, rlimit, credp)) != 0) 2139 goto done; 2140 2141 if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit, 2142 credp)) != 0) 2143 goto done; 2144 2145 if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit, 2146 credp, content)) != 0) 2147 goto done; 2148 2149 for (i = 2; i < nphdrs; i++) { 2150 prkillinfo_t killinfo; 2151 sigqueue_t *sq; 2152 int sig, j; 2153 2154 if (v[i].p_filesz == 0) 2155 continue; 2156 2157 /* 2158 * If dumping out this segment fails, rather than failing 2159 * the core dump entirely, we reset the size of the mapping 2160 * to zero to indicate that the data is absent from the core 2161 * file and or in the PF_SUNW_FAILURE flag to differentiate 2162 * this from mappings that were excluded due to the core file 2163 * content settings. 2164 */ 2165 if ((error = core_seg(p, vp, v[i].p_offset, 2166 (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz, 2167 rlimit, credp)) == 0) { 2168 continue; 2169 } 2170 2171 if ((sig = lwp->lwp_cursig) == 0) { 2172 /* 2173 * We failed due to something other than a signal. 2174 * Since the space reserved for the segment is now 2175 * unused, we stash the errno in the first four 2176 * bytes. This undocumented interface will let us 2177 * understand the nature of the failure. 2178 */ 2179 (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset, 2180 &error, sizeof (error), rlimit, credp); 2181 2182 v[i].p_filesz = 0; 2183 v[i].p_flags |= PF_SUNW_FAILURE; 2184 if ((error = core_write(vp, UIO_SYSSPACE, 2185 poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]), 2186 rlimit, credp)) != 0) 2187 goto done; 2188 2189 continue; 2190 } 2191 2192 /* 2193 * We took a signal. We want to abort the dump entirely, but 2194 * we also want to indicate what failed and why. We therefore 2195 * use the space reserved for the first failing segment to 2196 * write our error (which, for purposes of compatability with 2197 * older core dump readers, we set to EINTR) followed by any 2198 * siginfo associated with the signal. 2199 */ 2200 bzero(&killinfo, sizeof (killinfo)); 2201 killinfo.prk_error = EINTR; 2202 2203 sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo; 2204 2205 if (sq != NULL) { 2206 bcopy(&sq->sq_info, &killinfo.prk_info, 2207 sizeof (sq->sq_info)); 2208 } else { 2209 killinfo.prk_info.si_signo = lwp->lwp_cursig; 2210 killinfo.prk_info.si_code = SI_NOINFO; 2211 } 2212 2213 #if (defined(_SYSCALL32_IMPL) || defined(_LP64)) 2214 /* 2215 * If this is a 32-bit process, we need to translate from the 2216 * native siginfo to the 32-bit variant. (Core readers must 2217 * always have the same data model as their target or must 2218 * be aware of -- and compensate for -- data model differences.) 2219 */ 2220 if (curproc->p_model == DATAMODEL_ILP32) { 2221 siginfo32_t si32; 2222 2223 siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32); 2224 bcopy(&si32, &killinfo.prk_info, sizeof (si32)); 2225 } 2226 #endif 2227 2228 (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset, 2229 &killinfo, sizeof (killinfo), rlimit, credp); 2230 2231 /* 2232 * For the segment on which we took the signal, indicate that 2233 * its data now refers to a siginfo. 2234 */ 2235 v[i].p_filesz = 0; 2236 v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED | 2237 PF_SUNW_SIGINFO; 2238 2239 /* 2240 * And for every other segment, indicate that its absence 2241 * is due to a signal. 2242 */ 2243 for (j = i + 1; j < nphdrs; j++) { 2244 v[j].p_filesz = 0; 2245 v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED; 2246 } 2247 2248 /* 2249 * Finally, write out our modified program headers. 2250 */ 2251 if ((error = core_write(vp, UIO_SYSSPACE, 2252 poffset + sizeof (v[i]) * i, &v[i], 2253 sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0) 2254 goto done; 2255 2256 break; 2257 } 2258 2259 if (nshdrs > 0) { 2260 bzero(&bigwad->shdr[0], shdrsz); 2261 2262 if (nshdrs >= SHN_LORESERVE) 2263 bigwad->shdr[0].sh_size = nshdrs; 2264 2265 if (nshdrs - 1 >= SHN_LORESERVE) 2266 bigwad->shdr[0].sh_link = nshdrs - 1; 2267 2268 if (nphdrs >= PN_XNUM) 2269 bigwad->shdr[0].sh_info = nphdrs; 2270 2271 if (nshdrs > 1) { 2272 AS_LOCK_ENTER(as, RW_WRITER); 2273 if ((error = process_scns(content, p, credp, vp, 2274 &bigwad->shdr[0], nshdrs, rlimit, &doffset, 2275 NULL)) != 0) { 2276 AS_LOCK_EXIT(as); 2277 goto done; 2278 } 2279 AS_LOCK_EXIT(as); 2280 } 2281 2282 if ((error = core_write(vp, UIO_SYSSPACE, soffset, 2283 &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0) 2284 goto done; 2285 } 2286 2287 done: 2288 kmem_free(bigwad, bigsize); 2289 return (error); 2290 } 2291 2292 #ifndef _ELF32_COMPAT 2293 2294 static struct execsw esw = { 2295 #ifdef _LP64 2296 elf64magicstr, 2297 #else /* _LP64 */ 2298 elf32magicstr, 2299 #endif /* _LP64 */ 2300 0, 2301 5, 2302 elfexec, 2303 elfcore 2304 }; 2305 2306 static struct modlexec modlexec = { 2307 &mod_execops, "exec module for elf", &esw 2308 }; 2309 2310 #ifdef _LP64 2311 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args, 2312 intpdata_t *idatap, int level, long *execsz, 2313 int setid, caddr_t exec_file, cred_t *cred, 2314 int brand_action); 2315 extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp, 2316 rlim64_t rlimit, int sig, core_content_t content); 2317 2318 static struct execsw esw32 = { 2319 elf32magicstr, 2320 0, 2321 5, 2322 elf32exec, 2323 elf32core 2324 }; 2325 2326 static struct modlexec modlexec32 = { 2327 &mod_execops, "32-bit exec module for elf", &esw32 2328 }; 2329 #endif /* _LP64 */ 2330 2331 static struct modlinkage modlinkage = { 2332 MODREV_1, 2333 (void *)&modlexec, 2334 #ifdef _LP64 2335 (void *)&modlexec32, 2336 #endif /* _LP64 */ 2337 NULL 2338 }; 2339 2340 int 2341 _init(void) 2342 { 2343 return (mod_install(&modlinkage)); 2344 } 2345 2346 int 2347 _fini(void) 2348 { 2349 return (mod_remove(&modlinkage)); 2350 } 2351 2352 int 2353 _info(struct modinfo *modinfop) 2354 { 2355 return (mod_info(&modlinkage, modinfop)); 2356 } 2357 2358 #endif /* !_ELF32_COMPAT */