1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  27 /*        All Rights Reserved   */
  28 /*
  29  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/thread.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/signal.h>
  37 #include <sys/cred.h>
  38 #include <sys/user.h>
  39 #include <sys/errno.h>
  40 #include <sys/vnode.h>
  41 #include <sys/mman.h>
  42 #include <sys/kmem.h>
  43 #include <sys/proc.h>
  44 #include <sys/pathname.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/systm.h>
  47 #include <sys/elf.h>
  48 #include <sys/vmsystm.h>
  49 #include <sys/debug.h>
  50 #include <sys/auxv.h>
  51 #include <sys/exec.h>
  52 #include <sys/prsystm.h>
  53 #include <vm/as.h>
  54 #include <vm/rm.h>
  55 #include <vm/seg.h>
  56 #include <vm/seg_vn.h>
  57 #include <sys/modctl.h>
  58 #include <sys/systeminfo.h>
  59 #include <sys/vmparam.h>
  60 #include <sys/machelf.h>
  61 #include <sys/shm_impl.h>
  62 #include <sys/archsystm.h>
  63 #include <sys/fasttrap.h>
  64 #include <sys/brand.h>
  65 #include "elf_impl.h"
  66 #include <sys/sdt.h>
  67 #include <sys/siginfo.h>
  68 #include <sys/random.h>
  69 
/* Flag word defined elsewhere; elfexec() publishes it as the AT_FLAGS aux entry. */
extern int at_flags;
/*
 * NOTE(review): presumably an upper bound on the randomized brk offset used
 * for ASLR; it is not referenced in the part of the file reviewed here, so
 * confirm against its users.
 */
extern volatile size_t aslr_max_brk_skew;

/*
 * Token that elfexec() looks for, immediately after a '$', in the
 * interpreter path name.  ORIGIN_STR_SIZE is the length of the token
 * without its terminating NUL and must be kept in sync with ORIGIN_STR.
 */
#define ORIGIN_STR      "ORIGIN"
#define ORIGIN_STR_SIZE 6

/* Forward declarations of the file-local ELF helpers used below. */
static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
    ssize_t *);
static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
    ssize_t *, caddr_t *, ssize_t *);
static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
    Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
    caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
  85 
  86 typedef enum {
  87         STR_CTF,
  88         STR_SYMTAB,
  89         STR_DYNSYM,
  90         STR_STRTAB,
  91         STR_DYNSTR,
  92         STR_SHSTRTAB,
  93         STR_NUM
  94 } shstrtype_t;
  95 
  96 static const char *shstrtab_data[] = {
  97         ".SUNW_ctf",
  98         ".symtab",
  99         ".dynsym",
 100         ".strtab",
 101         ".dynstr",
 102         ".shstrtab"
 103 };
 104 
 105 typedef struct shstrtab {
 106         int     sst_ndx[STR_NUM];
 107         int     sst_cur;
 108 } shstrtab_t;
 109 
 110 static void
 111 shstrtab_init(shstrtab_t *s)
 112 {
 113         bzero(&s->sst_ndx, sizeof (s->sst_ndx));
 114         s->sst_cur = 1;
 115 }
 116 
 117 static int
 118 shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
 119 {
 120         int ret;
 121 
 122         if ((ret = s->sst_ndx[type]) != 0)
 123                 return (ret);
 124 
 125         ret = s->sst_ndx[type] = s->sst_cur;
 126         s->sst_cur += strlen(shstrtab_data[type]) + 1;
 127 
 128         return (ret);
 129 }
 130 
 131 static size_t
 132 shstrtab_size(const shstrtab_t *s)
 133 {
 134         return (s->sst_cur);
 135 }
 136 
 137 static void
 138 shstrtab_dump(const shstrtab_t *s, char *buf)
 139 {
 140         int i, ndx;
 141 
 142         *buf = '\0';
 143         for (i = 0; i < STR_NUM; i++) {
 144                 if ((ndx = s->sst_ndx[i]) != 0)
 145                         (void) strcpy(buf + ndx, shstrtab_data[i]);
 146         }
 147 }
 148 
 149 static int
 150 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
 151 {
 152         ASSERT(phdrp->p_type == PT_SUNWDTRACE);
 153 
 154         /*
 155          * See the comment in fasttrap.h for information on how to safely
 156          * update this program header.
 157          */
 158         if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
 159             (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
 160                 return (-1);
 161 
 162         args->thrptr = phdrp->p_vaddr + base;
 163 
 164         return (0);
 165 }
 166 
 167 /*
 168  * Map in the executable pointed to by vp. Returns 0 on success.
 169  */
 170 int
 171 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
 172     intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
 173     caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
 174 {
 175         size_t          len;
 176         struct vattr    vat;
 177         caddr_t         phdrbase = NULL;
 178         ssize_t         phdrsize;
 179         int             nshdrs, shstrndx, nphdrs;
 180         int             error = 0;
 181         Phdr            *uphdr = NULL;
 182         Phdr            *junk = NULL;
 183         Phdr            *dynphdr = NULL;
 184         Phdr            *dtrphdr = NULL;
 185         uintptr_t       lddata;
 186         long            execsz;
 187         intptr_t        minaddr;
 188 
 189         if (lddatap != NULL)
 190                 *lddatap = NULL;
 191 
 192         if (error = execpermissions(vp, &vat, args)) {
 193                 uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
 194                 return (error);
 195         }
 196 
 197         if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
 198             &nphdrs)) != 0 ||
 199             (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
 200             &phdrsize)) != 0) {
 201                 uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
 202                 return (error);
 203         }
 204 
 205         if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
 206                 uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
 207                 kmem_free(phdrbase, phdrsize);
 208                 return (ENOEXEC);
 209         }
 210         if (lddatap != NULL)
 211                 *lddatap = lddata;
 212 
 213         if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
 214             &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
 215             len, &execsz, brksize)) {
 216                 uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
 217                 kmem_free(phdrbase, phdrsize);
 218                 return (error);
 219         }
 220 
 221         /*
 222          * Inform our caller if the executable needs an interpreter.
 223          */
 224         *interp = (dynphdr == NULL) ? 0 : 1;
 225 
 226         /*
 227          * If this is a statically linked executable, voffset should indicate
 228          * the address of the executable itself (it normally holds the address
 229          * of the interpreter).
 230          */
 231         if (ehdr->e_type == ET_EXEC && *interp == 0)
 232                 *voffset = minaddr;
 233 
 234         if (uphdr != NULL) {
 235                 *uphdr_vaddr = uphdr->p_vaddr;
 236         } else {
 237                 *uphdr_vaddr = (Addr)-1;
 238         }
 239 
 240         kmem_free(phdrbase, phdrsize);
 241         return (error);
 242 }
 243 
 244 /*ARGSUSED*/
 245 int
 246 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 247     int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
 248     int brand_action)
 249 {
 250         caddr_t         phdrbase = NULL;
 251         caddr_t         bssbase = 0;
 252         caddr_t         brkbase = 0;
 253         size_t          brksize = 0;
 254         ssize_t         dlnsize;
 255         aux_entry_t     *aux;
 256         int             error;
 257         ssize_t         resid;
 258         int             fd = -1;
 259         intptr_t        voffset;
 260         Phdr            *intphdr = NULL;
 261         Phdr            *dynamicphdr = NULL;
 262         Phdr            *stphdr = NULL;
 263         Phdr            *uphdr = NULL;
 264         Phdr            *junk = NULL;
 265         size_t          len;
 266         ssize_t         phdrsize;
 267         int             postfixsize = 0;
 268         int             i, hsize;
 269         Phdr            *phdrp;
 270         Phdr            *dataphdrp = NULL;
 271         Phdr            *dtrphdr;
 272         Phdr            *capphdr = NULL;
 273         Cap             *cap = NULL;
 274         ssize_t         capsize;
 275         Dyn             *dyn = NULL;
 276         ssize_t         dynsize;
 277         int             hasu = 0;
 278         int             hasauxv = 0;
 279         int             hasintp = 0;
 280         int             branded = 0;
 281 
 282         struct proc *p = ttoproc(curthread);
 283         struct user *up = PTOU(p);
 284         struct bigwad {
 285                 Ehdr    ehdr;
 286                 aux_entry_t     elfargs[__KERN_NAUXV_IMPL];
 287                 char            dl_name[MAXPATHLEN];
 288                 char            pathbuf[MAXPATHLEN];
 289                 struct vattr    vattr;
 290                 struct execenv  exenv;
 291         } *bigwad;      /* kmem_alloc this behemoth so we don't blow stack */
 292         Ehdr            *ehdrp;
 293         int             nshdrs, shstrndx, nphdrs;
 294         char            *dlnp;
 295         char            *pathbufp;
 296         rlim64_t        limit;
 297         rlim64_t        roundlimit;
 298 
 299         ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
 300 
 301         bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
 302         ehdrp = &bigwad->ehdr;
 303         dlnp = bigwad->dl_name;
 304         pathbufp = bigwad->pathbuf;
 305 
 306         /*
 307          * Obtain ELF and program header information.
 308          */
 309         if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
 310             &nphdrs)) != 0 ||
 311             (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
 312             &phdrsize)) != 0)
 313                 goto out;
 314 
 315         /*
 316          * Prevent executing an ELF file that has no entry point.
 317          */
 318         if (ehdrp->e_entry == 0) {
 319                 uprintf("%s: Bad entry point\n", exec_file);
 320                 goto bad;
 321         }
 322 
 323         /*
 324          * Put data model that we're exec-ing to into the args passed to
 325          * exec_args(), so it will know what it is copying to on new stack.
 326          * Now that we know whether we are exec-ing a 32-bit or 64-bit
 327          * executable, we can set execsz with the appropriate NCARGS.
 328          */
 329 #ifdef  _LP64
 330         if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
 331                 args->to_model = DATAMODEL_ILP32;
 332                 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
 333         } else {
 334                 args->to_model = DATAMODEL_LP64;
 335                 args->stk_prot &= ~PROT_EXEC;
 336 #if defined(__i386) || defined(__amd64)
 337                 args->dat_prot &= ~PROT_EXEC;
 338 #endif
 339                 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
 340         }
 341 #else   /* _LP64 */
 342         args->to_model = DATAMODEL_ILP32;
 343         *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
 344 #endif  /* _LP64 */
 345 
 346         /*
 347          * We delay invoking the brand callback until we've figured out
 348          * what kind of elf binary we're trying to run, 32-bit or 64-bit.
 349          * We do this because now the brand library can just check
 350          * args->to_model to see if the target is 32-bit or 64-bit without
 351          * having to duplicate all the code above.
 352          */
 353         if ((level < 2) &&
 354             (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 355                 error = BROP(p)->b_elfexec(vp, uap, args,
 356                     idatap, level + 1, execsz, setid, exec_file, cred,
 357                     brand_action);
 358                 goto out;
 359         }
 360 
 361         /*
 362          * Determine aux size now so that stack can be built
 363          * in one shot (except actual copyout of aux image),
 364          * determine any non-default stack protections,
 365          * and still have this code be machine independent.
 366          */
 367         hsize = ehdrp->e_phentsize;
 368         phdrp = (Phdr *)phdrbase;
 369         for (i = nphdrs; i > 0; i--) {
 370                 switch (phdrp->p_type) {
 371                 case PT_INTERP:
 372                         hasauxv = hasintp = 1;
 373                         break;
 374                 case PT_PHDR:
 375                         hasu = 1;
 376                         break;
 377                 case PT_SUNWSTACK:
 378                         args->stk_prot = PROT_USER;
 379                         if (phdrp->p_flags & PF_R)
 380                                 args->stk_prot |= PROT_READ;
 381                         if (phdrp->p_flags & PF_W)
 382                                 args->stk_prot |= PROT_WRITE;
 383                         if (phdrp->p_flags & PF_X)
 384                                 args->stk_prot |= PROT_EXEC;
 385                         break;
 386                 case PT_LOAD:
 387                         dataphdrp = phdrp;
 388                         break;
 389                 case PT_SUNWCAP:
 390                         capphdr = phdrp;
 391                         break;
 392                 case PT_DYNAMIC:
 393                         dynamicphdr = phdrp;
 394                         break;
 395                 }
 396                 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
 397         }
 398 
 399         if (ehdrp->e_type != ET_EXEC) {
 400                 dataphdrp = NULL;
 401                 hasauxv = 1;
 402         }
 403 
 404         /* Copy BSS permissions to args->dat_prot */
 405         if (dataphdrp != NULL) {
 406                 args->dat_prot = PROT_USER;
 407                 if (dataphdrp->p_flags & PF_R)
 408                         args->dat_prot |= PROT_READ;
 409                 if (dataphdrp->p_flags & PF_W)
 410                         args->dat_prot |= PROT_WRITE;
 411                 if (dataphdrp->p_flags & PF_X)
 412                         args->dat_prot |= PROT_EXEC;
 413         }
 414 
 415         /*
 416          * If an auxvector will be required - reserve the space for
 417          * it now.  This may be increased by exec_args if there are
 418          * ISA-specific types (included in __KERN_NAUXV_IMPL).
 419          */
 420         if (hasauxv) {
 421                 /*
 422                  * If a AUX vector is being built - the base AUX
 423                  * entries are:
 424                  *
 425                  *      AT_BASE
 426                  *      AT_FLAGS
 427                  *      AT_PAGESZ
 428                  *      AT_SUN_AUXFLAGS
 429                  *      AT_SUN_HWCAP
 430                  *      AT_SUN_HWCAP2
 431                  *      AT_SUN_SECFLAGS
 432                  *      AT_SUN_PLATFORM (added in stk_copyout)
 433                  *      AT_SUN_EXECNAME (added in stk_copyout)
 434                  *      AT_NULL
 435                  *
 436                  * total == 10
 437                  */
 438                 if (hasintp && hasu) {
 439                         /*
 440                          * Has PT_INTERP & PT_PHDR - the auxvectors that
 441                          * will be built are:
 442                          *
 443                          *      AT_PHDR
 444                          *      AT_PHENT
 445                          *      AT_PHNUM
 446                          *      AT_ENTRY
 447                          *      AT_LDDATA
 448                          *
 449                          * total = 5
 450                          */
 451                         args->auxsize = (10 + 5) * sizeof (aux_entry_t);
 452                 } else if (hasintp) {
 453                         /*
 454                          * Has PT_INTERP but no PT_PHDR
 455                          *
 456                          *      AT_EXECFD
 457                          *      AT_LDDATA
 458                          *
 459                          * total = 2
 460                          */
 461                         args->auxsize = (10 + 2) * sizeof (aux_entry_t);
 462                 } else {
 463                         args->auxsize = 10 * sizeof (aux_entry_t);
 464                 }
 465         } else {
 466                 args->auxsize = 0;
 467         }
 468 
 469         /*
 470          * If this binary is using an emulator, we need to add an
 471          * AT_SUN_EMULATOR aux entry.
 472          */
 473         if (args->emulator != NULL)
 474                 args->auxsize += sizeof (aux_entry_t);
 475 
 476         if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 477                 branded = 1;
 478                 /*
 479                  * We will be adding 4 entries to the aux vectors.  One for
 480                  * the brandname and 3 for the brand specific aux vectors.
 481                  */
 482                 args->auxsize += 4 * sizeof (aux_entry_t);
 483         }
 484 
 485         /* If the binary has an explicit ASLR flag, it must be honoured */
 486         if (dynamicphdr != NULL) {
 487                 Dyn *dp;
 488 
 489                 dynsize = dynamicphdr->p_filesz;
 490                 dyn = kmem_alloc(dynsize, KM_SLEEP);
 491 
 492                 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)dyn, dynsize,
 493                     (offset_t)dynamicphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
 494                     CRED(), &resid)) != 0) {
 495                         uprintf("%s: cannot read .dynamic section\n",
 496                             exec_file);
 497                         goto out;
 498                 }
 499 
 500                 if (resid != 0)
 501                         goto out;
 502 
 503                 dp = dyn;
 504                 while (dp->d_tag != DT_NULL) {
 505                         if (dp->d_tag == DT_SUNW_ASLR) {
 506                                 if (dp->d_un.d_val != 0) {
 507                                         curproc->p_secflags.psf_effective |=
 508                                             PROC_SEC_ASLR;
 509                                         curproc->p_secflags.psf_inherit |=
 510                                             PROC_SEC_ASLR;
 511 
 512                                 } else {
 513                                         curproc->p_secflags.psf_effective &=
 514                                             ~PROC_SEC_ASLR;
 515                                         curproc->p_secflags.psf_inherit &=
 516                                             ~PROC_SEC_ASLR;
 517                                 }
 518                         }
 519                         dp++;
 520                 }
 521         }
 522 
 523         /* Hardware/Software capabilities */
 524         if (capphdr != NULL &&
 525             (capsize = capphdr->p_filesz) > 0 &&
 526             capsize <= 16 * sizeof (*cap)) {
 527                 int ncaps = capsize / sizeof (*cap);
 528                 Cap *cp;
 529 
 530                 cap = kmem_alloc(capsize, KM_SLEEP);
 531                 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
 532                     capsize, (offset_t)capphdr->p_offset,
 533                     UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
 534                         uprintf("%s: Cannot read capabilities section\n",
 535                             exec_file);
 536                         goto out;
 537                 }
 538                 for (cp = cap; cp < cap + ncaps; cp++) {
 539                         if (cp->c_tag == CA_SUNW_SF_1 &&
 540                             (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
 541                                 if (args->to_model == DATAMODEL_LP64)
 542                                         args->addr32 = 1;
 543                                 break;
 544                         }
 545                 }
 546         }
 547 
 548         aux = bigwad->elfargs;
 549         /*
 550          * Move args to the user's stack.
 551          * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
 552          */
 553         if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
 554                 if (error == -1) {
 555                         error = ENOEXEC;
 556                         goto bad;
 557                 }
 558                 goto out;
 559         }
 560         /* we're single threaded after this point */
 561 
 562         /*
 563          * If this is an ET_DYN executable (shared object),
 564          * determine its memory size so that mapelfexec() can load it.
 565          */
 566         if (ehdrp->e_type == ET_DYN)
 567                 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
 568         else
 569                 len = 0;
 570 
 571         dtrphdr = NULL;
 572 
 573         if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &intphdr,
 574             &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
 575             len, execsz, &brksize)) != 0)
 576                 goto bad;
 577 
 578         if (uphdr != NULL && intphdr == NULL)
 579                 goto bad;
 580 
 581         if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 582                 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
 583                 goto bad;
 584         }
 585 
 586         if (intphdr != NULL) {
 587                 size_t          len;
 588                 uintptr_t       lddata;
 589                 char            *p;
 590                 struct vnode    *nvp;
 591 
 592                 dlnsize = intphdr->p_filesz;
 593 
 594                 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
 595                         goto bad;
 596 
 597                 /*
 598                  * Read in "interpreter" pathname.
 599                  */
 600                 if ((error = vn_rdwr(UIO_READ, vp, dlnp, intphdr->p_filesz,
 601                     (offset_t)intphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
 602                     CRED(), &resid)) != 0) {
 603                         uprintf("%s: Cannot obtain interpreter pathname\n",
 604                             exec_file);
 605                         goto bad;
 606                 }
 607 
 608                 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
 609                         goto bad;
 610 
 611                 /*
 612                  * Search for '$ORIGIN' token in interpreter path.
 613                  * If found, expand it.
 614                  */
 615                 for (p = dlnp; p = strchr(p, '$'); ) {
 616                         uint_t  len, curlen;
 617                         char    *_ptr;
 618 
 619                         if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
 620                                 continue;
 621 
 622                         curlen = 0;
 623                         len = p - dlnp - 1;
 624                         if (len) {
 625                                 bcopy(dlnp, pathbufp, len);
 626                                 curlen += len;
 627                         }
 628                         if (_ptr = strrchr(args->pathname, '/')) {
 629                                 len = _ptr - args->pathname;
 630                                 if ((curlen + len) > MAXPATHLEN)
 631                                         break;
 632 
 633                                 bcopy(args->pathname, &pathbufp[curlen], len);
 634                                 curlen += len;
 635                         } else {
 636                                 /*
 637                                  * executable is a basename found in the
 638                                  * current directory.  So - just substitute
 639                                  * '.' for ORIGIN.
 640                                  */
 641                                 pathbufp[curlen] = '.';
 642                                 curlen++;
 643                         }
 644                         p += ORIGIN_STR_SIZE;
 645                         len = strlen(p);
 646 
 647                         if ((curlen + len) > MAXPATHLEN)
 648                                 break;
 649                         bcopy(p, &pathbufp[curlen], len);
 650                         curlen += len;
 651                         pathbufp[curlen++] = '\0';
 652                         bcopy(pathbufp, dlnp, curlen);
 653                 }
 654 
 655                 /*
 656                  * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
 657                  * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
 658                  * Just in case /usr is not mounted, change it now.
 659                  */
 660                 if (strcmp(dlnp, USR_LIB_RTLD) == 0)
 661                         dlnp += 4;
 662                 error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
 663                 if (error && dlnp != bigwad->dl_name) {
 664                         /* new kernel, old user-level */
 665                         error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
 666                             NULLVPP, &nvp);
 667                 }
 668                 if (error) {
 669                         uprintf("%s: Cannot find %s\n", exec_file, dlnp);
 670                         goto bad;
 671                 }
 672 
 673                 /*
 674                  * Setup the "aux" vector.
 675                  */
 676                 if (uphdr) {
 677                         if (ehdrp->e_type == ET_DYN) {
 678                                 /* don't use the first page */
 679                                 bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
 680                                 bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
 681                         } else {
 682                                 bigwad->exenv.ex_bssbase = bssbase;
 683                                 bigwad->exenv.ex_brkbase = brkbase;
 684                         }
 685                         bigwad->exenv.ex_brksize = brksize;
 686                         bigwad->exenv.ex_magic = elfmagic;
 687                         bigwad->exenv.ex_vp = vp;
 688                         setexecenv(&bigwad->exenv);
 689 
 690                         ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
 691                         ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
 692                         ADDAUX(aux, AT_PHNUM, nphdrs)
 693                         ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
 694                 } else {
 695                         if ((error = execopen(&vp, &fd)) != 0) {
 696                                 VN_RELE(nvp);
 697                                 goto bad;
 698                         }
 699 
 700                         ADDAUX(aux, AT_EXECFD, fd)
 701                 }
 702 
 703                 if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
 704                         VN_RELE(nvp);
 705                         uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
 706                         goto bad;
 707                 }
 708 
 709                 /*
 710                  * Now obtain the ELF header along with the entire program
 711                  * header contained in "nvp".
 712                  */
 713                 kmem_free(phdrbase, phdrsize);
 714                 phdrbase = NULL;
 715                 if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
 716                     &shstrndx, &nphdrs)) != 0 ||
 717                     (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
 718                     &phdrsize)) != 0) {
 719                         VN_RELE(nvp);
 720                         uprintf("%s: Cannot read %s\n", exec_file, dlnp);
 721                         goto bad;
 722                 }
 723 
 724                 /*
 725                  * Determine memory size of the "interpreter's" loadable
 726                  * sections.  This size is then used to obtain the virtual
 727                  * address of a hole, in the user's address space, large
 728                  * enough to map the "interpreter".
 729                  */
 730                 if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
 731                         VN_RELE(nvp);
 732                         uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
 733                         goto bad;
 734                 }
 735 
 736                 dtrphdr = NULL;
 737 
 738                 error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
 739                     &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
 740                     execsz, NULL);
 741                 if (error || junk != NULL) {
 742                         VN_RELE(nvp);
 743                         uprintf("%s: Cannot map %s\n", exec_file, dlnp);
 744                         goto bad;
 745                 }
 746 
 747                 /*
 748                  * We use the DTrace program header to initialize the
 749                  * architecture-specific user per-LWP location. The dtrace
 750                  * fasttrap provider requires ready access to per-LWP scratch
 751                  * space. We assume that there is only one such program header
 752                  * in the interpreter.
 753                  */
 754                 if (dtrphdr != NULL &&
 755                     dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 756                         VN_RELE(nvp);
 757                         uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
 758                         goto bad;
 759                 }
 760 
 761                 VN_RELE(nvp);
 762                 ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
 763         }
 764 
 765         if (hasauxv) {
 766                 int auxf = AF_SUN_HWCAPVERIFY;
 767                 /*
 768                  * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
 769                  * exec_args()
 770                  */
 771                 ADDAUX(aux, AT_BASE, voffset)
 772                 ADDAUX(aux, AT_FLAGS, at_flags)
 773                 ADDAUX(aux, AT_PAGESZ, PAGESIZE)
 774                 /*
 775                  * Linker flags. (security)
 776                  * p_flag not yet set at this time.
 777                  * We rely on gexec() to provide us with the information.
 778                  * If the application is set-uid but this is not reflected
 779                  * in a mismatch between real/effective uids/gids, then
 780                  * don't treat this as a set-uid exec.  So we care about
 781                  * the EXECSETID_UGIDS flag but not the ...SETID flag.
 782                  */
 783                 if ((setid &= ~EXECSETID_SETID) != 0)
 784                         auxf |= AF_SUN_SETUGID;
 785 
 786                 /*
 787                  * If we're running a native process from within a branded
 788                  * zone under pfexec then we clear the AF_SUN_SETUGID flag so
 789                  * that the native ld.so.1 is able to link with the native
 790                  * libraries instead of using the brand libraries that are
 791                  * installed in the zone.  We only do this for processes
 792                  * which we trust because we see they are already running
 793                  * under pfexec (where uid != euid).  This prevents a
 794                  * malicious user within the zone from crafting a wrapper to
 795                  * run native suid commands with unsecure libraries interposed.
 796                  */
 797                 if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
 798                     (setid &= ~EXECSETID_SETID) != 0))
 799                         auxf &= ~AF_SUN_SETUGID;
 800 
 801                 /*
 802                  * Record the user addr of the auxflags aux vector entry
 803                  * since brands may optionally want to manipulate this field.
 804                  */
 805                 args->auxp_auxflags =
 806                     (char *)((char *)args->stackend +
 807                     ((char *)&aux->a_type -
 808                     (char *)bigwad->elfargs));
 809                 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
 810 
 811                 /*
 812                  * Put the effective security-flags into the aux vector, for
 813                  * the sake of flags that need partial (or complete)
 814                  * implementation in userland.
 815                  */
 816                 ADDAUX(aux, AT_SUN_SECFLAGS, p->p_secflags.psf_effective);
 817                 /*
 818                  * Hardware capability flag word (performance hints)
 819                  * Used for choosing faster library routines.
 820                  * (Potentially different between 32-bit and 64-bit ABIs)
 821                  */
 822 #if defined(_LP64)
 823                 if (args->to_model == DATAMODEL_NATIVE) {
 824                         ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 825                         ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 826                 } else {
 827                         ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
 828                         ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
 829                 }
 830 #else
 831                 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 832                 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 833 #endif
 834                 if (branded) {
 835                         /*
 836                          * Reserve space for the brand-private aux vectors,
 837                          * and record the user addr of that space.
 838                          */
 839                         args->auxp_brand =
 840                             (char *)((char *)args->stackend +
 841                             ((char *)&aux->a_type -
 842                             (char *)bigwad->elfargs));
 843                         ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
 844                         ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
 845                         ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
 846                 }
 847 
 848                 ADDAUX(aux, AT_NULL, 0)
 849                 postfixsize = (char *)aux - (char *)bigwad->elfargs;
 850 
 851                 /*
 852                  * We make assumptions above when we determine how many aux
 853                  * vector entries we will be adding. However, if we have an
 854                  * invalid elf file, it is possible that mapelfexec might
 855                  * behave differently (but not return an error), in which case
 856                  * the number of aux entries we actually add will be different.
 857                  * We detect that now and error out.
 858                  */
 859                 if (postfixsize != args->auxsize) {
 860                         DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
 861                             int, args->auxsize);
 862                         goto bad;
 863                 }
 864                 ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
 865         }
 866 
 867         /*
 868          * For the 64-bit kernel, the limit is big enough that rounding it up
 869          * to a page can overflow the 64-bit limit, so we check for btopr()
 870          * overflowing here by comparing it with the unrounded limit in pages.
 871          * If it hasn't overflowed, compare the exec size with the rounded up
 872          * limit in pages.  Otherwise, just compare with the unrounded limit.
 873          */
 874         limit = btop(p->p_vmem_ctl);
 875         roundlimit = btopr(p->p_vmem_ctl);
 876         if ((roundlimit > limit && *execsz > roundlimit) ||
 877             (roundlimit < limit && *execsz > limit)) {
 878                 mutex_enter(&p->p_lock);
 879                 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
 880                     RCA_SAFE);
 881                 mutex_exit(&p->p_lock);
 882                 error = ENOMEM;
 883                 goto bad;
 884         }
 885 
 886         bzero(up->u_auxv, sizeof (up->u_auxv));
 887         if (postfixsize) {
 888                 int num_auxv;
 889 
 890                 /*
 891                  * Copy the aux vector to the user stack.
 892                  */
 893                 error = execpoststack(args, bigwad->elfargs, postfixsize);
 894                 if (error)
 895                         goto bad;
 896 
 897                 /*
 898                  * Copy auxv to the process's user structure for use by /proc.
 899                  * If this is a branded process, the brand's exec routine will
  900                  * copy its private entries to the user structure later. It
 901                  * relies on the fact that the blank entries are at the end.
 902                  */
 903                 num_auxv = postfixsize / sizeof (aux_entry_t);
 904                 ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
 905                 aux = bigwad->elfargs;
 906                 for (i = 0; i < num_auxv; i++) {
 907                         up->u_auxv[i].a_type = aux[i].a_type;
 908                         up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
 909                 }
 910         }
 911 
 912         /*
 913          * Pass back the starting address so we can set the program counter.
 914          */
 915         args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
 916 
 917         if (!uphdr) {
 918                 if (ehdrp->e_type == ET_DYN) {
 919                         /*
 920                          * If we are executing a shared library which doesn't
  921                          * have an interpreter (probably ld.so.1) then
 922                          * we don't set the brkbase now.  Instead we
  923                          * delay its setting until the first call
 924                          * via grow.c::brk().  This permits ld.so.1 to
 925                          * initialize brkbase to the tail of the executable it
 926                          * loads (which is where it needs to be).
 927                          */
 928                         bigwad->exenv.ex_brkbase = (caddr_t)0;
 929                         bigwad->exenv.ex_bssbase = (caddr_t)0;
 930                         bigwad->exenv.ex_brksize = 0;
 931                 } else {
 932                         bigwad->exenv.ex_brkbase = brkbase;
 933                         bigwad->exenv.ex_bssbase = bssbase;
 934                         bigwad->exenv.ex_brksize = brksize;
 935                 }
 936                 bigwad->exenv.ex_magic = elfmagic;
 937                 bigwad->exenv.ex_vp = vp;
 938                 setexecenv(&bigwad->exenv);
 939         }
 940 
 941         ASSERT(error == 0);
 942         goto out;
 943 
 944 bad:
 945         if (fd != -1)           /* did we open the a.out yet */
 946                 (void) execclose(fd);
 947 
 948         psignal(p, SIGKILL);
 949 
 950         if (error == 0)
 951                 error = ENOEXEC;
 952 out:
 953         if (phdrbase != NULL)
 954                 kmem_free(phdrbase, phdrsize);
 955         if (cap != NULL)
 956                 kmem_free(cap, capsize);
 957         if (dyn != NULL)
 958                 kmem_free(dyn, dynsize);
 959         kmem_free(bigwad, sizeof (struct bigwad));
 960         return (error);
 961 }
 962 
 963 /*
 964  * Compute the memory size requirement for the ELF file.
 965  */
 966 static size_t
 967 elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata)
 968 {
 969         size_t  len;
 970         Phdr    *phdrp = (Phdr *)phdrbase;
 971         int     hsize = ehdrp->e_phentsize;
 972         int     first = 1;
 973         int     dfirst = 1;     /* first data segment */
 974         uintptr_t loaddr = 0;
 975         uintptr_t hiaddr = 0;
 976         uintptr_t lo, hi;
 977         int     i;
 978 
 979         for (i = nphdrs; i > 0; i--) {
 980                 if (phdrp->p_type == PT_LOAD) {
 981                         lo = phdrp->p_vaddr;
 982                         hi = lo + phdrp->p_memsz;
 983                         if (first) {
 984                                 loaddr = lo;
 985                                 hiaddr = hi;
 986                                 first = 0;
 987                         } else {
 988                                 if (loaddr > lo)
 989                                         loaddr = lo;
 990                                 if (hiaddr < hi)
 991                                         hiaddr = hi;
 992                         }
 993 
 994                         /*
 995                          * save the address of the first data segment
 996                          * of a object - used for the AT_SUNW_LDDATA
 997                          * aux entry.
 998                          */
 999                         if ((lddata != NULL) && dfirst &&
1000                             (phdrp->p_flags & PF_W)) {
1001                                 *lddata = lo;
1002                                 dfirst = 0;
1003                         }
1004                 }
1005                 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
1006         }
1007 
1008         len = hiaddr - (loaddr & PAGEMASK);
1009         len = roundup(len, PAGESIZE);
1010 
1011         return (len);
1012 }
1013 
1014 /*
1015  * Read in the ELF header and program header table.
1016  * SUSV3 requires:
1017  *      ENOEXEC File format is not recognized
1018  *      EINVAL  Format recognized but execution not supported
1019  */
1020 static int
1021 getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx,
1022     int *nphdrs)
1023 {
1024         int error;
1025         ssize_t resid;
1026 
1027         /*
1028          * We got here by the first two bytes in ident,
1029          * now read the entire ELF header.
1030          */
1031         if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
1032             sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0,
1033             (rlim64_t)0, credp, &resid)) != 0)
1034                 return (error);
1035 
1036         /*
1037          * Since a separate version is compiled for handling 32-bit and
1038          * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
1039          * doesn't need to be able to deal with 32-bit ELF files.
1040          */
1041         if (resid != 0 ||
1042             ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1043             ehdr->e_ident[EI_MAG3] != ELFMAG3)
1044                 return (ENOEXEC);
1045 
1046         if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
1047 #if defined(_ILP32) || defined(_ELF32_COMPAT)
1048             ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1049 #else
1050             ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1051 #endif
1052             !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
1053             ehdr->e_flags))
1054                 return (EINVAL);
1055 
1056         *nshdrs = ehdr->e_shnum;
1057         *shstrndx = ehdr->e_shstrndx;
1058         *nphdrs = ehdr->e_phnum;
1059 
1060         /*
1061          * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1062          * to read in the section header at index zero to acces the true
1063          * values for those fields.
1064          */
1065         if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
1066             *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
1067                 Shdr shdr;
1068 
1069                 if (ehdr->e_shoff == 0)
1070                         return (EINVAL);
1071 
1072                 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
1073                     sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
1074                     (rlim64_t)0, credp, &resid)) != 0)
1075                         return (error);
1076 
1077                 if (*nshdrs == 0)
1078                         *nshdrs = shdr.sh_size;
1079                 if (*shstrndx == SHN_XINDEX)
1080                         *shstrndx = shdr.sh_link;
1081                 if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
1082                         *nphdrs = shdr.sh_info;
1083         }
1084 
1085         return (0);
1086 }
1087 
1088 #ifdef _ELF32_COMPAT
1089 extern size_t elf_nphdr_max;
1090 #else
1091 size_t elf_nphdr_max = 1000;
1092 #endif
1093 
1094 static int
1095 getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs,
1096     caddr_t *phbasep, ssize_t *phsizep)
1097 {
1098         ssize_t resid, minsize;
1099         int err;
1100 
1101         /*
1102          * Since we're going to be using e_phentsize to iterate down the
1103          * array of program headers, it must be 8-byte aligned or else
1104          * a we might cause a misaligned access. We use all members through
1105          * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
1106          * e_phentsize must be at least large enough to include those
1107          * members.
1108          */
1109 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1110         minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags);
1111 #else
1112         minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz);
1113 #endif
1114         if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3))
1115                 return (EINVAL);
1116 
1117         *phsizep = nphdrs * ehdr->e_phentsize;
1118 
1119         if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1120                 if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1121                         return (ENOMEM);
1122         } else {
1123                 *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1124         }
1125 
1126         if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep,
1127             (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1128             credp, &resid)) != 0) {
1129                 kmem_free(*phbasep, *phsizep);
1130                 *phbasep = NULL;
1131                 return (err);
1132         }
1133 
1134         return (0);
1135 }
1136 
1137 #ifdef _ELF32_COMPAT
1138 extern size_t elf_nshdr_max;
1139 extern size_t elf_shstrtab_max;
1140 #else
1141 size_t elf_nshdr_max = 10000;
1142 size_t elf_shstrtab_max = 100 * 1024;
1143 #endif
1144 
1145 
1146 static int
1147 getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
1148     int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep,
1149     char **shstrbasep, ssize_t *shstrsizep)
1150 {
1151         ssize_t resid, minsize;
1152         int err;
1153         Shdr *shdr;
1154 
1155         /*
1156          * Since we're going to be using e_shentsize to iterate down the
1157          * array of section headers, it must be 8-byte aligned or else
1158          * a we might cause a misaligned access. We use all members through
1159          * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1160          * must be at least large enough to include that member. The index
1161          * of the string table section must also be valid.
1162          */
1163         minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize);
1164         if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) ||
1165             shstrndx >= nshdrs)
1166                 return (EINVAL);
1167 
1168         *shsizep = nshdrs * ehdr->e_shentsize;
1169 
1170         if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
1171                 if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
1172                         return (ENOMEM);
1173         } else {
1174                 *shbasep = kmem_alloc(*shsizep, KM_SLEEP);
1175         }
1176 
1177         if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep,
1178             (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1179             credp, &resid)) != 0) {
1180                 kmem_free(*shbasep, *shsizep);
1181                 return (err);
1182         }
1183 
1184         /*
1185          * Pull the section string table out of the vnode; fail if the size
1186          * is zero.
1187          */
1188         shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
1189         if ((*shstrsizep = shdr->sh_size) == 0) {
1190                 kmem_free(*shbasep, *shsizep);
1191                 return (EINVAL);
1192         }
1193 
1194         if (*shstrsizep > elf_shstrtab_max) {
1195                 if ((*shstrbasep = kmem_alloc(*shstrsizep,
1196                     KM_NOSLEEP)) == NULL) {
1197                         kmem_free(*shbasep, *shsizep);
1198                         return (ENOMEM);
1199                 }
1200         } else {
1201                 *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
1202         }
1203 
1204         if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep,
1205             (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
1206             credp, &resid)) != 0) {
1207                 kmem_free(*shbasep, *shsizep);
1208                 kmem_free(*shstrbasep, *shstrsizep);
1209                 return (err);
1210         }
1211 
1212         /*
1213          * Make sure the strtab is null-terminated to make sure we
1214          * don't run off the end of the table.
1215          */
1216         (*shstrbasep)[*shstrsizep - 1] = '\0';
1217 
1218         return (0);
1219 }
1220 
1221 static int
1222 mapelfexec(
1223         vnode_t *vp,
1224         Ehdr *ehdr,
1225         int nphdrs,
1226         caddr_t phdrbase,
1227         Phdr **uphdr,
1228         Phdr **intphdr,
1229         Phdr **stphdr,
1230         Phdr **dtphdr,
1231         Phdr *dataphdrp,
1232         caddr_t *bssbase,
1233         caddr_t *brkbase,
1234         intptr_t *voffset,
1235         intptr_t *minaddr,
1236         size_t len,
1237         long *execsz,
1238         size_t *brksize)
1239 {
1240         Phdr *phdr;
1241         int i, prot, error;
1242         caddr_t addr = NULL;
1243         size_t zfodsz;
1244         int ptload = 0;
1245         int page;
1246         off_t offset;
1247         int hsize = ehdr->e_phentsize;
1248         caddr_t mintmp = (caddr_t)-1;
1249         extern int use_brk_lpg;
1250 
1251         if (ehdr->e_type == ET_DYN) {
1252                 uint_t flags = 0;
1253                 /*
1254                  * Obtain the virtual address of a hole in the
1255                  * address space to map the "interpreter".
1256                  */
1257                 if (secflag_enabled(curproc, PROC_SEC_ASLR))
1258                         flags |= _MAP_RANDOMIZE;
1259 
1260                 map_addr(&addr, len, (offset_t)0, 1, flags);
1261                 if (addr == NULL)
1262                         return (ENOMEM);
1263                 *voffset = (intptr_t)addr;
1264 
1265                 /*
1266                  * Calculate the minimum vaddr so it can be subtracted out.
1267                  * According to the ELF specification, since PT_LOAD sections
1268                  * must be sorted by increasing p_vaddr values, this is
1269                  * guaranteed to be the first PT_LOAD section.
1270                  */
1271                 phdr = (Phdr *)phdrbase;
1272                 for (i = nphdrs; i > 0; i--) {
1273                         if (phdr->p_type == PT_LOAD) {
1274                                 *voffset -= (uintptr_t)phdr->p_vaddr;
1275                                 break;
1276                         }
1277                         phdr = (Phdr *)((caddr_t)phdr + hsize);
1278                 }
1279 
1280         } else {
1281                 *voffset = 0;
1282         }
1283         phdr = (Phdr *)phdrbase;
1284         for (i = nphdrs; i > 0; i--) {
1285                 switch (phdr->p_type) {
1286                 case PT_LOAD:
1287                         if ((*intphdr != NULL) && (*uphdr == NULL))
1288                                 return (0);
1289 
1290                         ptload = 1;
1291                         prot = PROT_USER;
1292                         if (phdr->p_flags & PF_R)
1293                                 prot |= PROT_READ;
1294                         if (phdr->p_flags & PF_W)
1295                                 prot |= PROT_WRITE;
1296                         if (phdr->p_flags & PF_X)
1297                                 prot |= PROT_EXEC;
1298 
1299                         addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1300 
1301                         /*
1302                          * Keep track of the segment with the lowest starting
1303                          * address.
1304                          */
1305                         if (addr < mintmp)
1306                                 mintmp = addr;
1307 
1308                         zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
1309 
1310                         offset = phdr->p_offset;
1311                         if (((uintptr_t)offset & PAGEOFFSET) ==
1312                             ((uintptr_t)addr & PAGEOFFSET) &&
1313                             (!(vp->v_flag & VNOMAP))) {
1314                                 page = 1;
1315                         } else {
1316                                 page = 0;
1317                         }
1318 
1319                         /*
1320                          * Set the heap pagesize for OOB when the bss size
1321                          * is known and use_brk_lpg is not 0.
1322                          */
1323                         if (brksize != NULL && use_brk_lpg &&
1324                             zfodsz != 0 && phdr == dataphdrp &&
1325                             (prot & PROT_WRITE)) {
1326                                 size_t tlen = P2NPHASE((uintptr_t)addr +
1327                                     phdr->p_filesz, PAGESIZE);
1328 
1329                                 if (zfodsz > tlen) {
1330                                         curproc->p_brkpageszc =
1331                                             page_szc(map_pgsz(MAPPGSZ_HEAP,
1332                                             curproc, addr + phdr->p_filesz +
1333                                             tlen, zfodsz - tlen, 0));
1334                                 }
1335                         }
1336 
1337                         if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1338                             (prot & PROT_WRITE)) {
1339                                 uint_t  szc = curproc->p_brkpageszc;
1340                                 size_t pgsz = page_get_pagesize(szc);
1341                                 caddr_t ebss = addr + phdr->p_memsz;
1342                                 /*
1343                                  * If we need extra space to keep the BSS an
1344                                  * integral number of pages in size, some of
1345                                  * that space may fall beyond p_brkbase, so we
1346                                  * need to set p_brksize to account for it
1347                                  * being (logically) part of the brk.
1348                                  */
1349                                 size_t extra_zfodsz;
1350 
1351                                 ASSERT(pgsz > PAGESIZE);
1352 
1353                                 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1354 
1355                                 if (error = execmap(vp, addr, phdr->p_filesz,
1356                                     zfodsz + extra_zfodsz, phdr->p_offset,
1357                                     prot, page, szc))
1358                                         goto bad;
1359                                 if (brksize != NULL)
1360                                         *brksize = extra_zfodsz;
1361                         } else {
1362                                 if (error = execmap(vp, addr, phdr->p_filesz,
1363                                     zfodsz, phdr->p_offset, prot, page, 0))
1364                                         goto bad;
1365                         }
1366 
1367                         if (bssbase != NULL && addr >= *bssbase &&
1368                             phdr == dataphdrp) {
1369                                 *bssbase = addr + phdr->p_filesz;
1370                         }
1371                         if (brkbase != NULL && addr >= *brkbase) {
1372                                 *brkbase = addr + phdr->p_memsz;
1373                         }
1374 
1375                         *execsz += btopr(phdr->p_memsz);
1376                         break;
1377 
1378                 case PT_INTERP:
1379                         if (ptload)
1380                                 goto bad;
1381                         *intphdr = phdr;
1382                         break;
1383 
1384                 case PT_SHLIB:
1385                         *stphdr = phdr;
1386                         break;
1387 
1388                 case PT_PHDR:
1389                         if (ptload)
1390                                 goto bad;
1391                         *uphdr = phdr;
1392                         break;
1393 
1394                 case PT_NULL:
1395                 case PT_DYNAMIC:
1396                 case PT_NOTE:
1397                         break;
1398 
1399                 case PT_SUNWDTRACE:
1400                         if (dtphdr != NULL)
1401                                 *dtphdr = phdr;
1402                         break;
1403 
1404                 default:
1405                         break;
1406                 }
1407                 phdr = (Phdr *)((caddr_t)phdr + hsize);
1408         }
1409 
1410         if (minaddr != NULL) {
1411                 ASSERT(mintmp != (caddr_t)-1);
1412                 *minaddr = (intptr_t)mintmp;
1413         }
1414 
1415         if (brkbase != NULL && secflag_enabled(curproc, PROC_SEC_ASLR)) {
1416                 size_t off;
1417                 uintptr_t base = (uintptr_t)*brkbase;
1418                 uintptr_t oend = base + *brksize;
1419 
1420                 ASSERT(ISP2(aslr_max_brk_skew));
1421 
1422                 (void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
1423                 base += P2PHASE(off, aslr_max_brk_skew);
1424                 base = P2ROUNDUP(base, PAGESIZE);
1425                 *brkbase = (caddr_t)base;
1426                 /*
1427                  * Above, we set *brksize to account for the possibility we
1428                  * had to grow the 'brk' in padding out the BSS to a page
1429                  * boundary.
1430                  *
1431                  * We now need to adjust that based on where we now are
1432                  * actually putting the brk.
1433                  */
1434                 if (oend > base)
1435                         *brksize = oend - base;
1436                 else
1437                         *brksize = 0;
1438         }
1439 
1440         return (0);
1441 bad:
1442         if (error == 0)
1443                 error = EINVAL;
1444         return (error);
1445 }
1446 
1447 int
1448 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1449     rlim64_t rlimit, cred_t *credp)
1450 {
1451         Note note;
1452         int error;
1453 
1454         bzero(&note, sizeof (note));
1455         bcopy("CORE", note.name, 4);
1456         note.nhdr.n_type = type;
1457         /*
1458          * The System V ABI states that n_namesz must be the length of the
1459          * string that follows the Nhdr structure including the terminating
1460          * null. The ABI also specifies that sufficient padding should be
1461          * included so that the description that follows the name string
1462          * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1463          * respectively. However, since this change was not made correctly
1464          * at the time of the 64-bit port, both 32- and 64-bit binaries
1465          * descriptions are only guaranteed to begin on a 4-byte boundary.
1466          */
1467         note.nhdr.n_namesz = 5;
1468         note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1469 
1470         if (error = core_write(vp, UIO_SYSSPACE, *offsetp, &note,
1471             sizeof (note), rlimit, credp))
1472                 return (error);
1473 
1474         *offsetp += sizeof (note);
1475 
1476         if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1477             note.nhdr.n_descsz, rlimit, credp))
1478                 return (error);
1479 
1480         *offsetp += note.nhdr.n_descsz;
1481         return (0);
1482 }
1483 
1484 /*
1485  * Copy the section data from one vnode to the section of another vnode.
1486  */
1487 static void
1488 copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset,
1489     void *buf, size_t size, cred_t *credp, rlim64_t rlimit)
1490 {
1491         ssize_t resid;
1492         size_t len, n = src->sh_size;
1493         offset_t off = 0;
1494 
1495         while (n != 0) {
1496                 len = MIN(size, n);
1497                 if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off,
1498                     UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
1499                     resid >= len ||
1500                     core_write(dst_vp, UIO_SYSSPACE, *doffset + off,
1501                     buf, len - resid, rlimit, credp) != 0) {
1502                         dst->sh_size = 0;
1503                         dst->sh_offset = 0;
1504                         return;
1505                 }
1506 
1507                 ASSERT(n >= len - resid);
1508 
1509                 n -= len - resid;
1510                 off += len - resid;
1511         }
1512 
1513         *doffset += src->sh_size;
1514 }
1515 
/*
 * Size tunable, 1 MiB by default.  The variable is defined once, in the
 * build without _ELF32_COMPAT; the _ELF32_COMPAT build only declares it
 * extern and so shares the same value.
 * NOTE(review): presumably a cap on a data buffer size used when sections
 * are processed for a core file - the consumer is not in view here; confirm.
 */
1516 #ifdef _ELF32_COMPAT
1517 extern size_t elf_datasz_max;
1518 #else
1519 size_t elf_datasz_max = 1 * 1024 * 1024;
1520 #endif
1521 
1522 /*
1523  * This function processes mappings that correspond to load objects to
1524  * examine their respective sections for elfcore(). It's called once with
1525  * v set to NULL to count the number of sections that we're going to need
1526  * and then again with v set to some allocated buffer that we fill in with
1527  * all the section data.
1528  */
1529 static int
1530 process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp,
1531     Shdr *v, int nv, rlim64_t rlimit, Off *doffsetp, int *nshdrsp)
1532 {
1533         vnode_t *lastvp = NULL;
1534         struct seg *seg;
1535         int i, j;
1536         void *data = NULL;
1537         size_t datasz = 0;
1538         shstrtab_t shstrtab;
1539         struct as *as = p->p_as;
1540         int error = 0;
1541 
1542         if (v != NULL)
1543                 shstrtab_init(&shstrtab);
1544 
1545         i = 1;
1546         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1547                 uint_t prot;
1548                 vnode_t *mvp;
1549                 void *tmp = NULL;
1550                 caddr_t saddr = seg->s_base;
1551                 caddr_t naddr;
1552                 caddr_t eaddr;
1553                 size_t segsize;
1554 
1555                 Ehdr ehdr;
1556                 int nshdrs, shstrndx, nphdrs;
1557                 caddr_t shbase;
1558                 ssize_t shsize;
1559                 char *shstrbase;
1560                 ssize_t shstrsize;
1561 
1562                 Shdr *shdr;
1563                 const char *name;
1564                 size_t sz;
1565                 uintptr_t off;
1566 
1567                 int ctf_ndx = 0;
1568                 int symtab_ndx = 0;
1569 
1570                 /*
1571                  * Since we're just looking for text segments of load
1572                  * objects, we only care about the protection bits; we don't
1573                  * care about the actual size of the segment so we use the
1574                  * reserved size. If the segment's size is zero, there's
1575                  * something fishy going on so we ignore this segment.
1576                  */
1577                 if (seg->s_ops != &segvn_ops ||
1578                     SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1579                     mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
1580                     (segsize = pr_getsegsize(seg, 1)) == 0)
1581                         continue;
1582 
1583                 eaddr = saddr + segsize;
1584                 prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
1585                 pr_getprot_done(&tmp);
1586 
1587                 /*
1588                  * Skip this segment unless the protection bits look like
1589                  * what we'd expect for a text segment.
1590                  */
1591                 if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
1592                         continue;
1593 
1594                 if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx,
1595                     &nphdrs) != 0 ||
1596                     getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx,
1597                     &shbase, &shsize, &shstrbase, &shstrsize) != 0)
1598                         continue;
1599 
1600                 off = ehdr.e_shentsize;
1601                 for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) {
1602                         Shdr *symtab = NULL, *strtab;
1603 
1604                         shdr = (Shdr *)(shbase + off);
1605 
1606                         if (shdr->sh_name >= shstrsize)
1607                                 continue;
1608 
1609                         name = shstrbase + shdr->sh_name;
1610 
1611                         if (strcmp(name, shstrtab_data[STR_CTF]) == 0) {
1612                                 if ((content & CC_CONTENT_CTF) == 0 ||
1613                                     ctf_ndx != 0)
1614                                         continue;
1615 
1616                                 if (shdr->sh_link > 0 &&
1617                                     shdr->sh_link < nshdrs) {
1618                                         symtab = (Shdr *)(shbase +
1619                                             shdr->sh_link * ehdr.e_shentsize);
1620                                 }
1621 
1622                                 if (v != NULL && i < nv - 1) {
1623                                         if (shdr->sh_size > datasz &&
1624                                             shdr->sh_size <= elf_datasz_max) {
1625                                                 if (data != NULL)
1626                                                         kmem_free(data, datasz);
1627 
1628                                                 datasz = shdr->sh_size;
1629                                                 data = kmem_alloc(datasz,
1630                                                     KM_SLEEP);
1631                                         }
1632 
1633                                         v[i].sh_name = shstrtab_ndx(&shstrtab,
1634                                             STR_CTF);
1635                                         v[i].sh_addr = (Addr)(uintptr_t)saddr;
1636                                         v[i].sh_type = SHT_PROGBITS;
1637                                         v[i].sh_addralign = 4;
1638                                         *doffsetp = roundup(*doffsetp,
1639                                             v[i].sh_addralign);
1640                                         v[i].sh_offset = *doffsetp;
1641                                         v[i].sh_size = shdr->sh_size;
1642                                         if (symtab == NULL)  {
1643                                                 v[i].sh_link = 0;
1644                                         } else if (symtab->sh_type ==
1645                                             SHT_SYMTAB &&
1646                                             symtab_ndx != 0) {
1647                                                 v[i].sh_link =
1648                                                     symtab_ndx;
1649                                         } else {
1650                                                 v[i].sh_link = i + 1;
1651                                         }
1652 
1653                                         copy_scn(shdr, mvp, &v[i], vp,
1654                                             doffsetp, data, datasz, credp,
1655                                             rlimit);
1656                                 }
1657 
1658                                 ctf_ndx = i++;
1659 
1660                                 /*
1661                                  * We've already dumped the symtab.
1662                                  */
1663                                 if (symtab != NULL &&
1664                                     symtab->sh_type == SHT_SYMTAB &&
1665                                     symtab_ndx != 0)
1666                                         continue;
1667 
1668                         } else if (strcmp(name,
1669                             shstrtab_data[STR_SYMTAB]) == 0) {
1670                                 if ((content & CC_CONTENT_SYMTAB) == 0 ||
1671                                     symtab != 0)
1672                                         continue;
1673 
1674                                 symtab = shdr;
1675                         }
1676 
1677                         if (symtab != NULL) {
1678                                 if ((symtab->sh_type != SHT_DYNSYM &&
1679                                     symtab->sh_type != SHT_SYMTAB) ||
1680                                     symtab->sh_link == 0 ||
1681                                     symtab->sh_link >= nshdrs)
1682                                         continue;
1683 
1684                                 strtab = (Shdr *)(shbase +
1685                                     symtab->sh_link * ehdr.e_shentsize);
1686 
1687                                 if (strtab->sh_type != SHT_STRTAB)
1688                                         continue;
1689 
1690                                 if (v != NULL && i < nv - 2) {
1691                                         sz = MAX(symtab->sh_size,
1692                                             strtab->sh_size);
1693                                         if (sz > datasz &&
1694                                             sz <= elf_datasz_max) {
1695                                                 if (data != NULL)
1696                                                         kmem_free(data, datasz);
1697 
1698                                                 datasz = sz;
1699                                                 data = kmem_alloc(datasz,
1700                                                     KM_SLEEP);
1701                                         }
1702 
1703                                         if (symtab->sh_type == SHT_DYNSYM) {
1704                                                 v[i].sh_name = shstrtab_ndx(
1705                                                     &shstrtab, STR_DYNSYM);
1706                                                 v[i + 1].sh_name = shstrtab_ndx(
1707                                                     &shstrtab, STR_DYNSTR);
1708                                         } else {
1709                                                 v[i].sh_name = shstrtab_ndx(
1710                                                     &shstrtab, STR_SYMTAB);
1711                                                 v[i + 1].sh_name = shstrtab_ndx(
1712                                                     &shstrtab, STR_STRTAB);
1713                                         }
1714 
1715                                         v[i].sh_type = symtab->sh_type;
1716                                         v[i].sh_addr = symtab->sh_addr;
1717                                         if (ehdr.e_type == ET_DYN ||
1718                                             v[i].sh_addr == 0)
1719                                                 v[i].sh_addr +=
1720                                                     (Addr)(uintptr_t)saddr;
1721                                         v[i].sh_addralign =
1722                                             symtab->sh_addralign;
1723                                         *doffsetp = roundup(*doffsetp,
1724                                             v[i].sh_addralign);
1725                                         v[i].sh_offset = *doffsetp;
1726                                         v[i].sh_size = symtab->sh_size;
1727                                         v[i].sh_link = i + 1;
1728                                         v[i].sh_entsize = symtab->sh_entsize;
1729                                         v[i].sh_info = symtab->sh_info;
1730 
1731                                         copy_scn(symtab, mvp, &v[i], vp,
1732                                             doffsetp, data, datasz, credp,
1733                                             rlimit);
1734 
1735                                         v[i + 1].sh_type = SHT_STRTAB;
1736                                         v[i + 1].sh_flags = SHF_STRINGS;
1737                                         v[i + 1].sh_addr = symtab->sh_addr;
1738                                         if (ehdr.e_type == ET_DYN ||
1739                                             v[i + 1].sh_addr == 0)
1740                                                 v[i + 1].sh_addr +=
1741                                                     (Addr)(uintptr_t)saddr;
1742                                         v[i + 1].sh_addralign =
1743                                             strtab->sh_addralign;
1744                                         *doffsetp = roundup(*doffsetp,
1745                                             v[i + 1].sh_addralign);
1746                                         v[i + 1].sh_offset = *doffsetp;
1747                                         v[i + 1].sh_size = strtab->sh_size;
1748 
1749                                         copy_scn(strtab, mvp, &v[i + 1], vp,
1750                                             doffsetp, data, datasz, credp,
1751                                             rlimit);
1752                                 }
1753 
1754                                 if (symtab->sh_type == SHT_SYMTAB)
1755                                         symtab_ndx = i;
1756                                 i += 2;
1757                         }
1758                 }
1759 
1760                 kmem_free(shstrbase, shstrsize);
1761                 kmem_free(shbase, shsize);
1762 
1763                 lastvp = mvp;
1764         }
1765 
1766         if (v == NULL) {
1767                 if (i == 1)
1768                         *nshdrsp = 0;
1769                 else
1770                         *nshdrsp = i + 1;
1771                 goto done;
1772         }
1773 
1774         if (i != nv - 1) {
1775                 cmn_err(CE_WARN, "elfcore: core dump failed for "
1776                     "process %d; address space is changing", p->p_pid);
1777                 error = EIO;
1778                 goto done;
1779         }
1780 
1781         v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
1782         v[i].sh_size = shstrtab_size(&shstrtab);
1783         v[i].sh_addralign = 1;
1784         *doffsetp = roundup(*doffsetp, v[i].sh_addralign);
1785         v[i].sh_offset = *doffsetp;
1786         v[i].sh_flags = SHF_STRINGS;
1787         v[i].sh_type = SHT_STRTAB;
1788 
1789         if (v[i].sh_size > datasz) {
1790                 if (data != NULL)
1791                         kmem_free(data, datasz);
1792 
1793                 datasz = v[i].sh_size;
1794                 data = kmem_alloc(datasz,
1795                     KM_SLEEP);
1796         }
1797 
1798         shstrtab_dump(&shstrtab, data);
1799 
1800         if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp,
1801             data, v[i].sh_size, rlimit, credp)) != 0)
1802                 goto done;
1803 
1804         *doffsetp += v[i].sh_size;
1805 
1806 done:
1807         if (data != NULL)
1808                 kmem_free(data, datasz);
1809 
1810         return (error);
1811 }
1812 
1813 int
1814 elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
1815     core_content_t content)
1816 {
1817         offset_t poffset, soffset;
1818         Off doffset;
1819         int error, i, nphdrs, nshdrs;
1820         int overflow = 0;
1821         struct seg *seg;
1822         struct as *as = p->p_as;
1823         union {
1824                 Ehdr ehdr;
1825                 Phdr phdr[1];
1826                 Shdr shdr[1];
1827         } *bigwad;
1828         size_t bigsize;
1829         size_t phdrsz, shdrsz;
1830         Ehdr *ehdr;
1831         Phdr *v;
1832         caddr_t brkbase;
1833         size_t brksize;
1834         caddr_t stkbase;
1835         size_t stksize;
1836         int ntries = 0;
1837         klwp_t *lwp = ttolwp(curthread);
1838 
1839 top:
1840         /*
1841          * Make sure we have everything we need (registers, etc.).
1842          * All other lwps have already stopped and are in an orderly state.
1843          */
1844         ASSERT(p == ttoproc(curthread));
1845         prstop(0, 0);
1846 
1847         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1848         nphdrs = prnsegs(as, 0) + 2;            /* two CORE note sections */
1849 
1850         /*
1851          * Count the number of section headers we're going to need.
1852          */
1853         nshdrs = 0;
1854         if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
1855                 (void) process_scns(content, p, credp, NULL, NULL, NULL, 0,
1856                     NULL, &nshdrs);
1857         }
1858         AS_LOCK_EXIT(as, &as->a_lock);
1859 
1860         ASSERT(nshdrs == 0 || nshdrs > 1);
1861 
1862         /*
1863          * The core file contents may required zero section headers, but if
1864          * we overflow the 16 bits allotted to the program header count in
1865          * the ELF header, we'll need that program header at index zero.
1866          */
1867         if (nshdrs == 0 && nphdrs >= PN_XNUM)
1868                 nshdrs = 1;
1869 
1870         phdrsz = nphdrs * sizeof (Phdr);
1871         shdrsz = nshdrs * sizeof (Shdr);
1872 
1873         bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz));
1874         bigwad = kmem_alloc(bigsize, KM_SLEEP);
1875 
1876         ehdr = &bigwad->ehdr;
1877         bzero(ehdr, sizeof (*ehdr));
1878 
1879         ehdr->e_ident[EI_MAG0] = ELFMAG0;
1880         ehdr->e_ident[EI_MAG1] = ELFMAG1;
1881         ehdr->e_ident[EI_MAG2] = ELFMAG2;
1882         ehdr->e_ident[EI_MAG3] = ELFMAG3;
1883         ehdr->e_ident[EI_CLASS] = ELFCLASS;
1884         ehdr->e_type = ET_CORE;
1885 
1886 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1887 
1888 #if defined(__sparc)
1889         ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
1890         ehdr->e_machine = EM_SPARC;
1891 #elif defined(__i386) || defined(__i386_COMPAT)
1892         ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1893         ehdr->e_machine = EM_386;
1894 #else
1895 #error "no recognized machine type is defined"
1896 #endif
1897 
1898 #else   /* !defined(_LP64) || defined(_ELF32_COMPAT) */
1899 
1900 #if defined(__sparc)
1901         ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
1902         ehdr->e_machine = EM_SPARCV9;
1903 #elif defined(__amd64)
1904         ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1905         ehdr->e_machine = EM_AMD64;
1906 #else
1907 #error "no recognized 64-bit machine type is defined"
1908 #endif
1909 
1910 #endif  /* !defined(_LP64) || defined(_ELF32_COMPAT) */
1911 
1912         /*
1913          * If the count of program headers or section headers or the index
1914          * of the section string table can't fit in the mere 16 bits
1915          * shortsightedly allotted to them in the ELF header, we use the
1916          * extended formats and put the real values in the section header
1917          * as index 0.
1918          */
1919         ehdr->e_version = EV_CURRENT;
1920         ehdr->e_ehsize = sizeof (Ehdr);
1921 
1922         if (nphdrs >= PN_XNUM)
1923                 ehdr->e_phnum = PN_XNUM;
1924         else
1925                 ehdr->e_phnum = (unsigned short)nphdrs;
1926 
1927         ehdr->e_phoff = sizeof (Ehdr);
1928         ehdr->e_phentsize = sizeof (Phdr);
1929 
1930         if (nshdrs > 0) {
1931                 if (nshdrs >= SHN_LORESERVE)
1932                         ehdr->e_shnum = 0;
1933                 else
1934                         ehdr->e_shnum = (unsigned short)nshdrs;
1935 
1936                 if (nshdrs - 1 >= SHN_LORESERVE)
1937                         ehdr->e_shstrndx = SHN_XINDEX;
1938                 else
1939                         ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);
1940 
1941                 ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs;
1942                 ehdr->e_shentsize = sizeof (Shdr);
1943         }
1944 
1945         if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
1946             sizeof (Ehdr), rlimit, credp))
1947                 goto done;
1948 
1949         poffset = sizeof (Ehdr);
1950         soffset = sizeof (Ehdr) + phdrsz;
1951         doffset = sizeof (Ehdr) + phdrsz + shdrsz;
1952 
1953         v = &bigwad->phdr[0];
1954         bzero(v, phdrsz);
1955 
1956         setup_old_note_header(&v[0], p);
1957         v[0].p_offset = doffset = roundup(doffset, sizeof (Word));
1958         doffset += v[0].p_filesz;
1959 
1960         setup_note_header(&v[1], p);
1961         v[1].p_offset = doffset = roundup(doffset, sizeof (Word));
1962         doffset += v[1].p_filesz;
1963 
1964         mutex_enter(&p->p_lock);
1965 
1966         brkbase = p->p_brkbase;
1967         brksize = p->p_brksize;
1968 
1969         stkbase = p->p_usrstack - p->p_stksize;
1970         stksize = p->p_stksize;
1971 
1972         mutex_exit(&p->p_lock);
1973 
1974         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1975         i = 2;
1976         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1977                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1978                 caddr_t saddr, naddr;
1979                 void *tmp = NULL;
1980                 extern struct seg_ops segspt_shmops;
1981 
1982                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1983                         uint_t prot;
1984                         size_t size;
1985                         int type;
1986                         vnode_t *mvp;
1987 
1988                         prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1989                         prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
1990                         if ((size = (size_t)(naddr - saddr)) == 0)
1991                                 continue;
1992                         if (i == nphdrs) {
1993                                 overflow++;
1994                                 continue;
1995                         }
1996                         v[i].p_type = PT_LOAD;
1997                         v[i].p_vaddr = (Addr)(uintptr_t)saddr;
1998                         v[i].p_memsz = size;
1999                         if (prot & PROT_READ)
2000                                 v[i].p_flags |= PF_R;
2001                         if (prot & PROT_WRITE)
2002                                 v[i].p_flags |= PF_W;
2003                         if (prot & PROT_EXEC)
2004                                 v[i].p_flags |= PF_X;
2005 
2006                         /*
2007                          * Figure out which mappings to include in the core.
2008                          */
2009                         type = SEGOP_GETTYPE(seg, saddr);
2010 
2011                         if (saddr == stkbase && size == stksize) {
2012                                 if (!(content & CC_CONTENT_STACK))
2013                                         goto exclude;
2014 
2015                         } else if (saddr == brkbase && size == brksize) {
2016                                 if (!(content & CC_CONTENT_HEAP))
2017                                         goto exclude;
2018 
2019                         } else if (seg->s_ops == &segspt_shmops) {
2020                                 if (type & MAP_NORESERVE) {
2021                                         if (!(content & CC_CONTENT_DISM))
2022                                                 goto exclude;
2023                                 } else {
2024                                         if (!(content & CC_CONTENT_ISM))
2025                                                 goto exclude;
2026                                 }
2027 
2028                         } else if (seg->s_ops != &segvn_ops) {
2029                                 goto exclude;
2030 
2031                         } else if (type & MAP_SHARED) {
2032                                 if (shmgetid(p, saddr) != SHMID_NONE) {
2033                                         if (!(content & CC_CONTENT_SHM))
2034                                                 goto exclude;
2035 
2036                                 } else if (SEGOP_GETVP(seg, seg->s_base,
2037                                     &mvp) != 0 || mvp == NULL ||
2038                                     mvp->v_type != VREG) {
2039                                         if (!(content & CC_CONTENT_SHANON))
2040                                                 goto exclude;
2041 
2042                                 } else {
2043                                         if (!(content & CC_CONTENT_SHFILE))
2044                                                 goto exclude;
2045                                 }
2046 
2047                         } else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
2048                             mvp == NULL || mvp->v_type != VREG) {
2049                                 if (!(content & CC_CONTENT_ANON))
2050                                         goto exclude;
2051 
2052                         } else if (prot == (PROT_READ | PROT_EXEC)) {
2053                                 if (!(content & CC_CONTENT_TEXT))
2054                                         goto exclude;
2055 
2056                         } else if (prot == PROT_READ) {
2057                                 if (!(content & CC_CONTENT_RODATA))
2058                                         goto exclude;
2059 
2060                         } else {
2061                                 if (!(content & CC_CONTENT_DATA))
2062                                         goto exclude;
2063                         }
2064 
2065                         doffset = roundup(doffset, sizeof (Word));
2066                         v[i].p_offset = doffset;
2067                         v[i].p_filesz = size;
2068                         doffset += size;
2069 exclude:
2070                         i++;
2071                 }
2072                 ASSERT(tmp == NULL);
2073         }
2074         AS_LOCK_EXIT(as, &as->a_lock);
2075 
2076         if (overflow || i != nphdrs) {
2077                 if (ntries++ == 0) {
2078                         kmem_free(bigwad, bigsize);
2079                         overflow = 0;
2080                         goto top;
2081                 }
2082                 cmn_err(CE_WARN, "elfcore: core dump failed for "
2083                     "process %d; address space is changing", p->p_pid);
2084                 error = EIO;
2085                 goto done;
2086         }
2087 
2088         if ((error = core_write(vp, UIO_SYSSPACE, poffset,
2089             v, phdrsz, rlimit, credp)) != 0)
2090                 goto done;
2091 
2092         if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit,
2093             credp)) != 0)
2094                 goto done;
2095 
2096         if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit,
2097             credp, content)) != 0)
2098                 goto done;
2099 
2100         for (i = 2; i < nphdrs; i++) {
2101                 prkillinfo_t killinfo;
2102                 sigqueue_t *sq;
2103                 int sig, j;
2104 
2105                 if (v[i].p_filesz == 0)
2106                         continue;
2107 
2108                 /*
2109                  * If dumping out this segment fails, rather than failing
2110                  * the core dump entirely, we reset the size of the mapping
2111                  * to zero to indicate that the data is absent from the core
2112                  * file and or in the PF_SUNW_FAILURE flag to differentiate
2113                  * this from mappings that were excluded due to the core file
2114                  * content settings.
2115                  */
2116                 if ((error = core_seg(p, vp, v[i].p_offset,
2117                     (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
2118                     rlimit, credp)) == 0) {
2119                         continue;
2120                 }
2121 
2122                 if ((sig = lwp->lwp_cursig) == 0) {
2123                         /*
2124                          * We failed due to something other than a signal.
2125                          * Since the space reserved for the segment is now
2126                          * unused, we stash the errno in the first four
2127                          * bytes. This undocumented interface will let us
2128                          * understand the nature of the failure.
2129                          */
2130                         (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2131                             &error, sizeof (error), rlimit, credp);
2132 
2133                         v[i].p_filesz = 0;
2134                         v[i].p_flags |= PF_SUNW_FAILURE;
2135                         if ((error = core_write(vp, UIO_SYSSPACE,
2136                             poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
2137                             rlimit, credp)) != 0)
2138                                 goto done;
2139 
2140                         continue;
2141                 }
2142 
2143                 /*
2144                  * We took a signal.  We want to abort the dump entirely, but
2145                  * we also want to indicate what failed and why.  We therefore
2146                  * use the space reserved for the first failing segment to
2147                  * write our error (which, for purposes of compatability with
2148                  * older core dump readers, we set to EINTR) followed by any
2149                  * siginfo associated with the signal.
2150                  */
2151                 bzero(&killinfo, sizeof (killinfo));
2152                 killinfo.prk_error = EINTR;
2153 
2154                 sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;
2155 
2156                 if (sq != NULL) {
2157                         bcopy(&sq->sq_info, &killinfo.prk_info,
2158                             sizeof (sq->sq_info));
2159                 } else {
2160                         killinfo.prk_info.si_signo = lwp->lwp_cursig;
2161                         killinfo.prk_info.si_code = SI_NOINFO;
2162                 }
2163 
2164 #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2165                 /*
2166                  * If this is a 32-bit process, we need to translate from the
2167                  * native siginfo to the 32-bit variant.  (Core readers must
2168                  * always have the same data model as their target or must
2169                  * be aware of -- and compensate for -- data model differences.)
2170                  */
2171                 if (curproc->p_model == DATAMODEL_ILP32) {
2172                         siginfo32_t si32;
2173 
2174                         siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
2175                         bcopy(&si32, &killinfo.prk_info, sizeof (si32));
2176                 }
2177 #endif
2178 
2179                 (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2180                     &killinfo, sizeof (killinfo), rlimit, credp);
2181 
2182                 /*
2183                  * For the segment on which we took the signal, indicate that
2184                  * its data now refers to a siginfo.
2185                  */
2186                 v[i].p_filesz = 0;
2187                 v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
2188                     PF_SUNW_SIGINFO;
2189 
2190                 /*
2191                  * And for every other segment, indicate that its absence
2192                  * is due to a signal.
2193                  */
2194                 for (j = i + 1; j < nphdrs; j++) {
2195                         v[j].p_filesz = 0;
2196                         v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
2197                 }
2198 
2199                 /*
2200                  * Finally, write out our modified program headers.
2201                  */
2202                 if ((error = core_write(vp, UIO_SYSSPACE,
2203                     poffset + sizeof (v[i]) * i, &v[i],
2204                     sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)
2205                         goto done;
2206 
2207                 break;
2208         }
2209 
2210         if (nshdrs > 0) {
2211                 bzero(&bigwad->shdr[0], shdrsz);
2212 
2213                 if (nshdrs >= SHN_LORESERVE)
2214                         bigwad->shdr[0].sh_size = nshdrs;
2215 
2216                 if (nshdrs - 1 >= SHN_LORESERVE)
2217                         bigwad->shdr[0].sh_link = nshdrs - 1;
2218 
2219                 if (nphdrs >= PN_XNUM)
2220                         bigwad->shdr[0].sh_info = nphdrs;
2221 
2222                 if (nshdrs > 1) {
2223                         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2224                         if ((error = process_scns(content, p, credp, vp,
2225                             &bigwad->shdr[0], nshdrs, rlimit, &doffset,
2226                             NULL)) != 0) {
2227                                 AS_LOCK_EXIT(as, &as->a_lock);
2228                                 goto done;
2229                         }
2230                         AS_LOCK_EXIT(as, &as->a_lock);
2231                 }
2232 
2233                 if ((error = core_write(vp, UIO_SYSSPACE, soffset,
2234                     &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0)
2235                         goto done;
2236         }
2237 
2238 done:
2239         kmem_free(bigwad, bigsize);
2240         return (error);
2241 }
2242 
2243 #ifndef _ELF32_COMPAT
2244 
/*
 * Exec switch entry for the native ELF class: it names the magic string
 * to match (64-bit on LP64 kernels, 32-bit otherwise) and routes to
 * elfexec() and elfcore().
 * NOTE(review): struct execsw is declared outside this file; the 0 and 5
 * are presumably the offset and length of the magic string -- confirm.
 */
2245 static struct execsw esw = {
2246 #ifdef  _LP64
2247         elf64magicstr,
2248 #else   /* _LP64 */
2249         elf32magicstr,
2250 #endif  /* _LP64 */
2251         0,
2252         5,
2253         elfexec,
2254         elfcore
2255 };
2256 
/*
 * Module linkage element tying the native exec switch entry (esw) to
 * mod_execops, along with a description string for the module.
 */
2257 static struct modlexec modlexec = {
2258         &mod_execops, "exec module for elf", &esw
2259 };
2260 
2261 #ifdef  _LP64
/*
 * Entry points of the 32-bit ELF handler, which are defined outside this
 * compilation unit.
 * NOTE(review): presumably produced by building this same source with
 * _ELF32_COMPAT defined -- confirm against the build.
 */
2262 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
2263                         intpdata_t *idatap, int level, long *execsz,
2264                         int setid, caddr_t exec_file, cred_t *cred,
2265                         int brand_action);
2266 extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
2267                         rlim64_t rlimit, int sig, core_content_t content);
2268 
/*
 * Exec switch entry for 32-bit ELF objects on an LP64 kernel; same shape
 * as esw but matching elf32magicstr and routing to elf32exec() and
 * elf32core().
 */
2269 static struct execsw esw32 = {
2270         elf32magicstr,
2271         0,
2272         5,
2273         elf32exec,
2274         elf32core
2275 };
2276 
/*
 * Module linkage element for the 32-bit exec switch entry (esw32).
 */
2277 static struct modlexec modlexec32 = {
2278         &mod_execops, "32-bit exec module for elf", &esw32
2279 };
2280 #endif  /* _LP64 */
2281 
/*
 * Linkage handed to mod_install(), mod_remove() and mod_info() below.
 * It lists the native ELF handler and, on LP64 kernels, the 32-bit
 * handler as well; the list ends with NULL.
 */
2282 static struct modlinkage modlinkage = {
2283         MODREV_1,
2284         (void *)&modlexec,
2285 #ifdef  _LP64
2286         (void *)&modlexec32,
2287 #endif  /* _LP64 */
2288         NULL
2289 };
2290 
2291 int
2292 _init(void)
2293 {
2294         return (mod_install(&modlinkage));
2295 }
2296 
2297 int
2298 _fini(void)
2299 {
2300         return (mod_remove(&modlinkage));
2301 }
2302 
2303 int
2304 _info(struct modinfo *modinfop)
2305 {
2306         return (mod_info(&modlinkage, modinfop));
2307 }
2308 
2309 #endif  /* !_ELF32_COMPAT */