1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  27 /*        All Rights Reserved   */
  28 /*
  29  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/thread.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/signal.h>
  37 #include <sys/cred.h>
  38 #include <sys/user.h>
  39 #include <sys/errno.h>
  40 #include <sys/vnode.h>
  41 #include <sys/mman.h>
  42 #include <sys/kmem.h>
  43 #include <sys/proc.h>
  44 #include <sys/pathname.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/systm.h>
  47 #include <sys/elf.h>
  48 #include <sys/vmsystm.h>
  49 #include <sys/debug.h>
  50 #include <sys/auxv.h>
  51 #include <sys/exec.h>
  52 #include <sys/prsystm.h>
  53 #include <vm/as.h>
  54 #include <vm/rm.h>
  55 #include <vm/seg.h>
  56 #include <vm/seg_vn.h>
  57 #include <sys/modctl.h>
  58 #include <sys/systeminfo.h>
  59 #include <sys/vmparam.h>
  60 #include <sys/machelf.h>
  61 #include <sys/shm_impl.h>
  62 #include <sys/archsystm.h>
  63 #include <sys/fasttrap.h>
  64 #include <sys/brand.h>
  65 #include "elf_impl.h"
  66 #include <sys/sdt.h>
  67 #include <sys/siginfo.h>
  68 
  69 extern int at_flags;
  70 
  71 #define ORIGIN_STR      "ORIGIN"
  72 #define ORIGIN_STR_SIZE 6
  73 
  74 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
  75 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
  76     ssize_t *);
  77 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
  78     ssize_t *, caddr_t *, ssize_t *);
  79 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
  80 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
  81     Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
  82     caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
  83 
  84 typedef enum {
  85         STR_CTF,
  86         STR_SYMTAB,
  87         STR_DYNSYM,
  88         STR_STRTAB,
  89         STR_DYNSTR,
  90         STR_SHSTRTAB,
  91         STR_NUM
  92 } shstrtype_t;
  93 
  94 static const char *shstrtab_data[] = {
  95         ".SUNW_ctf",
  96         ".symtab",
  97         ".dynsym",
  98         ".strtab",
  99         ".dynstr",
 100         ".shstrtab"
 101 };
 102 
 103 typedef struct shstrtab {
 104         int     sst_ndx[STR_NUM];
 105         int     sst_cur;
 106 } shstrtab_t;
 107 
 108 static void
 109 shstrtab_init(shstrtab_t *s)
 110 {
 111         bzero(&s->sst_ndx, sizeof (s->sst_ndx));
 112         s->sst_cur = 1;
 113 }
 114 
 115 static int
 116 shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
 117 {
 118         int ret;
 119 
 120         if ((ret = s->sst_ndx[type]) != 0)
 121                 return (ret);
 122 
 123         ret = s->sst_ndx[type] = s->sst_cur;
 124         s->sst_cur += strlen(shstrtab_data[type]) + 1;
 125 
 126         return (ret);
 127 }
 128 
 129 static size_t
 130 shstrtab_size(const shstrtab_t *s)
 131 {
 132         return (s->sst_cur);
 133 }
 134 
 135 static void
 136 shstrtab_dump(const shstrtab_t *s, char *buf)
 137 {
 138         int i, ndx;
 139 
 140         *buf = '\0';
 141         for (i = 0; i < STR_NUM; i++) {
 142                 if ((ndx = s->sst_ndx[i]) != 0)
 143                         (void) strcpy(buf + ndx, shstrtab_data[i]);
 144         }
 145 }
 146 
 147 static int
 148 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
 149 {
 150         ASSERT(phdrp->p_type == PT_SUNWDTRACE);
 151 
 152         /*
 153          * See the comment in fasttrap.h for information on how to safely
 154          * update this program header.
 155          */
 156         if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
 157             (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
 158                 return (-1);
 159 
 160         args->thrptr = phdrp->p_vaddr + base;
 161 
 162         return (0);
 163 }
 164 
 165 /*
 166  * Map in the executable pointed to by vp. Returns 0 on success.
 167  */
 168 int
 169 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
 170     intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
 171     caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
 172 {
 173         size_t          len;
 174         struct vattr    vat;
 175         caddr_t         phdrbase = NULL;
 176         ssize_t         phdrsize;
 177         int             nshdrs, shstrndx, nphdrs;
 178         int             error = 0;
 179         Phdr            *uphdr = NULL;
 180         Phdr            *junk = NULL;
 181         Phdr            *dynphdr = NULL;
 182         Phdr            *dtrphdr = NULL;
 183         uintptr_t       lddata;
 184         long            execsz;
 185         intptr_t        minaddr;
 186 
 187         if (lddatap != NULL)
 188                 *lddatap = NULL;
 189 
 190         if (error = execpermissions(vp, &vat, args)) {
 191                 uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
 192                 return (error);
 193         }
 194 
 195         if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
 196             &nphdrs)) != 0 ||
 197             (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
 198             &phdrsize)) != 0) {
 199                 uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
 200                 return (error);
 201         }
 202 
 203         if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
 204                 uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
 205                 kmem_free(phdrbase, phdrsize);
 206                 return (ENOEXEC);
 207         }
 208         if (lddatap != NULL)
 209                 *lddatap = lddata;
 210 
 211         if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
 212             &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
 213             len, &execsz, brksize)) {
 214                 uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
 215                 kmem_free(phdrbase, phdrsize);
 216                 return (error);
 217         }
 218 
 219         /*
 220          * Inform our caller if the executable needs an interpreter.
 221          */
 222         *interp = (dynphdr == NULL) ? 0 : 1;
 223 
 224         /*
 225          * If this is a statically linked executable, voffset should indicate
 226          * the address of the executable itself (it normally holds the address
 227          * of the interpreter).
 228          */
 229         if (ehdr->e_type == ET_EXEC && *interp == 0)
 230                 *voffset = minaddr;
 231 
 232         if (uphdr != NULL) {
 233                 *uphdr_vaddr = uphdr->p_vaddr;
 234         } else {
 235                 *uphdr_vaddr = (Addr)-1;
 236         }
 237 
 238         kmem_free(phdrbase, phdrsize);
 239         return (error);
 240 }
 241 
 242 /*ARGSUSED*/
 243 int
 244 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 245     int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
 246     int brand_action)
 247 {
 248         caddr_t         phdrbase = NULL;
 249         caddr_t         bssbase = 0;
 250         caddr_t         brkbase = 0;
 251         size_t          brksize = 0;
 252         ssize_t         dlnsize;
 253         aux_entry_t     *aux;
 254         int             error;
 255         ssize_t         resid;
 256         int             fd = -1;
 257         intptr_t        voffset;
 258         Phdr            *dyphdr = NULL;
 259         Phdr            *stphdr = NULL;
 260         Phdr            *uphdr = NULL;
 261         Phdr            *junk = NULL;
 262         size_t          len;
 263         ssize_t         phdrsize;
 264         int             postfixsize = 0;
 265         int             i, hsize;
 266         Phdr            *phdrp;
 267         Phdr            *dataphdrp = NULL;
 268         Phdr            *dtrphdr;
 269         Phdr            *capphdr = NULL;
 270         Cap             *cap = NULL;
 271         ssize_t         capsize;
 272         int             hasu = 0;
 273         int             hasauxv = 0;
 274         int             hasdy = 0;
 275         int             branded = 0;
 276 
 277         struct proc *p = ttoproc(curthread);
 278         struct user *up = PTOU(p);
 279         struct bigwad {
 280                 Ehdr    ehdr;
 281                 aux_entry_t     elfargs[__KERN_NAUXV_IMPL];
 282                 char            dl_name[MAXPATHLEN];
 283                 char            pathbuf[MAXPATHLEN];
 284                 struct vattr    vattr;
 285                 struct execenv  exenv;
 286         } *bigwad;      /* kmem_alloc this behemoth so we don't blow stack */
 287         Ehdr            *ehdrp;
 288         int             nshdrs, shstrndx, nphdrs;
 289         char            *dlnp;
 290         char            *pathbufp;
 291         rlim64_t        limit;
 292         rlim64_t        roundlimit;
 293 
 294         ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
 295 
 296         bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
 297         ehdrp = &bigwad->ehdr;
 298         dlnp = bigwad->dl_name;
 299         pathbufp = bigwad->pathbuf;
 300 
 301         /*
 302          * Obtain ELF and program header information.
 303          */
 304         if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
 305             &nphdrs)) != 0 ||
 306             (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
 307             &phdrsize)) != 0)
 308                 goto out;
 309 
 310         /*
 311          * Prevent executing an ELF file that has no entry point.
 312          */
 313         if (ehdrp->e_entry == 0) {
 314                 uprintf("%s: Bad entry point\n", exec_file);
 315                 goto bad;
 316         }
 317 
 318         /*
 319          * Put data model that we're exec-ing to into the args passed to
 320          * exec_args(), so it will know what it is copying to on new stack.
 321          * Now that we know whether we are exec-ing a 32-bit or 64-bit
 322          * executable, we can set execsz with the appropriate NCARGS.
 323          */
 324 #ifdef  _LP64
 325         if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
 326                 args->to_model = DATAMODEL_ILP32;
 327                 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
 328         } else {
 329                 args->to_model = DATAMODEL_LP64;
 330                 args->stk_prot &= ~PROT_EXEC;
 331 #if defined(__i386) || defined(__amd64)
 332                 args->dat_prot &= ~PROT_EXEC;
 333 #endif
 334                 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
 335         }
 336 #else   /* _LP64 */
 337         args->to_model = DATAMODEL_ILP32;
 338         *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
 339 #endif  /* _LP64 */
 340 
 341         /*
 342          * We delay invoking the brand callback until we've figured out
 343          * what kind of elf binary we're trying to run, 32-bit or 64-bit.
 344          * We do this because now the brand library can just check
 345          * args->to_model to see if the target is 32-bit or 64-bit without
  346          * having to duplicate all the code above.
 347          *
 348          * The level checks associated with brand handling below are used to
 349          * prevent a loop since the brand elfexec function typically comes back
 350          * through this function. We must check <= here since the nested
 351          * handling in the #! interpreter code will increment the level before
 352          * calling gexec to run the final elfexec interpreter.
 353          */
 354         if ((level <= INTP_MAXDEPTH) &&
 355             (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 356                 error = BROP(p)->b_elfexec(vp, uap, args,
 357                     idatap, level + 1, execsz, setid, exec_file, cred,
 358                     brand_action);
 359                 goto out;
 360         }
 361 
 362         /*
 363          * Determine aux size now so that stack can be built
 364          * in one shot (except actual copyout of aux image),
 365          * determine any non-default stack protections,
 366          * and still have this code be machine independent.
 367          */
 368         hsize = ehdrp->e_phentsize;
 369         phdrp = (Phdr *)phdrbase;
 370         for (i = nphdrs; i > 0; i--) {
 371                 switch (phdrp->p_type) {
 372                 case PT_INTERP:
 373                         hasauxv = hasdy = 1;
 374                         break;
 375                 case PT_PHDR:
 376                         hasu = 1;
 377                         break;
 378                 case PT_SUNWSTACK:
 379                         args->stk_prot = PROT_USER;
 380                         if (phdrp->p_flags & PF_R)
 381                                 args->stk_prot |= PROT_READ;
 382                         if (phdrp->p_flags & PF_W)
 383                                 args->stk_prot |= PROT_WRITE;
 384                         if (phdrp->p_flags & PF_X)
 385                                 args->stk_prot |= PROT_EXEC;
 386                         break;
 387                 case PT_LOAD:
 388                         dataphdrp = phdrp;
 389                         break;
 390                 case PT_SUNWCAP:
 391                         capphdr = phdrp;
 392                         break;
 393                 }
 394                 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
 395         }
 396 
 397         if (ehdrp->e_type != ET_EXEC) {
 398                 dataphdrp = NULL;
 399                 hasauxv = 1;
 400         }
 401 
 402         /* Copy BSS permissions to args->dat_prot */
 403         if (dataphdrp != NULL) {
 404                 args->dat_prot = PROT_USER;
 405                 if (dataphdrp->p_flags & PF_R)
 406                         args->dat_prot |= PROT_READ;
 407                 if (dataphdrp->p_flags & PF_W)
 408                         args->dat_prot |= PROT_WRITE;
 409                 if (dataphdrp->p_flags & PF_X)
 410                         args->dat_prot |= PROT_EXEC;
 411         }
 412 
 413         /*
  414          * If an auxvector will be required - reserve the space for
 415          * it now.  This may be increased by exec_args if there are
 416          * ISA-specific types (included in __KERN_NAUXV_IMPL).
 417          */
 418         if (hasauxv) {
 419                 /*
  420                  * If an AUX vector is being built - the base AUX
 421                  * entries are:
 422                  *
 423                  *      AT_BASE
 424                  *      AT_FLAGS
 425                  *      AT_PAGESZ
 426                  *      AT_SUN_AUXFLAGS
 427                  *      AT_SUN_HWCAP
 428                  *      AT_SUN_HWCAP2
 429                  *      AT_SUN_PLATFORM (added in stk_copyout)
 430                  *      AT_SUN_EXECNAME (added in stk_copyout)
 431                  *      AT_NULL
 432                  *
 433                  * total == 9
 434                  */
 435                 if (hasdy && hasu) {
 436                         /*
 437                          * Has PT_INTERP & PT_PHDR - the auxvectors that
 438                          * will be built are:
 439                          *
 440                          *      AT_PHDR
 441                          *      AT_PHENT
 442                          *      AT_PHNUM
 443                          *      AT_ENTRY
 444                          *      AT_LDDATA
 445                          *
 446                          * total = 5
 447                          */
 448                         args->auxsize = (9 + 5) * sizeof (aux_entry_t);
 449                 } else if (hasdy) {
 450                         /*
 451                          * Has PT_INTERP but no PT_PHDR
 452                          *
 453                          *      AT_EXECFD
 454                          *      AT_LDDATA
 455                          *
 456                          * total = 2
 457                          */
 458                         args->auxsize = (9 + 2) * sizeof (aux_entry_t);
 459                 } else {
 460                         args->auxsize = 9 * sizeof (aux_entry_t);
 461                 }
 462         } else {
 463                 args->auxsize = 0;
 464         }
 465 
 466         /*
 467          * If this binary is using an emulator, we need to add an
 468          * AT_SUN_EMULATOR aux entry.
 469          */
 470         if (args->emulator != NULL)
 471                 args->auxsize += sizeof (aux_entry_t);
 472 
 473         if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 474                 branded = 1;
 475                 /*
 476                  * We will be adding 4 entries to the aux vectors.  One for
  477                  * the brandname and 3 for the brand specific aux vectors.
 478                  */
 479                 args->auxsize += 4 * sizeof (aux_entry_t);
 480         }
 481 
 482         /* Hardware/Software capabilities */
 483         if (capphdr != NULL &&
 484             (capsize = capphdr->p_filesz) > 0 &&
 485             capsize <= 16 * sizeof (*cap)) {
 486                 int ncaps = capsize / sizeof (*cap);
 487                 Cap *cp;
 488 
 489                 cap = kmem_alloc(capsize, KM_SLEEP);
 490                 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
 491                     capsize, (offset_t)capphdr->p_offset,
 492                     UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
 493                         uprintf("%s: Cannot read capabilities section\n",
 494                             exec_file);
 495                         goto out;
 496                 }
 497                 for (cp = cap; cp < cap + ncaps; cp++) {
 498                         if (cp->c_tag == CA_SUNW_SF_1 &&
 499                             (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
 500                                 if (args->to_model == DATAMODEL_LP64)
 501                                         args->addr32 = 1;
 502                                 break;
 503                         }
 504                 }
 505         }
 506 
 507         aux = bigwad->elfargs;
 508         /*
 509          * Move args to the user's stack.
 510          * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
 511          */
 512         if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
 513                 if (error == -1) {
 514                         error = ENOEXEC;
 515                         goto bad;
 516                 }
 517                 goto out;
 518         }
 519         /* we're single threaded after this point */
 520 
 521         /*
 522          * If this is an ET_DYN executable (shared object),
 523          * determine its memory size so that mapelfexec() can load it.
 524          */
 525         if (ehdrp->e_type == ET_DYN)
 526                 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
 527         else
 528                 len = 0;
 529 
 530         dtrphdr = NULL;
 531 
 532         if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
 533             &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
 534             len, execsz, &brksize)) != 0)
 535                 goto bad;
 536 
 537         if (uphdr != NULL && dyphdr == NULL)
 538                 goto bad;
 539 
 540         if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 541                 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
 542                 goto bad;
 543         }
 544 
 545         if (dyphdr != NULL) {
 546                 size_t          len;
 547                 uintptr_t       lddata;
 548                 char            *p;
 549                 struct vnode    *nvp;
 550 
 551                 dlnsize = dyphdr->p_filesz;
 552 
 553                 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
 554                         goto bad;
 555 
 556                 /*
 557                  * Read in "interpreter" pathname.
 558                  */
 559                 if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz,
 560                     (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
 561                     CRED(), &resid)) != 0) {
 562                         uprintf("%s: Cannot obtain interpreter pathname\n",
 563                             exec_file);
 564                         goto bad;
 565                 }
 566 
 567                 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
 568                         goto bad;
 569 
 570                 /*
 571                  * Search for '$ORIGIN' token in interpreter path.
 572                  * If found, expand it.
 573                  */
 574                 for (p = dlnp; p = strchr(p, '$'); ) {
 575                         uint_t  len, curlen;
 576                         char    *_ptr;
 577 
 578                         if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
 579                                 continue;
 580 
 581                         /*
 582                          * We don't support $ORIGIN on setid programs to close
 583                          * a potential attack vector.
 584                          */
 585                         if ((setid & EXECSETID_SETID) != 0) {
 586                                 error = ENOEXEC;
 587                                 goto bad;
 588                         }
 589 
 590                         curlen = 0;
 591                         len = p - dlnp - 1;
 592                         if (len) {
 593                                 bcopy(dlnp, pathbufp, len);
 594                                 curlen += len;
 595                         }
 596                         if (_ptr = strrchr(args->pathname, '/')) {
 597                                 len = _ptr - args->pathname;
 598                                 if ((curlen + len) > MAXPATHLEN)
 599                                         break;
 600 
 601                                 bcopy(args->pathname, &pathbufp[curlen], len);
 602                                 curlen += len;
 603                         } else {
 604                                 /*
 605                                  * executable is a basename found in the
  606                                  * current directory.  So - just substitute
 607                                  * '.' for ORIGIN.
 608                                  */
 609                                 pathbufp[curlen] = '.';
 610                                 curlen++;
 611                         }
 612                         p += ORIGIN_STR_SIZE;
 613                         len = strlen(p);
 614 
 615                         if ((curlen + len) > MAXPATHLEN)
 616                                 break;
 617                         bcopy(p, &pathbufp[curlen], len);
 618                         curlen += len;
 619                         pathbufp[curlen++] = '\0';
 620                         bcopy(pathbufp, dlnp, curlen);
 621                 }
 622 
 623                 /*
 624                  * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
 625                  * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
 626                  * Just in case /usr is not mounted, change it now.
 627                  */
 628                 if (strcmp(dlnp, USR_LIB_RTLD) == 0)
 629                         dlnp += 4;
 630                 error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
 631                 if (error && dlnp != bigwad->dl_name) {
 632                         /* new kernel, old user-level */
 633                         error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
 634                             NULLVPP, &nvp);
 635                 }
 636                 if (error) {
 637                         uprintf("%s: Cannot find %s\n", exec_file, dlnp);
 638                         goto bad;
 639                 }
 640 
 641                 /*
 642                  * Setup the "aux" vector.
 643                  */
 644                 if (uphdr) {
 645                         if (ehdrp->e_type == ET_DYN) {
 646                                 /* don't use the first page */
 647                                 bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
 648                                 bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
 649                         } else {
 650                                 bigwad->exenv.ex_bssbase = bssbase;
 651                                 bigwad->exenv.ex_brkbase = brkbase;
 652                         }
 653                         bigwad->exenv.ex_brksize = brksize;
 654                         bigwad->exenv.ex_magic = elfmagic;
 655                         bigwad->exenv.ex_vp = vp;
 656                         setexecenv(&bigwad->exenv);
 657 
 658                         ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
 659                         ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
 660                         ADDAUX(aux, AT_PHNUM, nphdrs)
 661                         ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
 662                 } else {
 663                         if ((error = execopen(&vp, &fd)) != 0) {
 664                                 VN_RELE(nvp);
 665                                 goto bad;
 666                         }
 667 
 668                         ADDAUX(aux, AT_EXECFD, fd)
 669                 }
 670 
 671                 if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
 672                         VN_RELE(nvp);
 673                         uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
 674                         goto bad;
 675                 }
 676 
 677                 /*
 678                  * Now obtain the ELF header along with the entire program
 679                  * header contained in "nvp".
 680                  */
 681                 kmem_free(phdrbase, phdrsize);
 682                 phdrbase = NULL;
 683                 if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
 684                     &shstrndx, &nphdrs)) != 0 ||
 685                     (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
 686                     &phdrsize)) != 0) {
 687                         VN_RELE(nvp);
 688                         uprintf("%s: Cannot read %s\n", exec_file, dlnp);
 689                         goto bad;
 690                 }
 691 
 692                 /*
 693                  * Determine memory size of the "interpreter's" loadable
 694                  * sections.  This size is then used to obtain the virtual
 695                  * address of a hole, in the user's address space, large
 696                  * enough to map the "interpreter".
 697                  */
 698                 if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
 699                         VN_RELE(nvp);
 700                         uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
 701                         goto bad;
 702                 }
 703 
 704                 dtrphdr = NULL;
 705 
 706                 error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
 707                     &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
 708                     execsz, NULL);
 709                 if (error || junk != NULL) {
 710                         VN_RELE(nvp);
 711                         uprintf("%s: Cannot map %s\n", exec_file, dlnp);
 712                         goto bad;
 713                 }
 714 
 715                 /*
 716                  * We use the DTrace program header to initialize the
 717                  * architecture-specific user per-LWP location. The dtrace
 718                  * fasttrap provider requires ready access to per-LWP scratch
 719                  * space. We assume that there is only one such program header
 720                  * in the interpreter.
 721                  */
 722                 if (dtrphdr != NULL &&
 723                     dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 724                         VN_RELE(nvp);
 725                         uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
 726                         goto bad;
 727                 }
 728 
 729                 VN_RELE(nvp);
 730                 ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
 731         }
 732 
 733         if (hasauxv) {
 734                 int auxf = AF_SUN_HWCAPVERIFY;
 735                 /*
 736                  * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
 737                  * exec_args()
 738                  */
 739                 ADDAUX(aux, AT_BASE, voffset)
 740                 ADDAUX(aux, AT_FLAGS, at_flags)
 741                 ADDAUX(aux, AT_PAGESZ, PAGESIZE)
 742                 /*
 743                  * Linker flags. (security)
 744                  * p_flag not yet set at this time.
 745                  * We rely on gexec() to provide us with the information.
 746                  * If the application is set-uid but this is not reflected
 747                  * in a mismatch between real/effective uids/gids, then
 748                  * don't treat this as a set-uid exec.  So we care about
 749                  * the EXECSETID_UGIDS flag but not the ...SETID flag.
 750                  */
 751                 if ((setid &= ~EXECSETID_SETID) != 0)
 752                         auxf |= AF_SUN_SETUGID;
 753 
 754                 /*
 755                  * If we're running a native process from within a branded
 756                  * zone under pfexec then we clear the AF_SUN_SETUGID flag so
 757                  * that the native ld.so.1 is able to link with the native
 758                  * libraries instead of using the brand libraries that are
 759                  * installed in the zone.  We only do this for processes
 760                  * which we trust because we see they are already running
 761                  * under pfexec (where uid != euid).  This prevents a
 762                  * malicious user within the zone from crafting a wrapper to
 763                  * run native suid commands with unsecure libraries interposed.
 764                  */
 765                 if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
 766                     (setid &= ~EXECSETID_SETID) != 0))
 767                         auxf &= ~AF_SUN_SETUGID;
 768 
 769                 /*
 770                  * Record the user addr of the auxflags aux vector entry
 771                  * since brands may optionally want to manipulate this field.
 772                  */
 773                 args->auxp_auxflags =
 774                     (char *)((char *)args->stackend +
 775                     ((char *)&aux->a_type -
 776                     (char *)bigwad->elfargs));
 777                 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
 778                 /*
 779                  * Hardware capability flag word (performance hints)
 780                  * Used for choosing faster library routines.
 781                  * (Potentially different between 32-bit and 64-bit ABIs)
 782                  */
 783 #if defined(_LP64)
 784                 if (args->to_model == DATAMODEL_NATIVE) {
 785                         ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 786                         ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 787                 } else {
 788                         ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
 789                         ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
 790                 }
 791 #else
 792                 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 793                 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 794 #endif
 795                 if (branded) {
 796                         /*
 797                          * Reserve space for the brand-private aux vectors,
 798                          * and record the user addr of that space.
 799                          */
 800                         args->auxp_brand =
 801                             (char *)((char *)args->stackend +
 802                             ((char *)&aux->a_type -
 803                             (char *)bigwad->elfargs));
 804                         ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
 805                         ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
 806                         ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
 807                 }
 808 
 809                 ADDAUX(aux, AT_NULL, 0)
 810                 postfixsize = (char *)aux - (char *)bigwad->elfargs;
 811 
 812                 /*
 813                  * We make assumptions above when we determine how many aux
 814                  * vector entries we will be adding. However, if we have an
 815                  * invalid elf file, it is possible that mapelfexec might
 816                  * behave differently (but not return an error), in which case
 817                  * the number of aux entries we actually add will be different.
 818                  * We detect that now and error out.
 819                  */
 820                 if (postfixsize != args->auxsize) {
 821                         DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
 822                             int, args->auxsize);
 823                         goto bad;
 824                 }
 825                 ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
 826         }
 827 
 828         /*
 829          * For the 64-bit kernel, the limit is big enough that rounding it up
 830          * to a page can overflow the 64-bit limit, so we check for btopr()
 831          * overflowing here by comparing it with the unrounded limit in pages.
 832          * If it hasn't overflowed, compare the exec size with the rounded up
 833          * limit in pages.  Otherwise, just compare with the unrounded limit.
 834          */
 835         limit = btop(p->p_vmem_ctl);
 836         roundlimit = btopr(p->p_vmem_ctl);
 837         if ((roundlimit > limit && *execsz > roundlimit) ||
 838             (roundlimit < limit && *execsz > limit)) {
 839                 mutex_enter(&p->p_lock);
 840                 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
 841                     RCA_SAFE);
 842                 mutex_exit(&p->p_lock);
 843                 error = ENOMEM;
 844                 goto bad;
 845         }
 846 
 847         bzero(up->u_auxv, sizeof (up->u_auxv));
 848         if (postfixsize) {
 849                 int num_auxv;
 850 
 851                 /*
 852                  * Copy the aux vector to the user stack.
 853                  */
 854                 error = execpoststack(args, bigwad->elfargs, postfixsize);
 855                 if (error)
 856                         goto bad;
 857 
 858                 /*
 859                  * Copy auxv to the process's user structure for use by /proc.
 860                  * If this is a branded process, the brand's exec routine will
 861                  * copy it's private entries to the user structure later. It
 862                  * relies on the fact that the blank entries are at the end.
 863                  */
 864                 num_auxv = postfixsize / sizeof (aux_entry_t);
 865                 ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
 866                 aux = bigwad->elfargs;
 867                 for (i = 0; i < num_auxv; i++) {
 868                         up->u_auxv[i].a_type = aux[i].a_type;
 869                         up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
 870                 }
 871         }
 872 
 873         /*
 874          * Pass back the starting address so we can set the program counter.
 875          */
 876         args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
 877 
 878         if (!uphdr) {
 879                 if (ehdrp->e_type == ET_DYN) {
 880                         /*
 881                          * If we are executing a shared library which doesn't
 882                          * have a interpreter (probably ld.so.1) then
 883                          * we don't set the brkbase now.  Instead we
 884                          * delay it's setting until the first call
 885                          * via grow.c::brk().  This permits ld.so.1 to
 886                          * initialize brkbase to the tail of the executable it
 887                          * loads (which is where it needs to be).
 888                          */
 889                         bigwad->exenv.ex_brkbase = (caddr_t)0;
 890                         bigwad->exenv.ex_bssbase = (caddr_t)0;
 891                         bigwad->exenv.ex_brksize = 0;
 892                 } else {
 893                         bigwad->exenv.ex_brkbase = brkbase;
 894                         bigwad->exenv.ex_bssbase = bssbase;
 895                         bigwad->exenv.ex_brksize = brksize;
 896                 }
 897                 bigwad->exenv.ex_magic = elfmagic;
 898                 bigwad->exenv.ex_vp = vp;
 899                 setexecenv(&bigwad->exenv);
 900         }
 901 
 902         ASSERT(error == 0);
 903         goto out;
 904 
 905 bad:
 906         if (fd != -1)           /* did we open the a.out yet */
 907                 (void) execclose(fd);
 908 
 909         psignal(p, SIGKILL);
 910 
 911         if (error == 0)
 912                 error = ENOEXEC;
 913 out:
 914         if (phdrbase != NULL)
 915                 kmem_free(phdrbase, phdrsize);
 916         if (cap != NULL)
 917                 kmem_free(cap, capsize);
 918         kmem_free(bigwad, sizeof (struct bigwad));
 919         return (error);
 920 }
 921 
 922 /*
 923  * Compute the memory size requirement for the ELF file.
 924  */
 925 static size_t
 926 elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata)
 927 {
 928         size_t  len;
 929         Phdr    *phdrp = (Phdr *)phdrbase;
 930         int     hsize = ehdrp->e_phentsize;
 931         int     first = 1;
 932         int     dfirst = 1;     /* first data segment */
 933         uintptr_t loaddr = 0;
 934         uintptr_t hiaddr = 0;
 935         uintptr_t lo, hi;
 936         int     i;
 937 
 938         for (i = nphdrs; i > 0; i--) {
 939                 if (phdrp->p_type == PT_LOAD) {
 940                         lo = phdrp->p_vaddr;
 941                         hi = lo + phdrp->p_memsz;
 942                         if (first) {
 943                                 loaddr = lo;
 944                                 hiaddr = hi;
 945                                 first = 0;
 946                         } else {
 947                                 if (loaddr > lo)
 948                                         loaddr = lo;
 949                                 if (hiaddr < hi)
 950                                         hiaddr = hi;
 951                         }
 952 
 953                         /*
 954                          * save the address of the first data segment
 955                          * of a object - used for the AT_SUNW_LDDATA
 956                          * aux entry.
 957                          */
 958                         if ((lddata != NULL) && dfirst &&
 959                             (phdrp->p_flags & PF_W)) {
 960                                 *lddata = lo;
 961                                 dfirst = 0;
 962                         }
 963                 }
 964                 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
 965         }
 966 
 967         len = hiaddr - (loaddr & PAGEMASK);
 968         len = roundup(len, PAGESIZE);
 969 
 970         return (len);
 971 }
 972 
 973 /*
 974  * Read in the ELF header and program header table.
 975  * SUSV3 requires:
 976  *      ENOEXEC File format is not recognized
 977  *      EINVAL  Format recognized but execution not supported
 978  */
 979 static int
 980 getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx,
 981     int *nphdrs)
 982 {
 983         int error;
 984         ssize_t resid;
 985 
 986         /*
 987          * We got here by the first two bytes in ident,
 988          * now read the entire ELF header.
 989          */
 990         if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
 991             sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0,
 992             (rlim64_t)0, credp, &resid)) != 0)
 993                 return (error);
 994 
 995         /*
 996          * Since a separate version is compiled for handling 32-bit and
 997          * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
 998          * doesn't need to be able to deal with 32-bit ELF files.
 999          */
1000         if (resid != 0 ||
1001             ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
1002             ehdr->e_ident[EI_MAG3] != ELFMAG3)
1003                 return (ENOEXEC);
1004 
1005         if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
1006 #if defined(_ILP32) || defined(_ELF32_COMPAT)
1007             ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
1008 #else
1009             ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
1010 #endif
1011             !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
1012             ehdr->e_flags))
1013                 return (EINVAL);
1014 
1015         *nshdrs = ehdr->e_shnum;
1016         *shstrndx = ehdr->e_shstrndx;
1017         *nphdrs = ehdr->e_phnum;
1018 
1019         /*
1020          * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1021          * to read in the section header at index zero to acces the true
1022          * values for those fields.
1023          */
1024         if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
1025             *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
1026                 Shdr shdr;
1027 
1028                 if (ehdr->e_shoff == 0)
1029                         return (EINVAL);
1030 
1031                 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
1032                     sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
1033                     (rlim64_t)0, credp, &resid)) != 0)
1034                         return (error);
1035 
1036                 if (*nshdrs == 0)
1037                         *nshdrs = shdr.sh_size;
1038                 if (*shstrndx == SHN_XINDEX)
1039                         *shstrndx = shdr.sh_link;
1040                 if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
1041                         *nphdrs = shdr.sh_info;
1042         }
1043 
1044         return (0);
1045 }
1046 
1047 #ifdef _ELF32_COMPAT
1048 extern size_t elf_nphdr_max;
1049 #else
1050 size_t elf_nphdr_max = 1000;
1051 #endif
1052 
1053 static int
1054 getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs,
1055     caddr_t *phbasep, ssize_t *phsizep)
1056 {
1057         ssize_t resid, minsize;
1058         int err;
1059 
1060         /*
1061          * Since we're going to be using e_phentsize to iterate down the
1062          * array of program headers, it must be 8-byte aligned or else
1063          * a we might cause a misaligned access. We use all members through
1064          * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
1065          * e_phentsize must be at least large enough to include those
1066          * members.
1067          */
1068 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1069         minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags);
1070 #else
1071         minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz);
1072 #endif
1073         if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3))
1074                 return (EINVAL);
1075 
1076         *phsizep = nphdrs * ehdr->e_phentsize;
1077 
1078         if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1079                 if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1080                         return (ENOMEM);
1081         } else {
1082                 *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1083         }
1084 
1085         if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep,
1086             (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1087             credp, &resid)) != 0) {
1088                 kmem_free(*phbasep, *phsizep);
1089                 *phbasep = NULL;
1090                 return (err);
1091         }
1092 
1093         return (0);
1094 }
1095 
1096 #ifdef _ELF32_COMPAT
1097 extern size_t elf_nshdr_max;
1098 extern size_t elf_shstrtab_max;
1099 #else
1100 size_t elf_nshdr_max = 10000;
1101 size_t elf_shstrtab_max = 100 * 1024;
1102 #endif
1103 
1104 
1105 static int
1106 getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
1107     int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep,
1108     char **shstrbasep, ssize_t *shstrsizep)
1109 {
1110         ssize_t resid, minsize;
1111         int err;
1112         Shdr *shdr;
1113 
1114         /*
1115          * Since we're going to be using e_shentsize to iterate down the
1116          * array of section headers, it must be 8-byte aligned or else
1117          * a we might cause a misaligned access. We use all members through
1118          * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1119          * must be at least large enough to include that member. The index
1120          * of the string table section must also be valid.
1121          */
1122         minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize);
1123         if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) ||
1124             shstrndx >= nshdrs)
1125                 return (EINVAL);
1126 
1127         *shsizep = nshdrs * ehdr->e_shentsize;
1128 
1129         if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
1130                 if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
1131                         return (ENOMEM);
1132         } else {
1133                 *shbasep = kmem_alloc(*shsizep, KM_SLEEP);
1134         }
1135 
1136         if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep,
1137             (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1138             credp, &resid)) != 0) {
1139                 kmem_free(*shbasep, *shsizep);
1140                 return (err);
1141         }
1142 
1143         /*
1144          * Pull the section string table out of the vnode; fail if the size
1145          * is zero.
1146          */
1147         shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
1148         if ((*shstrsizep = shdr->sh_size) == 0) {
1149                 kmem_free(*shbasep, *shsizep);
1150                 return (EINVAL);
1151         }
1152 
1153         if (*shstrsizep > elf_shstrtab_max) {
1154                 if ((*shstrbasep = kmem_alloc(*shstrsizep,
1155                     KM_NOSLEEP)) == NULL) {
1156                         kmem_free(*shbasep, *shsizep);
1157                         return (ENOMEM);
1158                 }
1159         } else {
1160                 *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
1161         }
1162 
1163         if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep,
1164             (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
1165             credp, &resid)) != 0) {
1166                 kmem_free(*shbasep, *shsizep);
1167                 kmem_free(*shstrbasep, *shstrsizep);
1168                 return (err);
1169         }
1170 
1171         /*
1172          * Make sure the strtab is null-terminated to make sure we
1173          * don't run off the end of the table.
1174          */
1175         (*shstrbasep)[*shstrsizep - 1] = '\0';
1176 
1177         return (0);
1178 }
1179 
1180 static int
1181 mapelfexec(
1182         vnode_t *vp,
1183         Ehdr *ehdr,
1184         int nphdrs,
1185         caddr_t phdrbase,
1186         Phdr **uphdr,
1187         Phdr **dyphdr,
1188         Phdr **stphdr,
1189         Phdr **dtphdr,
1190         Phdr *dataphdrp,
1191         caddr_t *bssbase,
1192         caddr_t *brkbase,
1193         intptr_t *voffset,
1194         intptr_t *minaddr,
1195         size_t len,
1196         long *execsz,
1197         size_t *brksize)
1198 {
1199         Phdr *phdr;
1200         int i, prot, error;
1201         caddr_t addr = NULL;
1202         size_t zfodsz;
1203         int ptload = 0;
1204         int page;
1205         off_t offset;
1206         int hsize = ehdr->e_phentsize;
1207         caddr_t mintmp = (caddr_t)-1;
1208         extern int use_brk_lpg;
1209 
1210         if (ehdr->e_type == ET_DYN) {
1211                 /*
1212                  * Obtain the virtual address of a hole in the
1213                  * address space to map the "interpreter".
1214                  */
1215                 map_addr(&addr, len, (offset_t)0, 1, 0);
1216                 if (addr == NULL)
1217                         return (ENOMEM);
1218                 *voffset = (intptr_t)addr;
1219 
1220                 /*
1221                  * Calculate the minimum vaddr so it can be subtracted out.
1222                  * According to the ELF specification, since PT_LOAD sections
1223                  * must be sorted by increasing p_vaddr values, this is
1224                  * guaranteed to be the first PT_LOAD section.
1225                  */
1226                 phdr = (Phdr *)phdrbase;
1227                 for (i = nphdrs; i > 0; i--) {
1228                         if (phdr->p_type == PT_LOAD) {
1229                                 *voffset -= (uintptr_t)phdr->p_vaddr;
1230                                 break;
1231                         }
1232                         phdr = (Phdr *)((caddr_t)phdr + hsize);
1233                 }
1234 
1235         } else {
1236                 *voffset = 0;
1237         }
1238         phdr = (Phdr *)phdrbase;
1239         for (i = nphdrs; i > 0; i--) {
1240                 switch (phdr->p_type) {
1241                 case PT_LOAD:
1242                         if ((*dyphdr != NULL) && (*uphdr == NULL))
1243                                 return (0);
1244 
1245                         ptload = 1;
1246                         prot = PROT_USER;
1247                         if (phdr->p_flags & PF_R)
1248                                 prot |= PROT_READ;
1249                         if (phdr->p_flags & PF_W)
1250                                 prot |= PROT_WRITE;
1251                         if (phdr->p_flags & PF_X)
1252                                 prot |= PROT_EXEC;
1253 
1254                         addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1255 
1256                         /*
1257                          * Keep track of the segment with the lowest starting
1258                          * address.
1259                          */
1260                         if (addr < mintmp)
1261                                 mintmp = addr;
1262 
1263                         zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
1264 
1265                         offset = phdr->p_offset;
1266                         if (((uintptr_t)offset & PAGEOFFSET) ==
1267                             ((uintptr_t)addr & PAGEOFFSET) &&
1268                             (!(vp->v_flag & VNOMAP))) {
1269                                 page = 1;
1270                         } else {
1271                                 page = 0;
1272                         }
1273 
1274                         /*
1275                          * Set the heap pagesize for OOB when the bss size
1276                          * is known and use_brk_lpg is not 0.
1277                          */
1278                         if (brksize != NULL && use_brk_lpg &&
1279                             zfodsz != 0 && phdr == dataphdrp &&
1280                             (prot & PROT_WRITE)) {
1281                                 size_t tlen = P2NPHASE((uintptr_t)addr +
1282                                     phdr->p_filesz, PAGESIZE);
1283 
1284                                 if (zfodsz > tlen) {
1285                                         curproc->p_brkpageszc =
1286                                             page_szc(map_pgsz(MAPPGSZ_HEAP,
1287                                             curproc, addr + phdr->p_filesz +
1288                                             tlen, zfodsz - tlen, 0));
1289                                 }
1290                         }
1291 
1292                         if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1293                             (prot & PROT_WRITE)) {
1294                                 uint_t  szc = curproc->p_brkpageszc;
1295                                 size_t pgsz = page_get_pagesize(szc);
1296                                 caddr_t ebss = addr + phdr->p_memsz;
1297                                 size_t extra_zfodsz;
1298 
1299                                 ASSERT(pgsz > PAGESIZE);
1300 
1301                                 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1302 
1303                                 if (error = execmap(vp, addr, phdr->p_filesz,
1304                                     zfodsz + extra_zfodsz, phdr->p_offset,
1305                                     prot, page, szc))
1306                                         goto bad;
1307                                 if (brksize != NULL)
1308                                         *brksize = extra_zfodsz;
1309                         } else {
1310                                 if (error = execmap(vp, addr, phdr->p_filesz,
1311                                     zfodsz, phdr->p_offset, prot, page, 0))
1312                                         goto bad;
1313                         }
1314 
1315                         if (bssbase != NULL && addr >= *bssbase &&
1316                             phdr == dataphdrp) {
1317                                 *bssbase = addr + phdr->p_filesz;
1318                         }
1319                         if (brkbase != NULL && addr >= *brkbase) {
1320                                 *brkbase = addr + phdr->p_memsz;
1321                         }
1322 
1323                         *execsz += btopr(phdr->p_memsz);
1324                         break;
1325 
1326                 case PT_INTERP:
1327                         if (ptload)
1328                                 goto bad;
1329                         *dyphdr = phdr;
1330                         break;
1331 
1332                 case PT_SHLIB:
1333                         *stphdr = phdr;
1334                         break;
1335 
1336                 case PT_PHDR:
1337                         if (ptload)
1338                                 goto bad;
1339                         *uphdr = phdr;
1340                         break;
1341 
1342                 case PT_NULL:
1343                 case PT_DYNAMIC:
1344                 case PT_NOTE:
1345                         break;
1346 
1347                 case PT_SUNWDTRACE:
1348                         if (dtphdr != NULL)
1349                                 *dtphdr = phdr;
1350                         break;
1351 
1352                 default:
1353                         break;
1354                 }
1355                 phdr = (Phdr *)((caddr_t)phdr + hsize);
1356         }
1357 
1358         if (minaddr != NULL) {
1359                 ASSERT(mintmp != (caddr_t)-1);
1360                 *minaddr = (intptr_t)mintmp;
1361         }
1362 
1363         return (0);
1364 bad:
1365         if (error == 0)
1366                 error = EINVAL;
1367         return (error);
1368 }
1369 
1370 int
1371 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1372     rlim64_t rlimit, cred_t *credp)
1373 {
1374         Note note;
1375         int error;
1376 
1377         bzero(&note, sizeof (note));
1378         bcopy("CORE", note.name, 4);
1379         note.nhdr.n_type = type;
1380         /*
1381          * The System V ABI states that n_namesz must be the length of the
1382          * string that follows the Nhdr structure including the terminating
1383          * null. The ABI also specifies that sufficient padding should be
1384          * included so that the description that follows the name string
1385          * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1386          * respectively. However, since this change was not made correctly
1387          * at the time of the 64-bit port, both 32- and 64-bit binaries
1388          * descriptions are only guaranteed to begin on a 4-byte boundary.
1389          */
1390         note.nhdr.n_namesz = 5;
1391         note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1392 
1393         if (error = core_write(vp, UIO_SYSSPACE, *offsetp, &note,
1394             sizeof (note), rlimit, credp))
1395                 return (error);
1396 
1397         *offsetp += sizeof (note);
1398 
1399         if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1400             note.nhdr.n_descsz, rlimit, credp))
1401                 return (error);
1402 
1403         *offsetp += note.nhdr.n_descsz;
1404         return (0);
1405 }
1406 
1407 /*
1408  * Copy the section data from one vnode to the section of another vnode.
1409  */
1410 static void
1411 copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset,
1412     void *buf, size_t size, cred_t *credp, rlim64_t rlimit)
1413 {
1414         ssize_t resid;
1415         size_t len, n = src->sh_size;
1416         offset_t off = 0;
1417 
1418         while (n != 0) {
1419                 len = MIN(size, n);
1420                 if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off,
1421                     UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
1422                     resid >= len ||
1423                     core_write(dst_vp, UIO_SYSSPACE, *doffset + off,
1424                     buf, len - resid, rlimit, credp) != 0) {
1425                         dst->sh_size = 0;
1426                         dst->sh_offset = 0;
1427                         return;
1428                 }
1429 
1430                 ASSERT(n >= len - resid);
1431 
1432                 n -= len - resid;
1433                 off += len - resid;
1434         }
1435 
1436         *doffset += src->sh_size;
1437 }
1438 
/*
 * Largest section size, in bytes, for which process_scns() will allocate
 * (or grow) its scratch data buffer to hold the whole section.
 *
 * The variable is defined here (1MB) in the regular build and only
 * declared when _ELF32_COMPAT is set.
 * NOTE(review): presumably the _ELF32_COMPAT build shares the definition
 * from the regular build of this file -- confirm.
 */
#ifdef _ELF32_COMPAT
extern size_t elf_datasz_max;
#else
size_t elf_datasz_max = 1 * 1024 * 1024;
#endif
1444 
1445 /*
1446  * This function processes mappings that correspond to load objects to
1447  * examine their respective sections for elfcore(). It's called once with
1448  * v set to NULL to count the number of sections that we're going to need
1449  * and then again with v set to some allocated buffer that we fill in with
1450  * all the section data.
1451  */
1452 static int
1453 process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp,
1454     Shdr *v, int nv, rlim64_t rlimit, Off *doffsetp, int *nshdrsp)
1455 {
1456         vnode_t *lastvp = NULL;
1457         struct seg *seg;
1458         int i, j;
1459         void *data = NULL;
1460         size_t datasz = 0;
1461         shstrtab_t shstrtab;
1462         struct as *as = p->p_as;
1463         int error = 0;
1464 
1465         if (v != NULL)
1466                 shstrtab_init(&shstrtab);
1467 
1468         i = 1;
1469         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1470                 uint_t prot;
1471                 vnode_t *mvp;
1472                 void *tmp = NULL;
1473                 caddr_t saddr = seg->s_base;
1474                 caddr_t naddr;
1475                 caddr_t eaddr;
1476                 size_t segsize;
1477 
1478                 Ehdr ehdr;
1479                 int nshdrs, shstrndx, nphdrs;
1480                 caddr_t shbase;
1481                 ssize_t shsize;
1482                 char *shstrbase;
1483                 ssize_t shstrsize;
1484 
1485                 Shdr *shdr;
1486                 const char *name;
1487                 size_t sz;
1488                 uintptr_t off;
1489 
1490                 int ctf_ndx = 0;
1491                 int symtab_ndx = 0;
1492 
1493                 /*
1494                  * Since we're just looking for text segments of load
1495                  * objects, we only care about the protection bits; we don't
1496                  * care about the actual size of the segment so we use the
1497                  * reserved size. If the segment's size is zero, there's
1498                  * something fishy going on so we ignore this segment.
1499                  */
1500                 if (seg->s_ops != &segvn_ops ||
1501                     SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1502                     mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
1503                     (segsize = pr_getsegsize(seg, 1)) == 0)
1504                         continue;
1505 
1506                 eaddr = saddr + segsize;
1507                 prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
1508                 pr_getprot_done(&tmp);
1509 
1510                 /*
1511                  * Skip this segment unless the protection bits look like
1512                  * what we'd expect for a text segment.
1513                  */
1514                 if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
1515                         continue;
1516 
1517                 if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx,
1518                     &nphdrs) != 0 ||
1519                     getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx,
1520                     &shbase, &shsize, &shstrbase, &shstrsize) != 0)
1521                         continue;
1522 
1523                 off = ehdr.e_shentsize;
1524                 for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) {
1525                         Shdr *symtab = NULL, *strtab;
1526 
1527                         shdr = (Shdr *)(shbase + off);
1528 
1529                         if (shdr->sh_name >= shstrsize)
1530                                 continue;
1531 
1532                         name = shstrbase + shdr->sh_name;
1533 
1534                         if (strcmp(name, shstrtab_data[STR_CTF]) == 0) {
1535                                 if ((content & CC_CONTENT_CTF) == 0 ||
1536                                     ctf_ndx != 0)
1537                                         continue;
1538 
1539                                 if (shdr->sh_link > 0 &&
1540                                     shdr->sh_link < nshdrs) {
1541                                         symtab = (Shdr *)(shbase +
1542                                             shdr->sh_link * ehdr.e_shentsize);
1543                                 }
1544 
1545                                 if (v != NULL && i < nv - 1) {
1546                                         if (shdr->sh_size > datasz &&
1547                                             shdr->sh_size <= elf_datasz_max) {
1548                                                 if (data != NULL)
1549                                                         kmem_free(data, datasz);
1550 
1551                                                 datasz = shdr->sh_size;
1552                                                 data = kmem_alloc(datasz,
1553                                                     KM_SLEEP);
1554                                         }
1555 
1556                                         v[i].sh_name = shstrtab_ndx(&shstrtab,
1557                                             STR_CTF);
1558                                         v[i].sh_addr = (Addr)(uintptr_t)saddr;
1559                                         v[i].sh_type = SHT_PROGBITS;
1560                                         v[i].sh_addralign = 4;
1561                                         *doffsetp = roundup(*doffsetp,
1562                                             v[i].sh_addralign);
1563                                         v[i].sh_offset = *doffsetp;
1564                                         v[i].sh_size = shdr->sh_size;
1565                                         if (symtab == NULL)  {
1566                                                 v[i].sh_link = 0;
1567                                         } else if (symtab->sh_type ==
1568                                             SHT_SYMTAB &&
1569                                             symtab_ndx != 0) {
1570                                                 v[i].sh_link =
1571                                                     symtab_ndx;
1572                                         } else {
1573                                                 v[i].sh_link = i + 1;
1574                                         }
1575 
1576                                         copy_scn(shdr, mvp, &v[i], vp,
1577                                             doffsetp, data, datasz, credp,
1578                                             rlimit);
1579                                 }
1580 
1581                                 ctf_ndx = i++;
1582 
1583                                 /*
1584                                  * We've already dumped the symtab.
1585                                  */
1586                                 if (symtab != NULL &&
1587                                     symtab->sh_type == SHT_SYMTAB &&
1588                                     symtab_ndx != 0)
1589                                         continue;
1590 
1591                         } else if (strcmp(name,
1592                             shstrtab_data[STR_SYMTAB]) == 0) {
1593                                 if ((content & CC_CONTENT_SYMTAB) == 0 ||
1594                                     symtab != 0)
1595                                         continue;
1596 
1597                                 symtab = shdr;
1598                         }
1599 
1600                         if (symtab != NULL) {
1601                                 if ((symtab->sh_type != SHT_DYNSYM &&
1602                                     symtab->sh_type != SHT_SYMTAB) ||
1603                                     symtab->sh_link == 0 ||
1604                                     symtab->sh_link >= nshdrs)
1605                                         continue;
1606 
1607                                 strtab = (Shdr *)(shbase +
1608                                     symtab->sh_link * ehdr.e_shentsize);
1609 
1610                                 if (strtab->sh_type != SHT_STRTAB)
1611                                         continue;
1612 
1613                                 if (v != NULL && i < nv - 2) {
1614                                         sz = MAX(symtab->sh_size,
1615                                             strtab->sh_size);
1616                                         if (sz > datasz &&
1617                                             sz <= elf_datasz_max) {
1618                                                 if (data != NULL)
1619                                                         kmem_free(data, datasz);
1620 
1621                                                 datasz = sz;
1622                                                 data = kmem_alloc(datasz,
1623                                                     KM_SLEEP);
1624                                         }
1625 
1626                                         if (symtab->sh_type == SHT_DYNSYM) {
1627                                                 v[i].sh_name = shstrtab_ndx(
1628                                                     &shstrtab, STR_DYNSYM);
1629                                                 v[i + 1].sh_name = shstrtab_ndx(
1630                                                     &shstrtab, STR_DYNSTR);
1631                                         } else {
1632                                                 v[i].sh_name = shstrtab_ndx(
1633                                                     &shstrtab, STR_SYMTAB);
1634                                                 v[i + 1].sh_name = shstrtab_ndx(
1635                                                     &shstrtab, STR_STRTAB);
1636                                         }
1637 
1638                                         v[i].sh_type = symtab->sh_type;
1639                                         v[i].sh_addr = symtab->sh_addr;
1640                                         if (ehdr.e_type == ET_DYN ||
1641                                             v[i].sh_addr == 0)
1642                                                 v[i].sh_addr +=
1643                                                     (Addr)(uintptr_t)saddr;
1644                                         v[i].sh_addralign =
1645                                             symtab->sh_addralign;
1646                                         *doffsetp = roundup(*doffsetp,
1647                                             v[i].sh_addralign);
1648                                         v[i].sh_offset = *doffsetp;
1649                                         v[i].sh_size = symtab->sh_size;
1650                                         v[i].sh_link = i + 1;
1651                                         v[i].sh_entsize = symtab->sh_entsize;
1652                                         v[i].sh_info = symtab->sh_info;
1653 
1654                                         copy_scn(symtab, mvp, &v[i], vp,
1655                                             doffsetp, data, datasz, credp,
1656                                             rlimit);
1657 
1658                                         v[i + 1].sh_type = SHT_STRTAB;
1659                                         v[i + 1].sh_flags = SHF_STRINGS;
1660                                         v[i + 1].sh_addr = symtab->sh_addr;
1661                                         if (ehdr.e_type == ET_DYN ||
1662                                             v[i + 1].sh_addr == 0)
1663                                                 v[i + 1].sh_addr +=
1664                                                     (Addr)(uintptr_t)saddr;
1665                                         v[i + 1].sh_addralign =
1666                                             strtab->sh_addralign;
1667                                         *doffsetp = roundup(*doffsetp,
1668                                             v[i + 1].sh_addralign);
1669                                         v[i + 1].sh_offset = *doffsetp;
1670                                         v[i + 1].sh_size = strtab->sh_size;
1671 
1672                                         copy_scn(strtab, mvp, &v[i + 1], vp,
1673                                             doffsetp, data, datasz, credp,
1674                                             rlimit);
1675                                 }
1676 
1677                                 if (symtab->sh_type == SHT_SYMTAB)
1678                                         symtab_ndx = i;
1679                                 i += 2;
1680                         }
1681                 }
1682 
1683                 kmem_free(shstrbase, shstrsize);
1684                 kmem_free(shbase, shsize);
1685 
1686                 lastvp = mvp;
1687         }
1688 
1689         if (v == NULL) {
1690                 if (i == 1)
1691                         *nshdrsp = 0;
1692                 else
1693                         *nshdrsp = i + 1;
1694                 goto done;
1695         }
1696 
1697         if (i != nv - 1) {
1698                 cmn_err(CE_WARN, "elfcore: core dump failed for "
1699                     "process %d; address space is changing", p->p_pid);
1700                 error = EIO;
1701                 goto done;
1702         }
1703 
1704         v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
1705         v[i].sh_size = shstrtab_size(&shstrtab);
1706         v[i].sh_addralign = 1;
1707         *doffsetp = roundup(*doffsetp, v[i].sh_addralign);
1708         v[i].sh_offset = *doffsetp;
1709         v[i].sh_flags = SHF_STRINGS;
1710         v[i].sh_type = SHT_STRTAB;
1711 
1712         if (v[i].sh_size > datasz) {
1713                 if (data != NULL)
1714                         kmem_free(data, datasz);
1715 
1716                 datasz = v[i].sh_size;
1717                 data = kmem_alloc(datasz,
1718                     KM_SLEEP);
1719         }
1720 
1721         shstrtab_dump(&shstrtab, data);
1722 
1723         if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp,
1724             data, v[i].sh_size, rlimit, credp)) != 0)
1725                 goto done;
1726 
1727         *doffsetp += v[i].sh_size;
1728 
1729 done:
1730         if (data != NULL)
1731                 kmem_free(data, datasz);
1732 
1733         return (error);
1734 }
1735 
/*
 * elfcore: write an ELF core file (e_type ET_CORE) for process p to vp.
 *
 * File layout produced here:
 *   - the ELF header at offset 0;
 *   - nphdrs program headers immediately after it: index 0 and 1 are the
 *     two note headers (old-format and current), followed by one PT_LOAD
 *     header per address range reported by pr_getprot();
 *   - nshdrs section headers (possibly none) after the program headers;
 *   - the note data, the segment data and, last, any section data.
 *
 * Arguments:
 *   vp      - vnode the core file is written to
 *   p       - process being dumped; must be the current process (ASSERTed)
 *   credp   - credentials handed to every write and to process_scns()
 *   rlimit  - passed unchanged to core_write()/core_seg()/process_scns()
 *             (presumably the core file size limit -- confirm with callers)
 *   sig     - passed to write_old_elfnotes() and write_elfnotes()
 *   content - CC_CONTENT_* bit mask choosing which mappings have their
 *             data included and whether CTF/symtab sections are processed
 *
 * Returns 0 on success.  Returns EIO when the number of address ranges
 * still differs from the initial count after one retry; otherwise returns
 * the error of the failing header/note/section write.  A failure to dump
 * an individual segment is not fatal: it is recorded in that segment's
 * program header instead (see the segment loop below).
 */
1736 int
1737 elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
1738     core_content_t content)
1739 {
1740         offset_t poffset, soffset;
1741         Off doffset;
1742         int error, i, nphdrs, nshdrs;
1743         int overflow = 0;
1744         struct seg *seg;
1745         struct as *as = p->p_as;
             /*
              * One scratch buffer, used in turn as the ELF header, the
              * program header array and the section header array.
              */
1746         union {
1747                 Ehdr ehdr;
1748                 Phdr phdr[1];
1749                 Shdr shdr[1];
1750         } *bigwad;
1751         size_t bigsize;
1752         size_t phdrsz, shdrsz;
1753         Ehdr *ehdr;
1754         Phdr *v;
1755         caddr_t brkbase;
1756         size_t brksize;
1757         caddr_t stkbase;
1758         size_t stksize;
1759         int ntries = 0;
1760         klwp_t *lwp = ttolwp(curthread);
1761 
1762 top:
1763         /*
1764          * Make sure we have everything we need (registers, etc.).
1765          * All other lwps have already stopped and are in an orderly state.
1766          */
1767         ASSERT(p == ttoproc(curthread));
1768         prstop(0, 0);
1769 
             /* First pass: count program and section headers under the as lock. */
1770         AS_LOCK_ENTER(as, RW_WRITER);
1771         nphdrs = prnsegs(as, 0) + 2;            /* two CORE note sections */
1772 
1773         /*
1774          * Count the number of section headers we're going to need.
1775          */
1776         nshdrs = 0;
1777         if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
1778                 (void) process_scns(content, p, credp, NULL, NULL, NULL, 0,
1779                     NULL, &nshdrs);
1780         }
1781         AS_LOCK_EXIT(as);
1782 
1783         ASSERT(nshdrs == 0 || nshdrs > 1);
1784 
1785         /*
1786          * The core file contents may require zero section headers, but if
1787          * we overflow the 16 bits allotted to the program header count in
1788          * the ELF header, we'll need the section header at index zero.
1789          */
1790         if (nshdrs == 0 && nphdrs >= PN_XNUM)
1791                 nshdrs = 1;
1792 
1793         phdrsz = nphdrs * sizeof (Phdr);
1794         shdrsz = nshdrs * sizeof (Shdr);
1795 
             /* Size the scratch buffer for the largest of its three uses. */
1796         bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz));
1797         bigwad = kmem_alloc(bigsize, KM_SLEEP);
1798 
1799         ehdr = &bigwad->ehdr;
1800         bzero(ehdr, sizeof (*ehdr));
1801 
1802         ehdr->e_ident[EI_MAG0] = ELFMAG0;
1803         ehdr->e_ident[EI_MAG1] = ELFMAG1;
1804         ehdr->e_ident[EI_MAG2] = ELFMAG2;
1805         ehdr->e_ident[EI_MAG3] = ELFMAG3;
1806         ehdr->e_ident[EI_CLASS] = ELFCLASS;
1807         ehdr->e_type = ET_CORE;
1808 
1809 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1810 
1811 #if defined(__sparc)
1812         ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
1813         ehdr->e_machine = EM_SPARC;
1814 #elif defined(__i386) || defined(__i386_COMPAT)
1815         ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1816         ehdr->e_machine = EM_386;
1817 #else
1818 #error "no recognized machine type is defined"
1819 #endif
1820 
1821 #else   /* !defined(_LP64) || defined(_ELF32_COMPAT) */
1822 
1823 #if defined(__sparc)
1824         ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
1825         ehdr->e_machine = EM_SPARCV9;
1826 #elif defined(__amd64)
1827         ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1828         ehdr->e_machine = EM_AMD64;
1829 #else
1830 #error "no recognized 64-bit machine type is defined"
1831 #endif
1832 
1833 #endif  /* !defined(_LP64) || defined(_ELF32_COMPAT) */
1834 
1835         /*
1836          * If the count of program headers or section headers or the index
1837          * of the section string table can't fit in the mere 16 bits
1838          * shortsightedly allotted to them in the ELF header, we use the
1839          * extended formats and put the real values in the section header
1840          * at index 0.
1841          */
1842         ehdr->e_version = EV_CURRENT;
1843         ehdr->e_ehsize = sizeof (Ehdr);
1844 
1845         if (nphdrs >= PN_XNUM)
1846                 ehdr->e_phnum = PN_XNUM;
1847         else
1848                 ehdr->e_phnum = (unsigned short)nphdrs;
1849 
1850         ehdr->e_phoff = sizeof (Ehdr);
1851         ehdr->e_phentsize = sizeof (Phdr);
1852 
1853         if (nshdrs > 0) {
1854                 if (nshdrs >= SHN_LORESERVE)
1855                         ehdr->e_shnum = 0;
1856                 else
1857                         ehdr->e_shnum = (unsigned short)nshdrs;
1858 
                     /* The section name string table is the last section. */
1859                 if (nshdrs - 1 >= SHN_LORESERVE)
1860                         ehdr->e_shstrndx = SHN_XINDEX;
1861                 else
1862                         ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);
1863 
1864                 ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs;
1865                 ehdr->e_shentsize = sizeof (Shdr);
1866         }
1867 
1868         if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
1869             sizeof (Ehdr), rlimit, credp))
1870                 goto done;
1871 
             /*
              * File offsets of the program headers, the section headers and
              * the first byte of data.  doffset advances as data is laid out.
              */
1872         poffset = sizeof (Ehdr);
1873         soffset = sizeof (Ehdr) + phdrsz;
1874         doffset = sizeof (Ehdr) + phdrsz + shdrsz;
1875 
             /* The ELF header is on disk; reuse the buffer for program headers. */
1876         v = &bigwad->phdr[0];
1877         bzero(v, phdrsz);
1878 
1879         setup_old_note_header(&v[0], p);
1880         v[0].p_offset = doffset = roundup(doffset, sizeof (Word));
1881         doffset += v[0].p_filesz;
1882 
1883         setup_note_header(&v[1], p);
1884         v[1].p_offset = doffset = roundup(doffset, sizeof (Word));
1885         doffset += v[1].p_filesz;
1886 
             /* Snapshot the heap and stack extents under p_lock. */
1887         mutex_enter(&p->p_lock);
1888 
1889         brkbase = p->p_brkbase;
1890         brksize = p->p_brksize;
1891 
1892         stkbase = p->p_usrstack - p->p_stksize;
1893         stksize = p->p_stksize;
1894 
1895         mutex_exit(&p->p_lock);
1896 
             /*
              * Second pass: fill in one PT_LOAD header per address range.
              * Entries 0 and 1 are the note headers, so start at index 2.
              */
1897         AS_LOCK_ENTER(as, RW_WRITER);
1898         i = 2;
1899         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1900                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1901                 caddr_t saddr, naddr;
1902                 void *tmp = NULL;
1903                 extern struct seg_ops segspt_shmops;
1904 
1905                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1906                         uint_t prot;
1907                         size_t size;
1908                         int type;
1909                         vnode_t *mvp;
1910 
1911                         prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1912                         prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
1913                         if ((size = (size_t)(naddr - saddr)) == 0)
1914                                 continue;
                             /*
                              * More ranges than were counted in the first
                              * pass: note it and keep walking without
                              * writing past the end of v[].
                              */
1915                         if (i == nphdrs) {
1916                                 overflow++;
1917                                 continue;
1918                         }
1919                         v[i].p_type = PT_LOAD;
1920                         v[i].p_vaddr = (Addr)(uintptr_t)saddr;
1921                         v[i].p_memsz = size;
1922                         if (prot & PROT_READ)
1923                                 v[i].p_flags |= PF_R;
1924                         if (prot & PROT_WRITE)
1925                                 v[i].p_flags |= PF_W;
1926                         if (prot & PROT_EXEC)
1927                                 v[i].p_flags |= PF_X;
1928 
1929                         /*
1930                          * Figure out which mappings to include in the core.
1931                          */
1932                         type = SEGOP_GETTYPE(seg, saddr);
1933 
                             /*
                              * A range counts as the stack or the heap only
                              * when it matches the snapshotted base and size
                              * exactly.
                              */
1934                         if (saddr == stkbase && size == stksize) {
1935                                 if (!(content & CC_CONTENT_STACK))
1936                                         goto exclude;
1937 
1938                         } else if (saddr == brkbase && size == brksize) {
1939                                 if (!(content & CC_CONTENT_HEAP))
1940                                         goto exclude;
1941 
1942                         } else if (seg->s_ops == &segspt_shmops) {
1943                                 if (type & MAP_NORESERVE) {
1944                                         if (!(content & CC_CONTENT_DISM))
1945                                                 goto exclude;
1946                                 } else {
1947                                         if (!(content & CC_CONTENT_ISM))
1948                                                 goto exclude;
1949                                 }
1950 
                             /* Any other non-segvn segment is never dumped. */
1951                         } else if (seg->s_ops != &segvn_ops) {
1952                                 goto exclude;
1953 
1954                         } else if (type & MAP_SHARED) {
1955                                 if (shmgetid(p, saddr) != SHMID_NONE) {
1956                                         if (!(content & CC_CONTENT_SHM))
1957                                                 goto exclude;
1958 
1959                                 } else if (SEGOP_GETVP(seg, seg->s_base,
1960                                     &mvp) != 0 || mvp == NULL ||
1961                                     mvp->v_type != VREG) {
1962                                         if (!(content & CC_CONTENT_SHANON))
1963                                                 goto exclude;
1964 
1965                                 } else {
1966                                         if (!(content & CC_CONTENT_SHFILE))
1967                                                 goto exclude;
1968                                 }
1969 
1970                         } else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1971                             mvp == NULL || mvp->v_type != VREG) {
1972                                 if (!(content & CC_CONTENT_ANON))
1973                                         goto exclude;
1974 
1975                         } else if (prot == (PROT_READ | PROT_EXEC)) {
1976                                 if (!(content & CC_CONTENT_TEXT))
1977                                         goto exclude;
1978 
1979                         } else if (prot == PROT_READ) {
1980                                 if (!(content & CC_CONTENT_RODATA))
1981                                         goto exclude;
1982 
1983                         } else {
1984                                 if (!(content & CC_CONTENT_DATA))
1985                                         goto exclude;
1986                         }
1987 
1988                         doffset = roundup(doffset, sizeof (Word));
1989                         v[i].p_offset = doffset;
1990                         v[i].p_filesz = size;
1991                         doffset += size;
     /*
      * Excluded ranges still consume a program header; their p_offset and
      * p_filesz stay zero from the bzero() of the header array.
      */
1992 exclude:
1993                         i++;
1994                 }
1995                 ASSERT(tmp == NULL);
1996         }
1997         AS_LOCK_EXIT(as);
1998 
             /*
              * The range count no longer matches the first pass.  Start over
              * once from the top; give up with EIO if it happens again.
              */
1999         if (overflow || i != nphdrs) {
2000                 if (ntries++ == 0) {
2001                         kmem_free(bigwad, bigsize);
2002                         overflow = 0;
2003                         goto top;
2004                 }
2005                 cmn_err(CE_WARN, "elfcore: core dump failed for "
2006                     "process %d; address space is changing", p->p_pid);
2007                 error = EIO;
2008                 goto done;
2009         }
2010 
2011         if ((error = core_write(vp, UIO_SYSSPACE, poffset,
2012             v, phdrsz, rlimit, credp)) != 0)
2013                 goto done;
2014 
2015         if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit,
2016             credp)) != 0)
2017                 goto done;
2018 
2019         if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit,
2020             credp, content)) != 0)
2021                 goto done;
2022 
             /* Dump the data of every included segment (p_filesz != 0). */
2023         for (i = 2; i < nphdrs; i++) {
2024                 prkillinfo_t killinfo;
2025                 sigqueue_t *sq;
                     /* NOTE(review): this local `sig' shadows the sig parameter. */
2026                 int sig, j;
2027 
2028                 if (v[i].p_filesz == 0)
2029                         continue;
2030 
2031                 /*
2032                  * If dumping out this segment fails, rather than failing
2033                  * the core dump entirely, we reset the size of the mapping
2034                  * to zero to indicate that the data is absent from the core
2035                  * file and OR in the PF_SUNW_FAILURE flag to differentiate
2036                  * this from mappings that were excluded due to the core file
2037                  * content settings.
2038                  */
2039                 if ((error = core_seg(p, vp, v[i].p_offset,
2040                     (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
2041                     rlimit, credp)) == 0) {
2042                         continue;
2043                 }
2044 
2045                 if ((sig = lwp->lwp_cursig) == 0) {
2046                         /*
2047                          * We failed due to something other than a signal.
2048                          * Since the space reserved for the segment is now
2049                          * unused, we stash the errno in the first four
2050                          * bytes. This undocumented interface will let us
2051                          * understand the nature of the failure.
2052                          */
2053                         (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2054                             &error, sizeof (error), rlimit, credp);
2055 
                             /* Rewrite just this program header in the file. */
2056                         v[i].p_filesz = 0;
2057                         v[i].p_flags |= PF_SUNW_FAILURE;
2058                         if ((error = core_write(vp, UIO_SYSSPACE,
2059                             poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
2060                             rlimit, credp)) != 0)
2061                                 goto done;
2062 
2063                         continue;
2064                 }
2065 
2066                 /*
2067                  * We took a signal.  We want to abort the dump entirely, but
2068                  * we also want to indicate what failed and why.  We therefore
2069                  * use the space reserved for the first failing segment to
2070                  * write our error (which, for purposes of compatibility with
2071                  * older core dump readers, we set to EINTR) followed by any
2072                  * siginfo associated with the signal.
2073                  */
2074                 bzero(&killinfo, sizeof (killinfo));
2075                 killinfo.prk_error = EINTR;
2076 
2077                 sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;
2078 
2079                 if (sq != NULL) {
2080                         bcopy(&sq->sq_info, &killinfo.prk_info,
2081                             sizeof (sq->sq_info));
2082                 } else {
2083                         killinfo.prk_info.si_signo = lwp->lwp_cursig;
2084                         killinfo.prk_info.si_code = SI_NOINFO;
2085                 }
2086 
2087 #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2088                 /*
2089                  * If this is a 32-bit process, we need to translate from the
2090                  * native siginfo to the 32-bit variant.  (Core readers must
2091                  * always have the same data model as their target or must
2092                  * be aware of -- and compensate for -- data model differences.)
2093                  */
2094                 if (curproc->p_model == DATAMODEL_ILP32) {
2095                         siginfo32_t si32;
2096 
2097                         siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
2098                         bcopy(&si32, &killinfo.prk_info, sizeof (si32));
2099                 }
2100 #endif
2101 
2102                 (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2103                     &killinfo, sizeof (killinfo), rlimit, credp);
2104 
2105                 /*
2106                  * For the segment on which we took the signal, indicate that
2107                  * its data now refers to a siginfo.
2108                  */
2109                 v[i].p_filesz = 0;
2110                 v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
2111                     PF_SUNW_SIGINFO;
2112 
2113                 /*
2114                  * And for every other segment, indicate that its absence
2115                  * is due to a signal.
2116                  */
2117                 for (j = i + 1; j < nphdrs; j++) {
2118                         v[j].p_filesz = 0;
2119                         v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
2120                 }
2121 
2122                 /*
2123                  * Finally, write out our modified program headers.
2124                  */
2125                 if ((error = core_write(vp, UIO_SYSSPACE,
2126                     poffset + sizeof (v[i]) * i, &v[i],
2127                     sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)
2128                         goto done;
2129 
2130                 break;
2131         }
2132 
             /*
              * Section headers.  The program headers are on disk, so the
              * buffer is reused once more, this time as the Shdr array.
              */
2133         if (nshdrs > 0) {
2134                 bzero(&bigwad->shdr[0], shdrsz);
2135 
                     /*
                      * Section header 0 carries the real counts/index when
                      * they did not fit in the ELF header fields.
                      */
2136                 if (nshdrs >= SHN_LORESERVE)
2137                         bigwad->shdr[0].sh_size = nshdrs;
2138 
2139                 if (nshdrs - 1 >= SHN_LORESERVE)
2140                         bigwad->shdr[0].sh_link = nshdrs - 1;
2141 
2142                 if (nphdrs >= PN_XNUM)
2143                         bigwad->shdr[0].sh_info = nphdrs;
2144 
2145                 if (nshdrs > 1) {
2146                         AS_LOCK_ENTER(as, RW_WRITER);
2147                         if ((error = process_scns(content, p, credp, vp,
2148                             &bigwad->shdr[0], nshdrs, rlimit, &doffset,
2149                             NULL)) != 0) {
2150                                 AS_LOCK_EXIT(as);
2151                                 goto done;
2152                         }
2153                         AS_LOCK_EXIT(as);
2154                 }
2155 
2156                 if ((error = core_write(vp, UIO_SYSSPACE, soffset,
2157                     &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0)
2158                         goto done;
2159         }
2160 
     /* Common exit: release the scratch buffer and return the last status. */
2161 done:
2162         kmem_free(bigwad, bigsize);
2163         return (error);
2164 }
2165 
2166 #ifndef _ELF32_COMPAT
2167 
/*
 * Exec switch entry for the native ELF class of this kernel: the magic
 * string is elf64magicstr when _LP64 is defined and elf32magicstr
 * otherwise, and the two function slots are filled with elfexec and
 * elfcore.
 * NOTE(review): the literals 0 and 5 are presumably the offset and length
 * of the magic string -- confirm against the struct execsw declaration.
 */
2168 static struct execsw esw = {
2169 #ifdef  _LP64
2170         elf64magicstr,
2171 #else   /* _LP64 */
2172         elf32magicstr,
2173 #endif  /* _LP64 */
2174         0,
2175         5,
2176         elfexec,
2177         elfcore
2178 };
2179 
/*
 * Module linkage record tying the native exec switch entry (esw) to
 * mod_execops, with the description string "exec module for elf".
 */
2180 static struct modlexec modlexec = {
2181         &mod_execops, "exec module for elf", &esw
2182 };
2183 
2184 #ifdef  _LP64
/*
 * 32-bit ELF handlers, declared here and defined outside this
 * translation unit.
 * NOTE(review): presumably they are this file's elfexec/elfcore compiled
 * a second time with _ELF32_COMPAT defined -- confirm in the build rules.
 */
2185 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
2186                         intpdata_t *idatap, int level, long *execsz,
2187                         int setid, caddr_t exec_file, cred_t *cred,
2188                         int brand_action);
2189 extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
2190                         rlim64_t rlimit, int sig, core_content_t content);
2191 
/*
 * Exec switch entry for 32-bit ELF objects on an _LP64 kernel.  It has the
 * same shape as esw, but uses elf32magicstr and the elf32 handlers.
 */
2192 static struct execsw esw32 = {
2193         elf32magicstr,
2194         0,
2195         5,
2196         elf32exec,
2197         elf32core
2198 };
2199 
/*
 * Module linkage record for the 32-bit exec switch entry (esw32), with
 * the description string "32-bit exec module for elf".
 */
2200 static struct modlexec modlexec32 = {
2201         &mod_execops, "32-bit exec module for elf", &esw32
2202 };
2203 #endif  /* _LP64 */
2204 
/*
 * Linkage list handed to mod_install()/mod_remove()/mod_info(): revision
 * MODREV_1, the native exec linkage, the 32-bit exec linkage when _LP64
 * is defined, and a terminating NULL.
 */
2205 static struct modlinkage modlinkage = {
2206         MODREV_1,
2207         (void *)&modlexec,
2208 #ifdef  _LP64
2209         (void *)&modlexec32,
2210 #endif  /* _LP64 */
2211         NULL
2212 };
2213 
/*
 * Module entry point: install this module's linkage with mod_install()
 * and return its result unchanged.
 */
2214 int
2215 _init(void)
2216 {
2217         return (mod_install(&modlinkage));
2218 }
2219 
/*
 * Module entry point: remove this module's linkage with mod_remove()
 * and return its result unchanged.
 */
2220 int
2221 _fini(void)
2222 {
2223         return (mod_remove(&modlinkage));
2224 }
2225 
/*
 * Module entry point: pass modinfop and this module's linkage to
 * mod_info() and return its result unchanged.
 */
2226 int
2227 _info(struct modinfo *modinfop)
2228 {
2229         return (mod_info(&modlinkage, modinfop));
2230 }
2231 
2232 #endif  /* !_ELF32_COMPAT */