1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  27 /*        All Rights Reserved   */
  28 /*
  29  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
  30  */
  31 
  32 #include <sys/types.h>
  33 #include <sys/param.h>
  34 #include <sys/thread.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/signal.h>
  37 #include <sys/cred.h>
  38 #include <sys/user.h>
  39 #include <sys/errno.h>
  40 #include <sys/vnode.h>
  41 #include <sys/mman.h>
  42 #include <sys/kmem.h>
  43 #include <sys/proc.h>
  44 #include <sys/pathname.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/systm.h>
  47 #include <sys/elf.h>
  48 #include <sys/vmsystm.h>
  49 #include <sys/debug.h>
  50 #include <sys/auxv.h>
  51 #include <sys/exec.h>
  52 #include <sys/prsystm.h>
  53 #include <vm/as.h>
  54 #include <vm/rm.h>
  55 #include <vm/seg.h>
  56 #include <vm/seg_vn.h>
  57 #include <sys/modctl.h>
  58 #include <sys/systeminfo.h>
  59 #include <sys/vmparam.h>
  60 #include <sys/machelf.h>
  61 #include <sys/shm_impl.h>
  62 #include <sys/archsystm.h>
  63 #include <sys/fasttrap.h>
  64 #include <sys/brand.h>
  65 #include "elf_impl.h"
  66 #include <sys/sdt.h>
  67 #include <sys/siginfo.h>
  68 
  69 extern int at_flags;
  70 
  71 #define ORIGIN_STR      "ORIGIN"
  72 #define ORIGIN_STR_SIZE 6
  73 
  74 static int getelfhead(vnode_t *, cred_t *, Ehdr *, int *, int *, int *);
  75 static int getelfphdr(vnode_t *, cred_t *, const Ehdr *, int, caddr_t *,
  76     ssize_t *);
  77 static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
  78     ssize_t *, caddr_t *, ssize_t *);
  79 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
  80 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
  81     Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
  82     caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
  83 
  84 typedef enum {
  85         STR_CTF,
  86         STR_SYMTAB,
  87         STR_DYNSYM,
  88         STR_STRTAB,
  89         STR_DYNSTR,
  90         STR_SHSTRTAB,
  91         STR_NUM
  92 } shstrtype_t;
  93 
  94 static const char *shstrtab_data[] = {
  95         ".SUNW_ctf",
  96         ".symtab",
  97         ".dynsym",
  98         ".strtab",
  99         ".dynstr",
 100         ".shstrtab"
 101 };
 102 
 103 typedef struct shstrtab {
 104         int     sst_ndx[STR_NUM];
 105         int     sst_cur;
 106 } shstrtab_t;
 107 
 108 static void
 109 shstrtab_init(shstrtab_t *s)
 110 {
 111         bzero(&s->sst_ndx, sizeof (s->sst_ndx));
 112         s->sst_cur = 1;
 113 }
 114 
 115 static int
 116 shstrtab_ndx(shstrtab_t *s, shstrtype_t type)
 117 {
 118         int ret;
 119 
 120         if ((ret = s->sst_ndx[type]) != 0)
 121                 return (ret);
 122 
 123         ret = s->sst_ndx[type] = s->sst_cur;
 124         s->sst_cur += strlen(shstrtab_data[type]) + 1;
 125 
 126         return (ret);
 127 }
 128 
 129 static size_t
 130 shstrtab_size(const shstrtab_t *s)
 131 {
 132         return (s->sst_cur);
 133 }
 134 
 135 static void
 136 shstrtab_dump(const shstrtab_t *s, char *buf)
 137 {
 138         int i, ndx;
 139 
 140         *buf = '\0';
 141         for (i = 0; i < STR_NUM; i++) {
 142                 if ((ndx = s->sst_ndx[i]) != 0)
 143                         (void) strcpy(buf + ndx, shstrtab_data[i]);
 144         }
 145 }
 146 
 147 static int
 148 dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
 149 {
 150         ASSERT(phdrp->p_type == PT_SUNWDTRACE);
 151 
 152         /*
 153          * See the comment in fasttrap.h for information on how to safely
 154          * update this program header.
 155          */
 156         if (phdrp->p_memsz < PT_SUNWDTRACE_SIZE ||
 157             (phdrp->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X))
 158                 return (-1);
 159 
 160         args->thrptr = phdrp->p_vaddr + base;
 161 
 162         return (0);
 163 }
 164 
 165 /*
 166  * Map in the executable pointed to by vp. Returns 0 on success.
 167  */
 168 int
 169 mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
 170     intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
 171     caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
 172 {
 173         size_t          len;
 174         struct vattr    vat;
 175         caddr_t         phdrbase = NULL;
 176         ssize_t         phdrsize;
 177         int             nshdrs, shstrndx, nphdrs;
 178         int             error = 0;
 179         Phdr            *uphdr = NULL;
 180         Phdr            *junk = NULL;
 181         Phdr            *dynphdr = NULL;
 182         Phdr            *dtrphdr = NULL;
 183         uintptr_t       lddata;
 184         long            execsz;
 185         intptr_t        minaddr;
 186 
 187         if (lddatap != NULL)
 188                 *lddatap = NULL;
 189 
 190         if (error = execpermissions(vp, &vat, args)) {
 191                 uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
 192                 return (error);
 193         }
 194 
 195         if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
 196             &nphdrs)) != 0 ||
 197             (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
 198             &phdrsize)) != 0) {
 199                 uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
 200                 return (error);
 201         }
 202 
 203         if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
 204                 uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
 205                 kmem_free(phdrbase, phdrsize);
 206                 return (ENOEXEC);
 207         }
 208         if (lddatap != NULL)
 209                 *lddatap = lddata;
 210 
 211         if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
 212             &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
 213             len, &execsz, brksize)) {
 214                 uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
 215                 kmem_free(phdrbase, phdrsize);
 216                 return (error);
 217         }
 218 
 219         /*
 220          * Inform our caller if the executable needs an interpreter.
 221          */
 222         *interp = (dynphdr == NULL) ? 0 : 1;
 223 
 224         /*
 225          * If this is a statically linked executable, voffset should indicate
 226          * the address of the executable itself (it normally holds the address
 227          * of the interpreter).
 228          */
 229         if (ehdr->e_type == ET_EXEC && *interp == 0)
 230                 *voffset = minaddr;
 231 
 232         if (uphdr != NULL) {
 233                 *uphdr_vaddr = uphdr->p_vaddr;
 234         } else {
 235                 *uphdr_vaddr = (Addr)-1;
 236         }
 237 
 238         kmem_free(phdrbase, phdrsize);
 239         return (error);
 240 }
 241 
 242 /*ARGSUSED*/
 243 int
 244 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 245     int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
 246     int brand_action)
 247 {
 248         caddr_t         phdrbase = NULL;
 249         caddr_t         bssbase = 0;
 250         caddr_t         brkbase = 0;
 251         size_t          brksize = 0;
 252         ssize_t         dlnsize;
 253         aux_entry_t     *aux;
 254         int             error;
 255         ssize_t         resid;
 256         int             fd = -1;
 257         intptr_t        voffset;
 258         Phdr            *dyphdr = NULL;
 259         Phdr            *stphdr = NULL;
 260         Phdr            *uphdr = NULL;
 261         Phdr            *junk = NULL;
 262         size_t          len;
 263         ssize_t         phdrsize;
 264         int             postfixsize = 0;
 265         int             i, hsize;
 266         Phdr            *phdrp;
 267         Phdr            *dataphdrp = NULL;
 268         Phdr            *dtrphdr;
 269         Phdr            *capphdr = NULL;
 270         Cap             *cap = NULL;
 271         ssize_t         capsize;
 272         int             hasu = 0;
 273         int             hasauxv = 0;
 274         int             hasdy = 0;
 275         int             branded = 0;
 276 
 277         struct proc *p = ttoproc(curthread);
 278         struct user *up = PTOU(p);
 279         struct bigwad {
 280                 Ehdr    ehdr;
 281                 aux_entry_t     elfargs[__KERN_NAUXV_IMPL];
 282                 char            dl_name[MAXPATHLEN];
 283                 char            pathbuf[MAXPATHLEN];
 284                 struct vattr    vattr;
 285                 struct execenv  exenv;
 286         } *bigwad;      /* kmem_alloc this behemoth so we don't blow stack */
 287         Ehdr            *ehdrp;
 288         int             nshdrs, shstrndx, nphdrs;
 289         char            *dlnp;
 290         char            *pathbufp;
 291         rlim64_t        limit;
 292         rlim64_t        roundlimit;
 293 
 294         ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
 295 
 296         bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
 297         ehdrp = &bigwad->ehdr;
 298         dlnp = bigwad->dl_name;
 299         pathbufp = bigwad->pathbuf;
 300 
 301         /*
 302          * Obtain ELF and program header information.
 303          */
 304         if ((error = getelfhead(vp, CRED(), ehdrp, &nshdrs, &shstrndx,
 305             &nphdrs)) != 0 ||
 306             (error = getelfphdr(vp, CRED(), ehdrp, nphdrs, &phdrbase,
 307             &phdrsize)) != 0)
 308                 goto out;
 309 
 310         /*
 311          * Prevent executing an ELF file that has no entry point.
 312          */
 313         if (ehdrp->e_entry == 0) {
 314                 uprintf("%s: Bad entry point\n", exec_file);
 315                 goto bad;
 316         }
 317 
 318         /*
 319          * Put data model that we're exec-ing to into the args passed to
 320          * exec_args(), so it will know what it is copying to on new stack.
 321          * Now that we know whether we are exec-ing a 32-bit or 64-bit
 322          * executable, we can set execsz with the appropriate NCARGS.
 323          */
 324 #ifdef  _LP64
 325         if (ehdrp->e_ident[EI_CLASS] == ELFCLASS32) {
 326                 args->to_model = DATAMODEL_ILP32;
 327                 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
 328         } else {
 329                 args->to_model = DATAMODEL_LP64;
 330                 args->stk_prot &= ~PROT_EXEC;
 331 #if defined(__i386) || defined(__amd64)
 332                 args->dat_prot &= ~PROT_EXEC;
 333 #endif
 334                 *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS64-1);
 335         }
 336 #else   /* _LP64 */
 337         args->to_model = DATAMODEL_ILP32;
 338         *execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS-1);
 339 #endif  /* _LP64 */
 340 
 341         /*
 342          * We delay invoking the brand callback until we've figured out
 343          * what kind of elf binary we're trying to run, 32-bit or 64-bit.
 344          * We do this because now the brand library can just check
 345          * args->to_model to see if the target is 32-bit or 64-bit without
         * having to duplicate all the code above.
 347          */
 348         if ((level < 2) &&
 349             (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 350                 error = BROP(p)->b_elfexec(vp, uap, args,
 351                     idatap, level + 1, execsz, setid, exec_file, cred,
 352                     brand_action);
 353                 goto out;
 354         }
 355 
 356         /*
 357          * Determine aux size now so that stack can be built
 358          * in one shot (except actual copyout of aux image),
 359          * determine any non-default stack protections,
 360          * and still have this code be machine independent.
 361          */
 362         hsize = ehdrp->e_phentsize;
 363         phdrp = (Phdr *)phdrbase;
 364         for (i = nphdrs; i > 0; i--) {
 365                 switch (phdrp->p_type) {
 366                 case PT_INTERP:
 367                         hasauxv = hasdy = 1;
 368                         break;
 369                 case PT_PHDR:
 370                         hasu = 1;
 371                         break;
 372                 case PT_SUNWSTACK:
 373                         args->stk_prot = PROT_USER;
 374                         if (phdrp->p_flags & PF_R)
 375                                 args->stk_prot |= PROT_READ;
 376                         if (phdrp->p_flags & PF_W)
 377                                 args->stk_prot |= PROT_WRITE;
 378                         if (phdrp->p_flags & PF_X)
 379                                 args->stk_prot |= PROT_EXEC;
 380                         break;
 381                 case PT_LOAD:
 382                         dataphdrp = phdrp;
 383                         break;
 384                 case PT_SUNWCAP:
 385                         capphdr = phdrp;
 386                         break;
 387                 }
 388                 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
 389         }
 390 
 391         if (ehdrp->e_type != ET_EXEC) {
 392                 dataphdrp = NULL;
 393                 hasauxv = 1;
 394         }
 395 
 396         /* Copy BSS permissions to args->dat_prot */
 397         if (dataphdrp != NULL) {
 398                 args->dat_prot = PROT_USER;
 399                 if (dataphdrp->p_flags & PF_R)
 400                         args->dat_prot |= PROT_READ;
 401                 if (dataphdrp->p_flags & PF_W)
 402                         args->dat_prot |= PROT_WRITE;
 403                 if (dataphdrp->p_flags & PF_X)
 404                         args->dat_prot |= PROT_EXEC;
 405         }
 406 
 407         /*
 408          * If a auxvector will be required - reserve the space for
 409          * it now.  This may be increased by exec_args if there are
 410          * ISA-specific types (included in __KERN_NAUXV_IMPL).
 411          */
 412         if (hasauxv) {
 413                 /*
 414                  * If a AUX vector is being built - the base AUX
 415                  * entries are:
 416                  *
 417                  *      AT_BASE
 418                  *      AT_FLAGS
 419                  *      AT_PAGESZ
 420                  *      AT_SUN_AUXFLAGS
 421                  *      AT_SUN_HWCAP
 422                  *      AT_SUN_HWCAP2
 423                  *      AT_SUN_PLATFORM (added in stk_copyout)
 424                  *      AT_SUN_EXECNAME (added in stk_copyout)
 425                  *      AT_NULL
 426                  *
 427                  * total == 9
 428                  */
 429                 if (hasdy && hasu) {
 430                         /*
 431                          * Has PT_INTERP & PT_PHDR - the auxvectors that
 432                          * will be built are:
 433                          *
 434                          *      AT_PHDR
 435                          *      AT_PHENT
 436                          *      AT_PHNUM
 437                          *      AT_ENTRY
 438                          *      AT_LDDATA
 439                          *
 440                          * total = 5
 441                          */
 442                         args->auxsize = (9 + 5) * sizeof (aux_entry_t);
 443                 } else if (hasdy) {
 444                         /*
 445                          * Has PT_INTERP but no PT_PHDR
 446                          *
 447                          *      AT_EXECFD
 448                          *      AT_LDDATA
 449                          *
 450                          * total = 2
 451                          */
 452                         args->auxsize = (9 + 2) * sizeof (aux_entry_t);
 453                 } else {
 454                         args->auxsize = 9 * sizeof (aux_entry_t);
 455                 }
 456         } else {
 457                 args->auxsize = 0;
 458         }
 459 
 460         /*
 461          * If this binary is using an emulator, we need to add an
 462          * AT_SUN_EMULATOR aux entry.
 463          */
 464         if (args->emulator != NULL)
 465                 args->auxsize += sizeof (aux_entry_t);
 466 
 467         if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
 468                 branded = 1;
 469                 /*
                 * We will be adding 4 entries to the aux vectors.  One for
                 * the brandname and 3 for the brand specific aux vectors.
 472                  */
 473                 args->auxsize += 4 * sizeof (aux_entry_t);
 474         }
 475 
 476         /* Hardware/Software capabilities */
 477         if (capphdr != NULL &&
 478             (capsize = capphdr->p_filesz) > 0 &&
 479             capsize <= 16 * sizeof (*cap)) {
 480                 int ncaps = capsize / sizeof (*cap);
 481                 Cap *cp;
 482 
 483                 cap = kmem_alloc(capsize, KM_SLEEP);
 484                 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)cap,
 485                     capsize, (offset_t)capphdr->p_offset,
 486                     UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid)) != 0) {
 487                         uprintf("%s: Cannot read capabilities section\n",
 488                             exec_file);
 489                         goto out;
 490                 }
 491                 for (cp = cap; cp < cap + ncaps; cp++) {
 492                         if (cp->c_tag == CA_SUNW_SF_1 &&
 493                             (cp->c_un.c_val & SF1_SUNW_ADDR32)) {
 494                                 if (args->to_model == DATAMODEL_LP64)
 495                                         args->addr32 = 1;
 496                                 break;
 497                         }
 498                 }
 499         }
 500 
 501         aux = bigwad->elfargs;
 502         /*
 503          * Move args to the user's stack.
 504          * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
 505          */
 506         if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
 507                 if (error == -1) {
 508                         error = ENOEXEC;
 509                         goto bad;
 510                 }
 511                 goto out;
 512         }
 513         /* we're single threaded after this point */
 514 
 515         /*
 516          * If this is an ET_DYN executable (shared object),
 517          * determine its memory size so that mapelfexec() can load it.
 518          */
 519         if (ehdrp->e_type == ET_DYN)
 520                 len = elfsize(ehdrp, nphdrs, phdrbase, NULL);
 521         else
 522                 len = 0;
 523 
 524         dtrphdr = NULL;
 525 
 526         if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
 527             &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
 528             len, execsz, &brksize)) != 0)
 529                 goto bad;
 530 
 531         if (uphdr != NULL && dyphdr == NULL)
 532                 goto bad;
 533 
 534         if (dtrphdr != NULL && dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 535                 uprintf("%s: Bad DTrace phdr in %s\n", exec_file, exec_file);
 536                 goto bad;
 537         }
 538 
 539         if (dyphdr != NULL) {
 540                 size_t          len;
 541                 uintptr_t       lddata;
 542                 char            *p;
 543                 struct vnode    *nvp;
 544 
 545                 dlnsize = dyphdr->p_filesz;
 546 
 547                 if (dlnsize > MAXPATHLEN || dlnsize <= 0)
 548                         goto bad;
 549 
 550                 /*
 551                  * Read in "interpreter" pathname.
 552                  */
 553                 if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz,
 554                     (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
 555                     CRED(), &resid)) != 0) {
 556                         uprintf("%s: Cannot obtain interpreter pathname\n",
 557                             exec_file);
 558                         goto bad;
 559                 }
 560 
 561                 if (resid != 0 || dlnp[dlnsize - 1] != '\0')
 562                         goto bad;
 563 
 564                 /*
 565                  * Search for '$ORIGIN' token in interpreter path.
 566                  * If found, expand it.
 567                  */
 568                 for (p = dlnp; p = strchr(p, '$'); ) {
 569                         uint_t  len, curlen;
 570                         char    *_ptr;
 571 
 572                         if (strncmp(++p, ORIGIN_STR, ORIGIN_STR_SIZE))
 573                                 continue;
 574 
 575                         curlen = 0;
 576                         len = p - dlnp - 1;
 577                         if (len) {
 578                                 bcopy(dlnp, pathbufp, len);
 579                                 curlen += len;
 580                         }
 581                         if (_ptr = strrchr(args->pathname, '/')) {
 582                                 len = _ptr - args->pathname;
 583                                 if ((curlen + len) > MAXPATHLEN)
 584                                         break;
 585 
 586                                 bcopy(args->pathname, &pathbufp[curlen], len);
 587                                 curlen += len;
 588                         } else {
 589                                 /*
 590                                  * executable is a basename found in the
                                 * current directory.  So - just substitute
 592                                  * '.' for ORIGIN.
 593                                  */
 594                                 pathbufp[curlen] = '.';
 595                                 curlen++;
 596                         }
 597                         p += ORIGIN_STR_SIZE;
 598                         len = strlen(p);
 599 
 600                         if ((curlen + len) > MAXPATHLEN)
 601                                 break;
 602                         bcopy(p, &pathbufp[curlen], len);
 603                         curlen += len;
 604                         pathbufp[curlen++] = '\0';
 605                         bcopy(pathbufp, dlnp, curlen);
 606                 }
 607 
 608                 /*
 609                  * /usr/lib/ld.so.1 is known to be a symlink to /lib/ld.so.1
 610                  * (and /usr/lib/64/ld.so.1 is a symlink to /lib/64/ld.so.1).
 611                  * Just in case /usr is not mounted, change it now.
 612                  */
 613                 if (strcmp(dlnp, USR_LIB_RTLD) == 0)
 614                         dlnp += 4;
 615                 error = lookupname(dlnp, UIO_SYSSPACE, FOLLOW, NULLVPP, &nvp);
 616                 if (error && dlnp != bigwad->dl_name) {
 617                         /* new kernel, old user-level */
 618                         error = lookupname(dlnp -= 4, UIO_SYSSPACE, FOLLOW,
 619                             NULLVPP, &nvp);
 620                 }
 621                 if (error) {
 622                         uprintf("%s: Cannot find %s\n", exec_file, dlnp);
 623                         goto bad;
 624                 }
 625 
 626                 /*
 627                  * Setup the "aux" vector.
 628                  */
 629                 if (uphdr) {
 630                         if (ehdrp->e_type == ET_DYN) {
 631                                 /* don't use the first page */
 632                                 bigwad->exenv.ex_brkbase = (caddr_t)PAGESIZE;
 633                                 bigwad->exenv.ex_bssbase = (caddr_t)PAGESIZE;
 634                         } else {
 635                                 bigwad->exenv.ex_bssbase = bssbase;
 636                                 bigwad->exenv.ex_brkbase = brkbase;
 637                         }
 638                         bigwad->exenv.ex_brksize = brksize;
 639                         bigwad->exenv.ex_magic = elfmagic;
 640                         bigwad->exenv.ex_vp = vp;
 641                         setexecenv(&bigwad->exenv);
 642 
 643                         ADDAUX(aux, AT_PHDR, uphdr->p_vaddr + voffset)
 644                         ADDAUX(aux, AT_PHENT, ehdrp->e_phentsize)
 645                         ADDAUX(aux, AT_PHNUM, nphdrs)
 646                         ADDAUX(aux, AT_ENTRY, ehdrp->e_entry + voffset)
 647                 } else {
 648                         if ((error = execopen(&vp, &fd)) != 0) {
 649                                 VN_RELE(nvp);
 650                                 goto bad;
 651                         }
 652 
 653                         ADDAUX(aux, AT_EXECFD, fd)
 654                 }
 655 
 656                 if ((error = execpermissions(nvp, &bigwad->vattr, args)) != 0) {
 657                         VN_RELE(nvp);
 658                         uprintf("%s: Cannot execute %s\n", exec_file, dlnp);
 659                         goto bad;
 660                 }
 661 
 662                 /*
 663                  * Now obtain the ELF header along with the entire program
 664                  * header contained in "nvp".
 665                  */
 666                 kmem_free(phdrbase, phdrsize);
 667                 phdrbase = NULL;
 668                 if ((error = getelfhead(nvp, CRED(), ehdrp, &nshdrs,
 669                     &shstrndx, &nphdrs)) != 0 ||
 670                     (error = getelfphdr(nvp, CRED(), ehdrp, nphdrs, &phdrbase,
 671                     &phdrsize)) != 0) {
 672                         VN_RELE(nvp);
 673                         uprintf("%s: Cannot read %s\n", exec_file, dlnp);
 674                         goto bad;
 675                 }
 676 
 677                 /*
 678                  * Determine memory size of the "interpreter's" loadable
 679                  * sections.  This size is then used to obtain the virtual
 680                  * address of a hole, in the user's address space, large
 681                  * enough to map the "interpreter".
 682                  */
 683                 if ((len = elfsize(ehdrp, nphdrs, phdrbase, &lddata)) == 0) {
 684                         VN_RELE(nvp);
 685                         uprintf("%s: Nothing to load in %s\n", exec_file, dlnp);
 686                         goto bad;
 687                 }
 688 
 689                 dtrphdr = NULL;
 690 
 691                 error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
 692                     &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
 693                     execsz, NULL);
 694                 if (error || junk != NULL) {
 695                         VN_RELE(nvp);
 696                         uprintf("%s: Cannot map %s\n", exec_file, dlnp);
 697                         goto bad;
 698                 }
 699 
 700                 /*
 701                  * We use the DTrace program header to initialize the
 702                  * architecture-specific user per-LWP location. The dtrace
 703                  * fasttrap provider requires ready access to per-LWP scratch
 704                  * space. We assume that there is only one such program header
 705                  * in the interpreter.
 706                  */
 707                 if (dtrphdr != NULL &&
 708                     dtrace_safe_phdr(dtrphdr, args, voffset) != 0) {
 709                         VN_RELE(nvp);
 710                         uprintf("%s: Bad DTrace phdr in %s\n", exec_file, dlnp);
 711                         goto bad;
 712                 }
 713 
 714                 VN_RELE(nvp);
 715                 ADDAUX(aux, AT_SUN_LDDATA, voffset + lddata)
 716         }
 717 
 718         if (hasauxv) {
 719                 int auxf = AF_SUN_HWCAPVERIFY;
 720                 /*
 721                  * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
 722                  * exec_args()
 723                  */
 724                 ADDAUX(aux, AT_BASE, voffset)
 725                 ADDAUX(aux, AT_FLAGS, at_flags)
 726                 ADDAUX(aux, AT_PAGESZ, PAGESIZE)
 727                 /*
 728                  * Linker flags. (security)
 729                  * p_flag not yet set at this time.
 730                  * We rely on gexec() to provide us with the information.
 731                  * If the application is set-uid but this is not reflected
 732                  * in a mismatch between real/effective uids/gids, then
 733                  * don't treat this as a set-uid exec.  So we care about
 734                  * the EXECSETID_UGIDS flag but not the ...SETID flag.
 735                  */
 736                 if ((setid &= ~EXECSETID_SETID) != 0)
 737                         auxf |= AF_SUN_SETUGID;
 738 
 739                 /*
 740                  * If we're running a native process from within a branded
 741                  * zone under pfexec then we clear the AF_SUN_SETUGID flag so
 742                  * that the native ld.so.1 is able to link with the native
 743                  * libraries instead of using the brand libraries that are
 744                  * installed in the zone.  We only do this for processes
 745                  * which we trust because we see they are already running
 746                  * under pfexec (where uid != euid).  This prevents a
 747                  * malicious user within the zone from crafting a wrapper to
 748                  * run native suid commands with unsecure libraries interposed.
 749                  */
 750                 if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
 751                     (setid &= ~EXECSETID_SETID) != 0))
 752                         auxf &= ~AF_SUN_SETUGID;
 753 
 754                 /*
 755                  * Record the user addr of the auxflags aux vector entry
 756                  * since brands may optionally want to manipulate this field.
 757                  */
 758                 args->auxp_auxflags =
 759                     (char *)((char *)args->stackend +
 760                     ((char *)&aux->a_type -
 761                     (char *)bigwad->elfargs));
 762                 ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
 763                 /*
 764                  * Hardware capability flag word (performance hints)
 765                  * Used for choosing faster library routines.
 766                  * (Potentially different between 32-bit and 64-bit ABIs)
 767                  */
 768 #if defined(_LP64)
 769                 if (args->to_model == DATAMODEL_NATIVE) {
 770                         ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 771                         ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 772                 } else {
 773                         ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap32)
 774                         ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap32_2)
 775                 }
 776 #else
 777                 ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 778                 ADDAUX(aux, AT_SUN_HWCAP2, auxv_hwcap_2)
 779 #endif
 780                 if (branded) {
 781                         /*
 782                          * Reserve space for the brand-private aux vectors,
 783                          * and record the user addr of that space.
 784                          */
 785                         args->auxp_brand =
 786                             (char *)((char *)args->stackend +
 787                             ((char *)&aux->a_type -
 788                             (char *)bigwad->elfargs));
 789                         ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
 790                         ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
 791                         ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
 792                 }
 793 
 794                 ADDAUX(aux, AT_NULL, 0)
 795                 postfixsize = (char *)aux - (char *)bigwad->elfargs;
 796 
 797                 /*
 798                  * We make assumptions above when we determine how many aux
 799                  * vector entries we will be adding. However, if we have an
 800                  * invalid elf file, it is possible that mapelfexec might
 801                  * behave differently (but not return an error), in which case
 802                  * the number of aux entries we actually add will be different.
 803                  * We detect that now and error out.
 804                  */
 805                 if (postfixsize != args->auxsize) {
 806                         DTRACE_PROBE2(elfexec_badaux, int, postfixsize,
 807                             int, args->auxsize);
 808                         goto bad;
 809                 }
 810                 ASSERT(postfixsize <= __KERN_NAUXV_IMPL * sizeof (aux_entry_t));
 811         }
 812 
 813         /*
 814          * For the 64-bit kernel, the limit is big enough that rounding it up
 815          * to a page can overflow the 64-bit limit, so we check for btopr()
 816          * overflowing here by comparing it with the unrounded limit in pages.
 817          * If it hasn't overflowed, compare the exec size with the rounded up
 818          * limit in pages.  Otherwise, just compare with the unrounded limit.
 819          */
 820         limit = btop(p->p_vmem_ctl);
 821         roundlimit = btopr(p->p_vmem_ctl);
 822         if ((roundlimit > limit && *execsz > roundlimit) ||
 823             (roundlimit < limit && *execsz > limit)) {
 824                 mutex_enter(&p->p_lock);
 825                 (void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
 826                     RCA_SAFE);
 827                 mutex_exit(&p->p_lock);
 828                 error = ENOMEM;
 829                 goto bad;
 830         }
 831 
 832         bzero(up->u_auxv, sizeof (up->u_auxv));
 833         if (postfixsize) {
 834                 int num_auxv;
 835 
 836                 /*
 837                  * Copy the aux vector to the user stack.
 838                  */
 839                 error = execpoststack(args, bigwad->elfargs, postfixsize);
 840                 if (error)
 841                         goto bad;
 842 
 843                 /*
 844                  * Copy auxv to the process's user structure for use by /proc.
 845                  * If this is a branded process, the brand's exec routine will
 846                  * copy its private entries to the user structure later. It
 847                  * relies on the fact that the blank entries are at the end.
 848                  */
 849                 num_auxv = postfixsize / sizeof (aux_entry_t);
 850                 ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
 851                 aux = bigwad->elfargs;
 852                 for (i = 0; i < num_auxv; i++) {
 853                         up->u_auxv[i].a_type = aux[i].a_type;
 854                         up->u_auxv[i].a_un.a_val = (aux_val_t)aux[i].a_un.a_val;
 855                 }
 856         }
 857 
 858         /*
 859          * Pass back the starting address so we can set the program counter.
 860          */
 861         args->entry = (uintptr_t)(ehdrp->e_entry + voffset);
 862 
 863         if (!uphdr) {
 864                 if (ehdrp->e_type == ET_DYN) {
 865                         /*
 866                          * If we are executing a shared library which doesn't
 867                          * have an interpreter (probably ld.so.1) then
 868                          * we don't set the brkbase now.  Instead we
 869                          * delay its setting until the first call
 870                          * via grow.c::brk().  This permits ld.so.1 to
 871                          * initialize brkbase to the tail of the executable it
 872                          * loads (which is where it needs to be).
 873                          */
 874                         bigwad->exenv.ex_brkbase = (caddr_t)0;
 875                         bigwad->exenv.ex_bssbase = (caddr_t)0;
 876                         bigwad->exenv.ex_brksize = 0;
 877                 } else {
 878                         bigwad->exenv.ex_brkbase = brkbase;
 879                         bigwad->exenv.ex_bssbase = bssbase;
 880                         bigwad->exenv.ex_brksize = brksize;
 881                 }
 882                 bigwad->exenv.ex_magic = elfmagic;
 883                 bigwad->exenv.ex_vp = vp;
 884                 setexecenv(&bigwad->exenv);
 885         }
 886 
 887         ASSERT(error == 0);
 888         goto out;
 889 
 890 bad:
 891         if (fd != -1)           /* did we open the a.out yet */
 892                 (void) execclose(fd);
 893 
 894         psignal(p, SIGKILL);
 895 
 896         if (error == 0)
 897                 error = ENOEXEC;
 898 out:
 899         if (phdrbase != NULL)
 900                 kmem_free(phdrbase, phdrsize);
 901         if (cap != NULL)
 902                 kmem_free(cap, capsize);
 903         kmem_free(bigwad, sizeof (struct bigwad));
 904         return (error);
 905 }
 906 
 907 /*
 908  * Compute the memory size requirement for the ELF file.
 909  */
 910 static size_t
 911 elfsize(Ehdr *ehdrp, int nphdrs, caddr_t phdrbase, uintptr_t *lddata)
 912 {
 913         size_t  len;
 914         Phdr    *phdrp = (Phdr *)phdrbase;
 915         int     hsize = ehdrp->e_phentsize;
 916         int     first = 1;
 917         int     dfirst = 1;     /* first data segment */
 918         uintptr_t loaddr = 0;
 919         uintptr_t hiaddr = 0;
 920         uintptr_t lo, hi;
 921         int     i;
 922 
 923         for (i = nphdrs; i > 0; i--) {
 924                 if (phdrp->p_type == PT_LOAD) {
 925                         lo = phdrp->p_vaddr;
 926                         hi = lo + phdrp->p_memsz;
 927                         if (first) {
 928                                 loaddr = lo;
 929                                 hiaddr = hi;
 930                                 first = 0;
 931                         } else {
 932                                 if (loaddr > lo)
 933                                         loaddr = lo;
 934                                 if (hiaddr < hi)
 935                                         hiaddr = hi;
 936                         }
 937 
 938                         /*
 939                          * save the address of the first data segment
 940                          * of a object - used for the AT_SUNW_LDDATA
 941                          * aux entry.
 942                          */
 943                         if ((lddata != NULL) && dfirst &&
 944                             (phdrp->p_flags & PF_W)) {
 945                                 *lddata = lo;
 946                                 dfirst = 0;
 947                         }
 948                 }
 949                 phdrp = (Phdr *)((caddr_t)phdrp + hsize);
 950         }
 951 
 952         len = hiaddr - (loaddr & PAGEMASK);
 953         len = roundup(len, PAGESIZE);
 954 
 955         return (len);
 956 }
 957 
 958 /*
 959  * Read in the ELF header and program header table.
 960  * SUSV3 requires:
 961  *      ENOEXEC File format is not recognized
 962  *      EINVAL  Format recognized but execution not supported
 963  */
 964 static int
 965 getelfhead(vnode_t *vp, cred_t *credp, Ehdr *ehdr, int *nshdrs, int *shstrndx,
 966     int *nphdrs)
 967 {
 968         int error;
 969         ssize_t resid;
 970 
 971         /*
 972          * We got here by the first two bytes in ident,
 973          * now read the entire ELF header.
 974          */
 975         if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)ehdr,
 976             sizeof (Ehdr), (offset_t)0, UIO_SYSSPACE, 0,
 977             (rlim64_t)0, credp, &resid)) != 0)
 978                 return (error);
 979 
 980         /*
 981          * Since a separate version is compiled for handling 32-bit and
 982          * 64-bit ELF executables on a 64-bit kernel, the 64-bit version
 983          * doesn't need to be able to deal with 32-bit ELF files.
 984          */
 985         if (resid != 0 ||
 986             ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
 987             ehdr->e_ident[EI_MAG3] != ELFMAG3)
 988                 return (ENOEXEC);
 989 
 990         if ((ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) ||
 991 #if defined(_ILP32) || defined(_ELF32_COMPAT)
 992             ehdr->e_ident[EI_CLASS] != ELFCLASS32 ||
 993 #else
 994             ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
 995 #endif
 996             !elfheadcheck(ehdr->e_ident[EI_DATA], ehdr->e_machine,
 997             ehdr->e_flags))
 998                 return (EINVAL);
 999 
1000         *nshdrs = ehdr->e_shnum;
1001         *shstrndx = ehdr->e_shstrndx;
1002         *nphdrs = ehdr->e_phnum;
1003 
1004         /*
1005          * If e_shnum, e_shstrndx, or e_phnum is its sentinel value, we need
1006          * to read in the section header at index zero to acces the true
1007          * values for those fields.
1008          */
1009         if ((*nshdrs == 0 && ehdr->e_shoff != 0) ||
1010             *shstrndx == SHN_XINDEX || *nphdrs == PN_XNUM) {
1011                 Shdr shdr;
1012 
1013                 if (ehdr->e_shoff == 0)
1014                         return (EINVAL);
1015 
1016                 if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&shdr,
1017                     sizeof (shdr), (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0,
1018                     (rlim64_t)0, credp, &resid)) != 0)
1019                         return (error);
1020 
1021                 if (*nshdrs == 0)
1022                         *nshdrs = shdr.sh_size;
1023                 if (*shstrndx == SHN_XINDEX)
1024                         *shstrndx = shdr.sh_link;
1025                 if (*nphdrs == PN_XNUM && shdr.sh_info != 0)
1026                         *nphdrs = shdr.sh_info;
1027         }
1028 
1029         return (0);
1030 }
1031 
/*
 * elf_nphdr_max, scaled by sizeof (Phdr), is the program header table size
 * above which getelfphdr() allocates with KM_NOSLEEP (returning ENOMEM if
 * that fails) instead of KM_SLEEP.  The variable is defined when
 * _ELF32_COMPAT is not set and only declared extern when it is.
 */
1032 #ifdef _ELF32_COMPAT
1033 extern size_t elf_nphdr_max;
1034 #else
1035 size_t elf_nphdr_max = 1000;
1036 #endif
1037 
1038 static int
1039 getelfphdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr, int nphdrs,
1040     caddr_t *phbasep, ssize_t *phsizep)
1041 {
1042         ssize_t resid, minsize;
1043         int err;
1044 
1045         /*
1046          * Since we're going to be using e_phentsize to iterate down the
1047          * array of program headers, it must be 8-byte aligned or else
1048          * a we might cause a misaligned access. We use all members through
1049          * p_flags on 32-bit ELF files and p_memsz on 64-bit ELF files so
1050          * e_phentsize must be at least large enough to include those
1051          * members.
1052          */
1053 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1054         minsize = offsetof(Phdr, p_flags) + sizeof (((Phdr *)NULL)->p_flags);
1055 #else
1056         minsize = offsetof(Phdr, p_memsz) + sizeof (((Phdr *)NULL)->p_memsz);
1057 #endif
1058         if (ehdr->e_phentsize < minsize || (ehdr->e_phentsize & 3))
1059                 return (EINVAL);
1060 
1061         *phsizep = nphdrs * ehdr->e_phentsize;
1062 
1063         if (*phsizep > sizeof (Phdr) * elf_nphdr_max) {
1064                 if ((*phbasep = kmem_alloc(*phsizep, KM_NOSLEEP)) == NULL)
1065                         return (ENOMEM);
1066         } else {
1067                 *phbasep = kmem_alloc(*phsizep, KM_SLEEP);
1068         }
1069 
1070         if ((err = vn_rdwr(UIO_READ, vp, *phbasep, *phsizep,
1071             (offset_t)ehdr->e_phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1072             credp, &resid)) != 0) {
1073                 kmem_free(*phbasep, *phsizep);
1074                 *phbasep = NULL;
1075                 return (err);
1076         }
1077 
1078         return (0);
1079 }
1080 
/*
 * Thresholds used by getelfshdr() to choose between KM_SLEEP and KM_NOSLEEP
 * allocations: elf_nshdr_max, scaled by sizeof (Shdr), bounds the section
 * header table size and elf_shstrtab_max bounds the section name string
 * table size.  Anything larger is allocated with KM_NOSLEEP and fails with
 * ENOMEM if memory is not immediately available.  The variables are defined
 * when _ELF32_COMPAT is not set and only declared extern when it is.
 */
1081 #ifdef _ELF32_COMPAT
1082 extern size_t elf_nshdr_max;
1083 extern size_t elf_shstrtab_max;
1084 #else
1085 size_t elf_nshdr_max = 10000;
1086 size_t elf_shstrtab_max = 100 * 1024;
1087 #endif
1088 
1089 
1090 static int
1091 getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
1092     int nshdrs, int shstrndx, caddr_t *shbasep, ssize_t *shsizep,
1093     char **shstrbasep, ssize_t *shstrsizep)
1094 {
1095         ssize_t resid, minsize;
1096         int err;
1097         Shdr *shdr;
1098 
1099         /*
1100          * Since we're going to be using e_shentsize to iterate down the
1101          * array of section headers, it must be 8-byte aligned or else
1102          * a we might cause a misaligned access. We use all members through
1103          * sh_entsize (on both 32- and 64-bit ELF files) so e_shentsize
1104          * must be at least large enough to include that member. The index
1105          * of the string table section must also be valid.
1106          */
1107         minsize = offsetof(Shdr, sh_entsize) + sizeof (shdr->sh_entsize);
1108         if (ehdr->e_shentsize < minsize || (ehdr->e_shentsize & 3) ||
1109             shstrndx >= nshdrs)
1110                 return (EINVAL);
1111 
1112         *shsizep = nshdrs * ehdr->e_shentsize;
1113 
1114         if (*shsizep > sizeof (Shdr) * elf_nshdr_max) {
1115                 if ((*shbasep = kmem_alloc(*shsizep, KM_NOSLEEP)) == NULL)
1116                         return (ENOMEM);
1117         } else {
1118                 *shbasep = kmem_alloc(*shsizep, KM_SLEEP);
1119         }
1120 
1121         if ((err = vn_rdwr(UIO_READ, vp, *shbasep, *shsizep,
1122             (offset_t)ehdr->e_shoff, UIO_SYSSPACE, 0, (rlim64_t)0,
1123             credp, &resid)) != 0) {
1124                 kmem_free(*shbasep, *shsizep);
1125                 return (err);
1126         }
1127 
1128         /*
1129          * Pull the section string table out of the vnode; fail if the size
1130          * is zero.
1131          */
1132         shdr = (Shdr *)(*shbasep + shstrndx * ehdr->e_shentsize);
1133         if ((*shstrsizep = shdr->sh_size) == 0) {
1134                 kmem_free(*shbasep, *shsizep);
1135                 return (EINVAL);
1136         }
1137 
1138         if (*shstrsizep > elf_shstrtab_max) {
1139                 if ((*shstrbasep = kmem_alloc(*shstrsizep,
1140                     KM_NOSLEEP)) == NULL) {
1141                         kmem_free(*shbasep, *shsizep);
1142                         return (ENOMEM);
1143                 }
1144         } else {
1145                 *shstrbasep = kmem_alloc(*shstrsizep, KM_SLEEP);
1146         }
1147 
1148         if ((err = vn_rdwr(UIO_READ, vp, *shstrbasep, *shstrsizep,
1149             (offset_t)shdr->sh_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
1150             credp, &resid)) != 0) {
1151                 kmem_free(*shbasep, *shsizep);
1152                 kmem_free(*shstrbasep, *shstrsizep);
1153                 return (err);
1154         }
1155 
1156         /*
1157          * Make sure the strtab is null-terminated to make sure we
1158          * don't run off the end of the table.
1159          */
1160         (*shstrbasep)[*shstrsizep - 1] = '\0';
1161 
1162         return (0);
1163 }
1164 
1165 static int
1166 mapelfexec(
1167         vnode_t *vp,
1168         Ehdr *ehdr,
1169         int nphdrs,
1170         caddr_t phdrbase,
1171         Phdr **uphdr,
1172         Phdr **dyphdr,
1173         Phdr **stphdr,
1174         Phdr **dtphdr,
1175         Phdr *dataphdrp,
1176         caddr_t *bssbase,
1177         caddr_t *brkbase,
1178         intptr_t *voffset,
1179         intptr_t *minaddr,
1180         size_t len,
1181         long *execsz,
1182         size_t *brksize)
1183 {
1184         Phdr *phdr;
1185         int i, prot, error;
1186         caddr_t addr = NULL;
1187         size_t zfodsz;
1188         int ptload = 0;
1189         int page;
1190         off_t offset;
1191         int hsize = ehdr->e_phentsize;
1192         caddr_t mintmp = (caddr_t)-1;
1193         extern int use_brk_lpg;
1194 
1195         if (ehdr->e_type == ET_DYN) {
1196                 /*
1197                  * Obtain the virtual address of a hole in the
1198                  * address space to map the "interpreter".
1199                  */
1200                 map_addr(&addr, len, (offset_t)0, 1, 0);
1201                 if (addr == NULL)
1202                         return (ENOMEM);
1203                 *voffset = (intptr_t)addr;
1204 
1205                 /*
1206                  * Calculate the minimum vaddr so it can be subtracted out.
1207                  * According to the ELF specification, since PT_LOAD sections
1208                  * must be sorted by increasing p_vaddr values, this is
1209                  * guaranteed to be the first PT_LOAD section.
1210                  */
1211                 phdr = (Phdr *)phdrbase;
1212                 for (i = nphdrs; i > 0; i--) {
1213                         if (phdr->p_type == PT_LOAD) {
1214                                 *voffset -= (uintptr_t)phdr->p_vaddr;
1215                                 break;
1216                         }
1217                         phdr = (Phdr *)((caddr_t)phdr + hsize);
1218                 }
1219 
1220         } else {
1221                 *voffset = 0;
1222         }
1223         phdr = (Phdr *)phdrbase;
1224         for (i = nphdrs; i > 0; i--) {
1225                 switch (phdr->p_type) {
1226                 case PT_LOAD:
1227                         if ((*dyphdr != NULL) && (*uphdr == NULL))
1228                                 return (0);
1229 
1230                         ptload = 1;
1231                         prot = PROT_USER;
1232                         if (phdr->p_flags & PF_R)
1233                                 prot |= PROT_READ;
1234                         if (phdr->p_flags & PF_W)
1235                                 prot |= PROT_WRITE;
1236                         if (phdr->p_flags & PF_X)
1237                                 prot |= PROT_EXEC;
1238 
1239                         addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
1240 
1241                         /*
1242                          * Keep track of the segment with the lowest starting
1243                          * address.
1244                          */
1245                         if (addr < mintmp)
1246                                 mintmp = addr;
1247 
1248                         zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
1249 
1250                         offset = phdr->p_offset;
1251                         if (((uintptr_t)offset & PAGEOFFSET) ==
1252                             ((uintptr_t)addr & PAGEOFFSET) &&
1253                             (!(vp->v_flag & VNOMAP))) {
1254                                 page = 1;
1255                         } else {
1256                                 page = 0;
1257                         }
1258 
1259                         /*
1260                          * Set the heap pagesize for OOB when the bss size
1261                          * is known and use_brk_lpg is not 0.
1262                          */
1263                         if (brksize != NULL && use_brk_lpg &&
1264                             zfodsz != 0 && phdr == dataphdrp &&
1265                             (prot & PROT_WRITE)) {
1266                                 size_t tlen = P2NPHASE((uintptr_t)addr +
1267                                     phdr->p_filesz, PAGESIZE);
1268 
1269                                 if (zfodsz > tlen) {
1270                                         curproc->p_brkpageszc =
1271                                             page_szc(map_pgsz(MAPPGSZ_HEAP,
1272                                             curproc, addr + phdr->p_filesz +
1273                                             tlen, zfodsz - tlen, 0));
1274                                 }
1275                         }
1276 
1277                         if (curproc->p_brkpageszc != 0 && phdr == dataphdrp &&
1278                             (prot & PROT_WRITE)) {
1279                                 uint_t  szc = curproc->p_brkpageszc;
1280                                 size_t pgsz = page_get_pagesize(szc);
1281                                 caddr_t ebss = addr + phdr->p_memsz;
1282                                 size_t extra_zfodsz;
1283 
1284                                 ASSERT(pgsz > PAGESIZE);
1285 
1286                                 extra_zfodsz = P2NPHASE((uintptr_t)ebss, pgsz);
1287 
1288                                 if (error = execmap(vp, addr, phdr->p_filesz,
1289                                     zfodsz + extra_zfodsz, phdr->p_offset,
1290                                     prot, page, szc))
1291                                         goto bad;
1292                                 if (brksize != NULL)
1293                                         *brksize = extra_zfodsz;
1294                         } else {
1295                                 if (error = execmap(vp, addr, phdr->p_filesz,
1296                                     zfodsz, phdr->p_offset, prot, page, 0))
1297                                         goto bad;
1298                         }
1299 
1300                         if (bssbase != NULL && addr >= *bssbase &&
1301                             phdr == dataphdrp) {
1302                                 *bssbase = addr + phdr->p_filesz;
1303                         }
1304                         if (brkbase != NULL && addr >= *brkbase) {
1305                                 *brkbase = addr + phdr->p_memsz;
1306                         }
1307 
1308                         *execsz += btopr(phdr->p_memsz);
1309                         break;
1310 
1311                 case PT_INTERP:
1312                         if (ptload)
1313                                 goto bad;
1314                         *dyphdr = phdr;
1315                         break;
1316 
1317                 case PT_SHLIB:
1318                         *stphdr = phdr;
1319                         break;
1320 
1321                 case PT_PHDR:
1322                         if (ptload)
1323                                 goto bad;
1324                         *uphdr = phdr;
1325                         break;
1326 
1327                 case PT_NULL:
1328                 case PT_DYNAMIC:
1329                 case PT_NOTE:
1330                         break;
1331 
1332                 case PT_SUNWDTRACE:
1333                         if (dtphdr != NULL)
1334                                 *dtphdr = phdr;
1335                         break;
1336 
1337                 default:
1338                         break;
1339                 }
1340                 phdr = (Phdr *)((caddr_t)phdr + hsize);
1341         }
1342 
1343         if (minaddr != NULL) {
1344                 ASSERT(mintmp != (caddr_t)-1);
1345                 *minaddr = (intptr_t)mintmp;
1346         }
1347 
1348         return (0);
1349 bad:
1350         if (error == 0)
1351                 error = EINVAL;
1352         return (error);
1353 }
1354 
1355 int
1356 elfnote(vnode_t *vp, offset_t *offsetp, int type, int descsz, void *desc,
1357     rlim64_t rlimit, cred_t *credp)
1358 {
1359         Note note;
1360         int error;
1361 
1362         bzero(&note, sizeof (note));
1363         bcopy("CORE", note.name, 4);
1364         note.nhdr.n_type = type;
1365         /*
1366          * The System V ABI states that n_namesz must be the length of the
1367          * string that follows the Nhdr structure including the terminating
1368          * null. The ABI also specifies that sufficient padding should be
1369          * included so that the description that follows the name string
1370          * begins on a 4- or 8-byte boundary for 32- and 64-bit binaries
1371          * respectively. However, since this change was not made correctly
1372          * at the time of the 64-bit port, both 32- and 64-bit binaries
1373          * descriptions are only guaranteed to begin on a 4-byte boundary.
1374          */
1375         note.nhdr.n_namesz = 5;
1376         note.nhdr.n_descsz = roundup(descsz, sizeof (Word));
1377 
1378         if (error = core_write(vp, UIO_SYSSPACE, *offsetp, &note,
1379             sizeof (note), rlimit, credp))
1380                 return (error);
1381 
1382         *offsetp += sizeof (note);
1383 
1384         if (error = core_write(vp, UIO_SYSSPACE, *offsetp, desc,
1385             note.nhdr.n_descsz, rlimit, credp))
1386                 return (error);
1387 
1388         *offsetp += note.nhdr.n_descsz;
1389         return (0);
1390 }
1391 
1392 /*
1393  * Copy the section data from one vnode to the section of another vnode.
1394  */
1395 static void
1396 copy_scn(Shdr *src, vnode_t *src_vp, Shdr *dst, vnode_t *dst_vp, Off *doffset,
1397     void *buf, size_t size, cred_t *credp, rlim64_t rlimit)
1398 {
1399         ssize_t resid;
1400         size_t len, n = src->sh_size;
1401         offset_t off = 0;
1402 
1403         while (n != 0) {
1404                 len = MIN(size, n);
1405                 if (vn_rdwr(UIO_READ, src_vp, buf, len, src->sh_offset + off,
1406                     UIO_SYSSPACE, 0, (rlim64_t)0, credp, &resid) != 0 ||
1407                     resid >= len ||
1408                     core_write(dst_vp, UIO_SYSSPACE, *doffset + off,
1409                     buf, len - resid, rlimit, credp) != 0) {
1410                         dst->sh_size = 0;
1411                         dst->sh_offset = 0;
1412                         return;
1413                 }
1414 
1415                 ASSERT(n >= len - resid);
1416 
1417                 n -= len - resid;
1418                 off += len - resid;
1419         }
1420 
1421         *doffset += src->sh_size;
1422 }
1423 
/*
 * elf_datasz_max is the largest section size to which process_scns() will
 * grow the buffer it passes to copy_scn(); for a larger section the buffer
 * is left at its current size.  The variable is defined when _ELF32_COMPAT
 * is not set and only declared extern when it is.
 */
1424 #ifdef _ELF32_COMPAT
1425 extern size_t elf_datasz_max;
1426 #else
1427 size_t elf_datasz_max = 1 * 1024 * 1024;
1428 #endif
1429 
1430 /*
1431  * This function processes mappings that correspond to load objects to
1432  * examine their respective sections for elfcore(). It's called once with
1433  * v set to NULL to count the number of sections that we're going to need
1434  * and then again with v set to some allocated buffer that we fill in with
1435  * all the section data.
1436  */
1437 static int
1438 process_scns(core_content_t content, proc_t *p, cred_t *credp, vnode_t *vp,
1439     Shdr *v, int nv, rlim64_t rlimit, Off *doffsetp, int *nshdrsp)
1440 {
1441         vnode_t *lastvp = NULL;
1442         struct seg *seg;
1443         int i, j;
1444         void *data = NULL;
1445         size_t datasz = 0;
1446         shstrtab_t shstrtab;
1447         struct as *as = p->p_as;
1448         int error = 0;
1449 
1450         if (v != NULL)
1451                 shstrtab_init(&shstrtab);
1452 
1453         i = 1;
1454         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1455                 uint_t prot;
1456                 vnode_t *mvp;
1457                 void *tmp = NULL;
1458                 caddr_t saddr = seg->s_base;
1459                 caddr_t naddr;
1460                 caddr_t eaddr;
1461                 size_t segsize;
1462 
1463                 Ehdr ehdr;
1464                 int nshdrs, shstrndx, nphdrs;
1465                 caddr_t shbase;
1466                 ssize_t shsize;
1467                 char *shstrbase;
1468                 ssize_t shstrsize;
1469 
1470                 Shdr *shdr;
1471                 const char *name;
1472                 size_t sz;
1473                 uintptr_t off;
1474 
1475                 int ctf_ndx = 0;
1476                 int symtab_ndx = 0;
1477 
1478                 /*
1479                  * Since we're just looking for text segments of load
1480                  * objects, we only care about the protection bits; we don't
1481                  * care about the actual size of the segment so we use the
1482                  * reserved size. If the segment's size is zero, there's
1483                  * something fishy going on so we ignore this segment.
1484                  */
1485                 if (seg->s_ops != &segvn_ops ||
1486                     SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1487                     mvp == lastvp || mvp == NULL || mvp->v_type != VREG ||
1488                     (segsize = pr_getsegsize(seg, 1)) == 0)
1489                         continue;
1490 
1491                 eaddr = saddr + segsize;
1492                 prot = pr_getprot(seg, 1, &tmp, &saddr, &naddr, eaddr);
1493                 pr_getprot_done(&tmp);
1494 
1495                 /*
1496                  * Skip this segment unless the protection bits look like
1497                  * what we'd expect for a text segment.
1498                  */
1499                 if ((prot & (PROT_WRITE | PROT_EXEC)) != PROT_EXEC)
1500                         continue;
1501 
1502                 if (getelfhead(mvp, credp, &ehdr, &nshdrs, &shstrndx,
1503                     &nphdrs) != 0 ||
1504                     getelfshdr(mvp, credp, &ehdr, nshdrs, shstrndx,
1505                     &shbase, &shsize, &shstrbase, &shstrsize) != 0)
1506                         continue;
1507 
1508                 off = ehdr.e_shentsize;
1509                 for (j = 1; j < nshdrs; j++, off += ehdr.e_shentsize) {
1510                         Shdr *symtab = NULL, *strtab;
1511 
1512                         shdr = (Shdr *)(shbase + off);
1513 
1514                         if (shdr->sh_name >= shstrsize)
1515                                 continue;
1516 
1517                         name = shstrbase + shdr->sh_name;
1518 
1519                         if (strcmp(name, shstrtab_data[STR_CTF]) == 0) {
1520                                 if ((content & CC_CONTENT_CTF) == 0 ||
1521                                     ctf_ndx != 0)
1522                                         continue;
1523 
1524                                 if (shdr->sh_link > 0 &&
1525                                     shdr->sh_link < nshdrs) {
1526                                         symtab = (Shdr *)(shbase +
1527                                             shdr->sh_link * ehdr.e_shentsize);
1528                                 }
1529 
1530                                 if (v != NULL && i < nv - 1) {
1531                                         if (shdr->sh_size > datasz &&
1532                                             shdr->sh_size <= elf_datasz_max) {
1533                                                 if (data != NULL)
1534                                                         kmem_free(data, datasz);
1535 
1536                                                 datasz = shdr->sh_size;
1537                                                 data = kmem_alloc(datasz,
1538                                                     KM_SLEEP);
1539                                         }
1540 
1541                                         v[i].sh_name = shstrtab_ndx(&shstrtab,
1542                                             STR_CTF);
1543                                         v[i].sh_addr = (Addr)(uintptr_t)saddr;
1544                                         v[i].sh_type = SHT_PROGBITS;
1545                                         v[i].sh_addralign = 4;
1546                                         *doffsetp = roundup(*doffsetp,
1547                                             v[i].sh_addralign);
1548                                         v[i].sh_offset = *doffsetp;
1549                                         v[i].sh_size = shdr->sh_size;
1550                                         if (symtab == NULL)  {
1551                                                 v[i].sh_link = 0;
1552                                         } else if (symtab->sh_type ==
1553                                             SHT_SYMTAB &&
1554                                             symtab_ndx != 0) {
1555                                                 v[i].sh_link =
1556                                                     symtab_ndx;
1557                                         } else {
1558                                                 v[i].sh_link = i + 1;
1559                                         }
1560 
1561                                         copy_scn(shdr, mvp, &v[i], vp,
1562                                             doffsetp, data, datasz, credp,
1563                                             rlimit);
1564                                 }
1565 
1566                                 ctf_ndx = i++;
1567 
1568                                 /*
1569                                  * We've already dumped the symtab.
1570                                  */
1571                                 if (symtab != NULL &&
1572                                     symtab->sh_type == SHT_SYMTAB &&
1573                                     symtab_ndx != 0)
1574                                         continue;
1575 
1576                         } else if (strcmp(name,
1577                             shstrtab_data[STR_SYMTAB]) == 0) {
1578                                 if ((content & CC_CONTENT_SYMTAB) == 0 ||
1579                                     symtab != 0)
1580                                         continue;
1581 
1582                                 symtab = shdr;
1583                         }
1584 
1585                         if (symtab != NULL) {
1586                                 if ((symtab->sh_type != SHT_DYNSYM &&
1587                                     symtab->sh_type != SHT_SYMTAB) ||
1588                                     symtab->sh_link == 0 ||
1589                                     symtab->sh_link >= nshdrs)
1590                                         continue;
1591 
1592                                 strtab = (Shdr *)(shbase +
1593                                     symtab->sh_link * ehdr.e_shentsize);
1594 
1595                                 if (strtab->sh_type != SHT_STRTAB)
1596                                         continue;
1597 
1598                                 if (v != NULL && i < nv - 2) {
1599                                         sz = MAX(symtab->sh_size,
1600                                             strtab->sh_size);
1601                                         if (sz > datasz &&
1602                                             sz <= elf_datasz_max) {
1603                                                 if (data != NULL)
1604                                                         kmem_free(data, datasz);
1605 
1606                                                 datasz = sz;
1607                                                 data = kmem_alloc(datasz,
1608                                                     KM_SLEEP);
1609                                         }
1610 
1611                                         if (symtab->sh_type == SHT_DYNSYM) {
1612                                                 v[i].sh_name = shstrtab_ndx(
1613                                                     &shstrtab, STR_DYNSYM);
1614                                                 v[i + 1].sh_name = shstrtab_ndx(
1615                                                     &shstrtab, STR_DYNSTR);
1616                                         } else {
1617                                                 v[i].sh_name = shstrtab_ndx(
1618                                                     &shstrtab, STR_SYMTAB);
1619                                                 v[i + 1].sh_name = shstrtab_ndx(
1620                                                     &shstrtab, STR_STRTAB);
1621                                         }
1622 
1623                                         v[i].sh_type = symtab->sh_type;
1624                                         v[i].sh_addr = symtab->sh_addr;
1625                                         if (ehdr.e_type == ET_DYN ||
1626                                             v[i].sh_addr == 0)
1627                                                 v[i].sh_addr +=
1628                                                     (Addr)(uintptr_t)saddr;
1629                                         v[i].sh_addralign =
1630                                             symtab->sh_addralign;
1631                                         *doffsetp = roundup(*doffsetp,
1632                                             v[i].sh_addralign);
1633                                         v[i].sh_offset = *doffsetp;
1634                                         v[i].sh_size = symtab->sh_size;
1635                                         v[i].sh_link = i + 1;
1636                                         v[i].sh_entsize = symtab->sh_entsize;
1637                                         v[i].sh_info = symtab->sh_info;
1638 
1639                                         copy_scn(symtab, mvp, &v[i], vp,
1640                                             doffsetp, data, datasz, credp,
1641                                             rlimit);
1642 
1643                                         v[i + 1].sh_type = SHT_STRTAB;
1644                                         v[i + 1].sh_flags = SHF_STRINGS;
1645                                         v[i + 1].sh_addr = symtab->sh_addr;
1646                                         if (ehdr.e_type == ET_DYN ||
1647                                             v[i + 1].sh_addr == 0)
1648                                                 v[i + 1].sh_addr +=
1649                                                     (Addr)(uintptr_t)saddr;
1650                                         v[i + 1].sh_addralign =
1651                                             strtab->sh_addralign;
1652                                         *doffsetp = roundup(*doffsetp,
1653                                             v[i + 1].sh_addralign);
1654                                         v[i + 1].sh_offset = *doffsetp;
1655                                         v[i + 1].sh_size = strtab->sh_size;
1656 
1657                                         copy_scn(strtab, mvp, &v[i + 1], vp,
1658                                             doffsetp, data, datasz, credp,
1659                                             rlimit);
1660                                 }
1661 
1662                                 if (symtab->sh_type == SHT_SYMTAB)
1663                                         symtab_ndx = i;
1664                                 i += 2;
1665                         }
1666                 }
1667 
1668                 kmem_free(shstrbase, shstrsize);
1669                 kmem_free(shbase, shsize);
1670 
1671                 lastvp = mvp;
1672         }
1673 
1674         if (v == NULL) {
1675                 if (i == 1)
1676                         *nshdrsp = 0;
1677                 else
1678                         *nshdrsp = i + 1;
1679                 goto done;
1680         }
1681 
1682         if (i != nv - 1) {
1683                 cmn_err(CE_WARN, "elfcore: core dump failed for "
1684                     "process %d; address space is changing", p->p_pid);
1685                 error = EIO;
1686                 goto done;
1687         }
1688 
1689         v[i].sh_name = shstrtab_ndx(&shstrtab, STR_SHSTRTAB);
1690         v[i].sh_size = shstrtab_size(&shstrtab);
1691         v[i].sh_addralign = 1;
1692         *doffsetp = roundup(*doffsetp, v[i].sh_addralign);
1693         v[i].sh_offset = *doffsetp;
1694         v[i].sh_flags = SHF_STRINGS;
1695         v[i].sh_type = SHT_STRTAB;
1696 
1697         if (v[i].sh_size > datasz) {
1698                 if (data != NULL)
1699                         kmem_free(data, datasz);
1700 
1701                 datasz = v[i].sh_size;
1702                 data = kmem_alloc(datasz,
1703                     KM_SLEEP);
1704         }
1705 
1706         shstrtab_dump(&shstrtab, data);
1707 
1708         if ((error = core_write(vp, UIO_SYSSPACE, *doffsetp,
1709             data, v[i].sh_size, rlimit, credp)) != 0)
1710                 goto done;
1711 
1712         *doffsetp += v[i].sh_size;
1713 
1714 done:
1715         if (data != NULL)
1716                 kmem_free(data, datasz);
1717 
1718         return (error);
1719 }
1720 
1721 int
1722 elfcore(vnode_t *vp, proc_t *p, cred_t *credp, rlim64_t rlimit, int sig,
1723     core_content_t content)
1724 {
1725         offset_t poffset, soffset;
1726         Off doffset;
1727         int error, i, nphdrs, nshdrs;
1728         int overflow = 0;
1729         struct seg *seg;
1730         struct as *as = p->p_as;
1731         union {
1732                 Ehdr ehdr;
1733                 Phdr phdr[1];
1734                 Shdr shdr[1];
1735         } *bigwad;
1736         size_t bigsize;
1737         size_t phdrsz, shdrsz;
1738         Ehdr *ehdr;
1739         Phdr *v;
1740         caddr_t brkbase;
1741         size_t brksize;
1742         caddr_t stkbase;
1743         size_t stksize;
1744         int ntries = 0;
1745         klwp_t *lwp = ttolwp(curthread);
1746 
1747 top:
1748         /*
1749          * Make sure we have everything we need (registers, etc.).
1750          * All other lwps have already stopped and are in an orderly state.
1751          */
1752         ASSERT(p == ttoproc(curthread));
1753         prstop(0, 0);
1754 
1755         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1756         nphdrs = prnsegs(as, 0) + 2;            /* two CORE note sections */
1757 
1758         /*
1759          * Count the number of section headers we're going to need.
1760          */
1761         nshdrs = 0;
1762         if (content & (CC_CONTENT_CTF | CC_CONTENT_SYMTAB)) {
1763                 (void) process_scns(content, p, credp, NULL, NULL, NULL, 0,
1764                     NULL, &nshdrs);
1765         }
1766         AS_LOCK_EXIT(as, &as->a_lock);
1767 
1768         ASSERT(nshdrs == 0 || nshdrs > 1);
1769 
1770         /*
1771          * The core file contents may required zero section headers, but if
1772          * we overflow the 16 bits allotted to the program header count in
1773          * the ELF header, we'll need that program header at index zero.
1774          */
1775         if (nshdrs == 0 && nphdrs >= PN_XNUM)
1776                 nshdrs = 1;
1777 
1778         phdrsz = nphdrs * sizeof (Phdr);
1779         shdrsz = nshdrs * sizeof (Shdr);
1780 
1781         bigsize = MAX(sizeof (*bigwad), MAX(phdrsz, shdrsz));
1782         bigwad = kmem_alloc(bigsize, KM_SLEEP);
1783 
1784         ehdr = &bigwad->ehdr;
1785         bzero(ehdr, sizeof (*ehdr));
1786 
1787         ehdr->e_ident[EI_MAG0] = ELFMAG0;
1788         ehdr->e_ident[EI_MAG1] = ELFMAG1;
1789         ehdr->e_ident[EI_MAG2] = ELFMAG2;
1790         ehdr->e_ident[EI_MAG3] = ELFMAG3;
1791         ehdr->e_ident[EI_CLASS] = ELFCLASS;
1792         ehdr->e_type = ET_CORE;
1793 
1794 #if !defined(_LP64) || defined(_ELF32_COMPAT)
1795 
1796 #if defined(__sparc)
1797         ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
1798         ehdr->e_machine = EM_SPARC;
1799 #elif defined(__i386) || defined(__i386_COMPAT)
1800         ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1801         ehdr->e_machine = EM_386;
1802 #else
1803 #error "no recognized machine type is defined"
1804 #endif
1805 
1806 #else   /* !defined(_LP64) || defined(_ELF32_COMPAT) */
1807 
1808 #if defined(__sparc)
1809         ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
1810         ehdr->e_machine = EM_SPARCV9;
1811 #elif defined(__amd64)
1812         ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1813         ehdr->e_machine = EM_AMD64;
1814 #else
1815 #error "no recognized 64-bit machine type is defined"
1816 #endif
1817 
1818 #endif  /* !defined(_LP64) || defined(_ELF32_COMPAT) */
1819 
1820         /*
1821          * If the count of program headers or section headers or the index
1822          * of the section string table can't fit in the mere 16 bits
1823          * shortsightedly allotted to them in the ELF header, we use the
1824          * extended formats and put the real values in the section header
1825          * as index 0.
1826          */
1827         ehdr->e_version = EV_CURRENT;
1828         ehdr->e_ehsize = sizeof (Ehdr);
1829 
1830         if (nphdrs >= PN_XNUM)
1831                 ehdr->e_phnum = PN_XNUM;
1832         else
1833                 ehdr->e_phnum = (unsigned short)nphdrs;
1834 
1835         ehdr->e_phoff = sizeof (Ehdr);
1836         ehdr->e_phentsize = sizeof (Phdr);
1837 
1838         if (nshdrs > 0) {
1839                 if (nshdrs >= SHN_LORESERVE)
1840                         ehdr->e_shnum = 0;
1841                 else
1842                         ehdr->e_shnum = (unsigned short)nshdrs;
1843 
1844                 if (nshdrs - 1 >= SHN_LORESERVE)
1845                         ehdr->e_shstrndx = SHN_XINDEX;
1846                 else
1847                         ehdr->e_shstrndx = (unsigned short)(nshdrs - 1);
1848 
1849                 ehdr->e_shoff = ehdr->e_phoff + ehdr->e_phentsize * nphdrs;
1850                 ehdr->e_shentsize = sizeof (Shdr);
1851         }
1852 
1853         if (error = core_write(vp, UIO_SYSSPACE, (offset_t)0, ehdr,
1854             sizeof (Ehdr), rlimit, credp))
1855                 goto done;
1856 
1857         poffset = sizeof (Ehdr);
1858         soffset = sizeof (Ehdr) + phdrsz;
1859         doffset = sizeof (Ehdr) + phdrsz + shdrsz;
1860 
1861         v = &bigwad->phdr[0];
1862         bzero(v, phdrsz);
1863 
1864         setup_old_note_header(&v[0], p);
1865         v[0].p_offset = doffset = roundup(doffset, sizeof (Word));
1866         doffset += v[0].p_filesz;
1867 
1868         setup_note_header(&v[1], p);
1869         v[1].p_offset = doffset = roundup(doffset, sizeof (Word));
1870         doffset += v[1].p_filesz;
1871 
1872         mutex_enter(&p->p_lock);
1873 
1874         brkbase = p->p_brkbase;
1875         brksize = p->p_brksize;
1876 
1877         stkbase = p->p_usrstack - p->p_stksize;
1878         stksize = p->p_stksize;
1879 
1880         mutex_exit(&p->p_lock);
1881 
1882         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
1883         i = 2;
1884         for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1885                 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1886                 caddr_t saddr, naddr;
1887                 void *tmp = NULL;
1888                 extern struct seg_ops segspt_shmops;
1889 
1890                 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1891                         uint_t prot;
1892                         size_t size;
1893                         int type;
1894                         vnode_t *mvp;
1895 
1896                         prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1897                         prot &= PROT_READ | PROT_WRITE | PROT_EXEC;
1898                         if ((size = (size_t)(naddr - saddr)) == 0)
1899                                 continue;
1900                         if (i == nphdrs) {
1901                                 overflow++;
1902                                 continue;
1903                         }
1904                         v[i].p_type = PT_LOAD;
1905                         v[i].p_vaddr = (Addr)(uintptr_t)saddr;
1906                         v[i].p_memsz = size;
1907                         if (prot & PROT_READ)
1908                                 v[i].p_flags |= PF_R;
1909                         if (prot & PROT_WRITE)
1910                                 v[i].p_flags |= PF_W;
1911                         if (prot & PROT_EXEC)
1912                                 v[i].p_flags |= PF_X;
1913 
1914                         /*
1915                          * Figure out which mappings to include in the core.
1916                          */
1917                         type = SEGOP_GETTYPE(seg, saddr);
1918 
1919                         if (saddr == stkbase && size == stksize) {
1920                                 if (!(content & CC_CONTENT_STACK))
1921                                         goto exclude;
1922 
1923                         } else if (saddr == brkbase && size == brksize) {
1924                                 if (!(content & CC_CONTENT_HEAP))
1925                                         goto exclude;
1926 
1927                         } else if (seg->s_ops == &segspt_shmops) {
1928                                 if (type & MAP_NORESERVE) {
1929                                         if (!(content & CC_CONTENT_DISM))
1930                                                 goto exclude;
1931                                 } else {
1932                                         if (!(content & CC_CONTENT_ISM))
1933                                                 goto exclude;
1934                                 }
1935 
1936                         } else if (seg->s_ops != &segvn_ops) {
1937                                 goto exclude;
1938 
1939                         } else if (type & MAP_SHARED) {
1940                                 if (shmgetid(p, saddr) != SHMID_NONE) {
1941                                         if (!(content & CC_CONTENT_SHM))
1942                                                 goto exclude;
1943 
1944                                 } else if (SEGOP_GETVP(seg, seg->s_base,
1945                                     &mvp) != 0 || mvp == NULL ||
1946                                     mvp->v_type != VREG) {
1947                                         if (!(content & CC_CONTENT_SHANON))
1948                                                 goto exclude;
1949 
1950                                 } else {
1951                                         if (!(content & CC_CONTENT_SHFILE))
1952                                                 goto exclude;
1953                                 }
1954 
1955                         } else if (SEGOP_GETVP(seg, seg->s_base, &mvp) != 0 ||
1956                             mvp == NULL || mvp->v_type != VREG) {
1957                                 if (!(content & CC_CONTENT_ANON))
1958                                         goto exclude;
1959 
1960                         } else if (prot == (PROT_READ | PROT_EXEC)) {
1961                                 if (!(content & CC_CONTENT_TEXT))
1962                                         goto exclude;
1963 
1964                         } else if (prot == PROT_READ) {
1965                                 if (!(content & CC_CONTENT_RODATA))
1966                                         goto exclude;
1967 
1968                         } else {
1969                                 if (!(content & CC_CONTENT_DATA))
1970                                         goto exclude;
1971                         }
1972 
1973                         doffset = roundup(doffset, sizeof (Word));
1974                         v[i].p_offset = doffset;
1975                         v[i].p_filesz = size;
1976                         doffset += size;
1977 exclude:
1978                         i++;
1979                 }
1980                 ASSERT(tmp == NULL);
1981         }
1982         AS_LOCK_EXIT(as, &as->a_lock);
1983 
1984         if (overflow || i != nphdrs) {
1985                 if (ntries++ == 0) {
1986                         kmem_free(bigwad, bigsize);
1987                         overflow = 0;
1988                         goto top;
1989                 }
1990                 cmn_err(CE_WARN, "elfcore: core dump failed for "
1991                     "process %d; address space is changing", p->p_pid);
1992                 error = EIO;
1993                 goto done;
1994         }
1995 
1996         if ((error = core_write(vp, UIO_SYSSPACE, poffset,
1997             v, phdrsz, rlimit, credp)) != 0)
1998                 goto done;
1999 
2000         if ((error = write_old_elfnotes(p, sig, vp, v[0].p_offset, rlimit,
2001             credp)) != 0)
2002                 goto done;
2003 
2004         if ((error = write_elfnotes(p, sig, vp, v[1].p_offset, rlimit,
2005             credp, content)) != 0)
2006                 goto done;
2007 
2008         for (i = 2; i < nphdrs; i++) {
2009                 prkillinfo_t killinfo;
2010                 sigqueue_t *sq;
2011                 int sig, j;
2012 
2013                 if (v[i].p_filesz == 0)
2014                         continue;
2015 
2016                 /*
2017                  * If dumping out this segment fails, rather than failing
2018                  * the core dump entirely, we reset the size of the mapping
2019                  * to zero to indicate that the data is absent from the core
2020                  * file and or in the PF_SUNW_FAILURE flag to differentiate
2021                  * this from mappings that were excluded due to the core file
2022                  * content settings.
2023                  */
2024                 if ((error = core_seg(p, vp, v[i].p_offset,
2025                     (caddr_t)(uintptr_t)v[i].p_vaddr, v[i].p_filesz,
2026                     rlimit, credp)) == 0) {
2027                         continue;
2028                 }
2029 
2030                 if ((sig = lwp->lwp_cursig) == 0) {
2031                         /*
2032                          * We failed due to something other than a signal.
2033                          * Since the space reserved for the segment is now
2034                          * unused, we stash the errno in the first four
2035                          * bytes. This undocumented interface will let us
2036                          * understand the nature of the failure.
2037                          */
2038                         (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2039                             &error, sizeof (error), rlimit, credp);
2040 
2041                         v[i].p_filesz = 0;
2042                         v[i].p_flags |= PF_SUNW_FAILURE;
2043                         if ((error = core_write(vp, UIO_SYSSPACE,
2044                             poffset + sizeof (v[i]) * i, &v[i], sizeof (v[i]),
2045                             rlimit, credp)) != 0)
2046                                 goto done;
2047 
2048                         continue;
2049                 }
2050 
2051                 /*
2052                  * We took a signal.  We want to abort the dump entirely, but
2053                  * we also want to indicate what failed and why.  We therefore
2054                  * use the space reserved for the first failing segment to
2055                  * write our error (which, for purposes of compatability with
2056                  * older core dump readers, we set to EINTR) followed by any
2057                  * siginfo associated with the signal.
2058                  */
2059                 bzero(&killinfo, sizeof (killinfo));
2060                 killinfo.prk_error = EINTR;
2061 
2062                 sq = sig == SIGKILL ? curproc->p_killsqp : lwp->lwp_curinfo;
2063 
2064                 if (sq != NULL) {
2065                         bcopy(&sq->sq_info, &killinfo.prk_info,
2066                             sizeof (sq->sq_info));
2067                 } else {
2068                         killinfo.prk_info.si_signo = lwp->lwp_cursig;
2069                         killinfo.prk_info.si_code = SI_NOINFO;
2070                 }
2071 
2072 #if (defined(_SYSCALL32_IMPL) || defined(_LP64))
2073                 /*
2074                  * If this is a 32-bit process, we need to translate from the
2075                  * native siginfo to the 32-bit variant.  (Core readers must
2076                  * always have the same data model as their target or must
2077                  * be aware of -- and compensate for -- data model differences.)
2078                  */
2079                 if (curproc->p_model == DATAMODEL_ILP32) {
2080                         siginfo32_t si32;
2081 
2082                         siginfo_kto32((k_siginfo_t *)&killinfo.prk_info, &si32);
2083                         bcopy(&si32, &killinfo.prk_info, sizeof (si32));
2084                 }
2085 #endif
2086 
2087                 (void) core_write(vp, UIO_SYSSPACE, v[i].p_offset,
2088                     &killinfo, sizeof (killinfo), rlimit, credp);
2089 
2090                 /*
2091                  * For the segment on which we took the signal, indicate that
2092                  * its data now refers to a siginfo.
2093                  */
2094                 v[i].p_filesz = 0;
2095                 v[i].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED |
2096                     PF_SUNW_SIGINFO;
2097 
2098                 /*
2099                  * And for every other segment, indicate that its absence
2100                  * is due to a signal.
2101                  */
2102                 for (j = i + 1; j < nphdrs; j++) {
2103                         v[j].p_filesz = 0;
2104                         v[j].p_flags |= PF_SUNW_FAILURE | PF_SUNW_KILLED;
2105                 }
2106 
2107                 /*
2108                  * Finally, write out our modified program headers.
2109                  */
2110                 if ((error = core_write(vp, UIO_SYSSPACE,
2111                     poffset + sizeof (v[i]) * i, &v[i],
2112                     sizeof (v[i]) * (nphdrs - i), rlimit, credp)) != 0)
2113                         goto done;
2114 
2115                 break;
2116         }
2117 
2118         if (nshdrs > 0) {
2119                 bzero(&bigwad->shdr[0], shdrsz);
2120 
2121                 if (nshdrs >= SHN_LORESERVE)
2122                         bigwad->shdr[0].sh_size = nshdrs;
2123 
2124                 if (nshdrs - 1 >= SHN_LORESERVE)
2125                         bigwad->shdr[0].sh_link = nshdrs - 1;
2126 
2127                 if (nphdrs >= PN_XNUM)
2128                         bigwad->shdr[0].sh_info = nphdrs;
2129 
2130                 if (nshdrs > 1) {
2131                         AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2132                         if ((error = process_scns(content, p, credp, vp,
2133                             &bigwad->shdr[0], nshdrs, rlimit, &doffset,
2134                             NULL)) != 0) {
2135                                 AS_LOCK_EXIT(as, &as->a_lock);
2136                                 goto done;
2137                         }
2138                         AS_LOCK_EXIT(as, &as->a_lock);
2139                 }
2140 
2141                 if ((error = core_write(vp, UIO_SYSSPACE, soffset,
2142                     &bigwad->shdr[0], shdrsz, rlimit, credp)) != 0)
2143                         goto done;
2144         }
2145 
2146 done:
2147         kmem_free(bigwad, bigsize);
2148         return (error);
2149 }
2150 
2151 #ifndef _ELF32_COMPAT
2152 
2153 static struct execsw esw = {
2154 #ifdef  _LP64
2155         elf64magicstr,
2156 #else   /* _LP64 */
2157         elf32magicstr,
2158 #endif  /* _LP64 */
2159         0,
2160         5,
2161         elfexec,
2162         elfcore
2163 };
2164 
2165 static struct modlexec modlexec = {
2166         &mod_execops, "exec module for elf", &esw
2167 };
2168 
2169 #ifdef  _LP64
2170 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
2171                         intpdata_t *idatap, int level, long *execsz,
2172                         int setid, caddr_t exec_file, cred_t *cred,
2173                         int brand_action);
2174 extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
2175                         rlim64_t rlimit, int sig, core_content_t content);
2176 
2177 static struct execsw esw32 = {
2178         elf32magicstr,
2179         0,
2180         5,
2181         elf32exec,
2182         elf32core
2183 };
2184 
2185 static struct modlexec modlexec32 = {
2186         &mod_execops, "32-bit exec module for elf", &esw32
2187 };
2188 #endif  /* _LP64 */
2189 
2190 static struct modlinkage modlinkage = {
2191         MODREV_1,
2192         (void *)&modlexec,
2193 #ifdef  _LP64
2194         (void *)&modlexec32,
2195 #endif  /* _LP64 */
2196         NULL
2197 };
2198 
2199 int
2200 _init(void)
2201 {
2202         return (mod_install(&modlinkage));
2203 }
2204 
2205 int
2206 _fini(void)
2207 {
2208         return (mod_remove(&modlinkage));
2209 }
2210 
2211 int
2212 _info(struct modinfo *modinfop)
2213 {
2214         return (mod_info(&modlinkage, modinfop));
2215 }
2216 
2217 #endif  /* !_ELF32_COMPAT */