uts: Allow for address space randomisation.
Randomise the base addresses of shared objects, non-fixed mappings, the
stack and the heap.  Introduce a service, svc:/system/process-security,
and a tool psecflags(1) to control and observe it.


  51 #include <sys/vmparam.h>
  52 #include <sys/mmapobj.h>
  53 #include <sys/atomic.h>
  54 
  55 /*
  56  * Theory statement:
  57  *
  58  * The main driving force behind mmapobj is to interpret and map ELF files
  59  * inside of the kernel instead of having the linker be responsible for this.
  60  *
  61  * mmapobj also supports the AOUT 4.x binary format as well as flat files in
  62  * a read only manner.
  63  *
  64  * When interpreting and mapping an ELF file, mmapobj will map each PT_LOAD
  65  * or PT_SUNWBSS segment according to the ELF standard.  Refer to the "Linker
  66  * and Libraries Guide" for more information about the standard and mapping
  67  * rules.
  68  *
  69  * Having mmapobj interpret and map objects will allow the kernel to make the
  70  * best decision for where to place the mappings for said objects.  Thus, we
  71  * can make optimizations inside of the kernel for specific platforms or
  72  * cache mapping information to make mapping objects faster.

  73  *
  74  * The lib_va_hash will be one such optimization.  For each ELF object that
  75  * mmapobj is asked to interpret, we will attempt to cache the information
  76  * about the PT_LOAD and PT_SUNWBSS sections to speed up future mappings of
  77  * the same objects.  We will cache up to LIBVA_CACHED_SEGS (see below) program
  78  * headers which should cover a majority of the libraries out there without
  79  * wasting space.  In order to make sure that the cached information is valid,
  80  * we check the passed in vnode's mtime and ctime to make sure the vnode
  81  * has not been modified since the last time we used it.
  82  *
  83  * In addition, the lib_va_hash may contain a preferred starting VA for the
  84  * object which can be useful for platforms which support a shared context.
  85  * This will increase the likelihood that library text can be shared among
  86  * many different processes.  We limit the reserved VA space for 32 bit objects
  87  * in order to minimize fragmenting the process's address space.
  88  *
  89  * In addition to the above, the mmapobj interface allows for padding to be
  90  * requested before the first mapping and after the last mapping created.
  91  * When padding is requested, no additional optimizations will be made for
  92  * that request.
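
A minimal sketch of the validity rule described above: a cached entry is stale
as soon as the vnode's mtime or ctime differs from what was recorded when the
entry was built.  The struct and field names below are illustrative stand-ins,
not the actual lib_va layout.

#include <sys/vnode.h>

/* Hypothetical stand-in for the timestamps a cached entry would record. */
typedef struct cached_obj {
	timestruc_t	co_mtime;	/* vnode mtime when cached */
	timestruc_t	co_ctime;	/* vnode ctime when cached */
} cached_obj_t;

/* Returns non-zero if the cached entry still matches the vnode. */
static int
cached_obj_valid(const cached_obj_t *co, const vattr_t *vap)
{
	return (co->co_mtime.tv_sec == vap->va_mtime.tv_sec &&
	    co->co_mtime.tv_nsec == vap->va_mtime.tv_nsec &&
	    co->co_ctime.tv_sec == vap->va_ctime.tv_sec &&
	    co->co_ctime.tv_nsec == vap->va_ctime.tv_nsec);
}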


 701 }
 702 
 703 /*
 704  * Get the starting address for a given file to be mapped and return it
 705  * to the caller.  If we're using lib_va and we need to allocate an address,
 706  * we will attempt to allocate it from the global reserved pool such that the
 707  * same address can be used in the future for this file.  If we can't use the
 708  * reserved address then we just get one that will fit in our address space.
 709  *
 710  * Returns the starting virtual address for the range to be mapped or NULL
 711  * if an error is encountered. If we successfully insert the requested info
 712  * into the lib_va hash, then *lvpp will be set to point to this lib_va
 713  * structure.  The structure will have a hold on it and thus lib_va_release
 714  * needs to be called on it by the caller.  This function will not fill out
 715  * lv_mps or lv_num_segs since it does not have enough information to do so.
 716  * The caller is responsible for doing this, making sure that any modifications
 717  * to lv_mps are visible before setting lv_num_segs.
 718  */
 719 static caddr_t
 720 mmapobj_alloc_start_addr(struct lib_va **lvpp, size_t len, int use_lib_va,
 721     size_t align, vattr_t *vap)
 722 {
 723         proc_t *p = curproc;
 724         struct as *as = p->p_as;
 725         struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_USER, PROT_ALL);
 726         int error;
 727         model_t model;
 728         uint_t ma_flags = _MAP_LOW32;
 729         caddr_t base = NULL;
 730         vmem_t *model_vmem;
 731         size_t lib_va_start;
 732         size_t lib_va_end;
 733         size_t lib_va_len;
 734 
 735         ASSERT(lvpp != NULL);

 736 
 737         MOBJ_STAT_ADD(alloc_start);
 738         model = get_udatamodel();
 739 
 740         if (model == DATAMODEL_LP64) {
 741                 ma_flags = 0;
 742                 model_vmem = lib_va_64_arena;
 743         } else {
 744                 ASSERT(model == DATAMODEL_ILP32);
 745                 model_vmem = lib_va_32_arena;
 746         }
 747 
 748         if (align > 1) {
 749                 ma_flags |= MAP_ALIGN;
 750         }




 751         if (use_lib_va) {
 752                 /*
 753                  * The first time through, we need to set up the lib_va arenas.
 754                  * We call map_addr to find a suitable range of memory to map
 755                  * the given library, and we will set the highest address
 756                  * in our vmem arena to the end of this address range.
 757                  * We allow up to half of the address space to be used
 758                  * for lib_va addresses but we do not prevent any allocations
 759                  * in this range from other allocation paths.
 760                  */
 761                 if (lib_va_64_arena == NULL && model == DATAMODEL_LP64) {
 762                         mutex_enter(&lib_va_init_mutex);
 763                         if (lib_va_64_arena == NULL) {
 764                                 base = (caddr_t)align;
 765                                 as_rangelock(as);
 766                                 map_addr(&base, len, 0, 1, ma_flags);
 767                                 as_rangeunlock(as);
 768                                 if (base == NULL) {
 769                                         mutex_exit(&lib_va_init_mutex);
 770                                         MOBJ_STAT_ADD(lib_va_create_failure);


 844                  * Check for collision on insertion and free up our VA space.
 845                  * This is expected to be rare, so we'll just reset base to
 846                  * NULL instead of looking it up in the lib_va hash.
 847                  */
 848                 if (*lvpp == NULL) {
 849                         if (base != NULL) {
 850                                 vmem_xfree(model_vmem, base, len);
 851                                 base = NULL;
 852                                 MOBJ_STAT_ADD(add_collision);
 853                         }
 854                 }
 855         }
 856 
 857 nolibva:
 858         as_rangelock(as);
 859 
 860         /*
 861          * If we don't have an expected base address, or the one that we want
 862          * to use is not available or acceptable, go get an acceptable
 863          * address range.




 864          */



 865         if (base == NULL || as_gap(as, len, &base, &len, 0, NULL) ||
 866             valid_usr_range(base, len, PROT_ALL, as, as->a_userlimit) !=
 867             RANGE_OKAY || OVERLAPS_STACK(base + len, p)) {
 868                 MOBJ_STAT_ADD(get_addr);
 869                 base = (caddr_t)align;
 870                 map_addr(&base, len, 0, 1, ma_flags);
 871         }
 872 
 873         /*
 874          * Need to reserve the address space we're going to use.
 875          * Don't reserve swap space since we'll be mapping over this.
 876          */
 877         if (base != NULL) {
 878                 /* Don't reserve swap space since we'll be mapping over this */
 879                 crargs.flags |= MAP_NORESERVE;
 880                 error = as_map(as, base, len, segvn_create, &crargs);
 881                 if (error) {
 882                         base = NULL;
 883                 }
 884         }
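
The block comment at the top of mmapobj_alloc_start_addr() above leaves the
publication ordering to the caller: lv_mps must be filled in and its stores
made visible before lv_num_segs is set, because readers treat a non-zero
lv_num_segs as "this entry is complete".  A minimal sketch of that publish
step, written as if it lived in this file (so struct lib_va and
mmapobj_result_t are in scope) and assuming membar_producer() from the
already-included <sys/atomic.h>; it shows the required ordering, not the
actual caller.

static void
lib_va_publish(struct lib_va *lvp, const mmapobj_result_t *mrp, int nsegs)
{
	int i;

	for (i = 0; i < nsegs; i++)
		lvp->lv_mps[i] = mrp[i];	/* stage the cached segments */

	membar_producer();			/* lv_mps stores first ... */
	lvp->lv_num_segs = nsegs;		/* ... then publish the count */
}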


1508                                 }
1509                         } else {
1510                                 AS_LOCK_EXIT(as, &as->a_lock);
1511                                 as_rangeunlock(as);
1512                                 mmapobj_unmap_exec(mrp, i, start_addr);
1513                                 MOBJ_STAT_ADD(exec_addr_in_use);
1514                                 return (EADDRINUSE);
1515                         }
1516                 }
1517         }
1518         as_rangeunlock(as);
1519         return (0);
1520 }
1521 
1522 /*
1523  * Walk through the ELF program headers and extract all useful information
1524  * for PT_LOAD and PT_SUNWBSS segments into mrp.
1525  * Return 0 on success or error on failure.
1526  */
1527 static int
1528 process_phdr(Ehdr *ehdrp, caddr_t phdrbase, int nphdrs, mmapobj_result_t *mrp,
1529     vnode_t *vp, uint_t *num_mapped, size_t padding, cred_t *fcred)
1530 {
1531         int i;
1532         caddr_t start_addr = NULL;
1533         caddr_t vaddr;
1534         size_t len = 0;
1535         size_t lib_len = 0;
1536         int ret;
1537         int prot;
1538         struct lib_va *lvp = NULL;
1539         vattr_t vattr;
1540         struct as *as = curproc->p_as;
1541         int error;
1542         int loadable = 0;
1543         int current = 0;
1544         int use_lib_va = 1;
1545         size_t align = 0;
1546         size_t add_pad = 0;
1547         int hdr_seen = 0;
1548         ushort_t e_type = ehdrp->e_type;     /* same offset 32 and 64 bit */


1564          * For 64bit processes, 8 byte alignment is required.
1565          * If the alignment isn't correct, we need to return failure
1566          * since it could cause an alignment error panic while walking
1567          * the phdr array.
1568          */
1569         if (model == DATAMODEL_LP64) {
1570                 hsize = ehdrp->e_phentsize;
1571                 if (hsize & 7) {
1572                         MOBJ_STAT_ADD(phent_align64);
1573                         return (ENOTSUP);
1574                 }
1575         } else {
1576                 ASSERT(model == DATAMODEL_ILP32);
1577                 hsize = ((Elf32_Ehdr *)ehdrp)->e_phentsize;
1578                 if (hsize & 3) {
1579                         MOBJ_STAT_ADD(phent_align32);
1580                         return (ENOTSUP);
1581                 }
1582         }
1583 
1584         if (padding != 0) {
1585                 use_lib_va = 0;
1586         }
1587         if (e_type == ET_DYN) {
1588                 vattr.va_mask = AT_FSID | AT_NODEID | AT_CTIME | AT_MTIME;
1589                 error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL);
1590                 if (error) {
1591                         return (error);
1592                 }
1593                 /* Check to see if we already have a description for this lib */

1594                 lvp = lib_va_find(&vattr);
1595 
1596                 if (lvp != NULL) {
1597                         MOBJ_STAT_ADD(lvp_found);
1598                         if (use_lib_va) {
1599                                 start_addr = mmapobj_lookup_start_addr(lvp);
1600                                 if (start_addr == NULL) {
1601                                         lib_va_release(lvp);
1602                                         return (ENOMEM);
1603                                 }
1604                         }
1605 
1606                         /*
1607                          * loadable may be zero if the original allocator
1608                          * of lvp hasn't finished setting it up but the rest
1609                          * of the fields will be accurate.
1610                          */
1611                         loadable = lvp->lv_num_segs;
1612                         len = lvp->lv_len;
1613                         align = lvp->lv_align;
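
The e_phentsize checks earlier in this function are plain power-of-two
remainder tests: an LP64 walk dereferences 8-byte fields and an ILP32 walk
4-byte fields, so a phentsize that is not a multiple of that size could fault
on a strict-alignment machine.  A standalone userland illustration; the helper
name is made up.

#include <assert.h>
#include <stddef.h>

/* hsize must be a multiple of the widest field dereferenced for the model. */
static int
phentsize_aligned(size_t hsize, int lp64)
{
	size_t mask = lp64 ? 7 : 3;	/* 8-byte vs. 4-byte alignment */

	return ((hsize & mask) == 0);
}

int
main(void)
{
	assert(phentsize_aligned(56, 1));	/* sizeof (Elf64_Phdr) */
	assert(phentsize_aligned(32, 0));	/* sizeof (Elf32_Phdr) */
	assert(!phentsize_aligned(57, 1));	/* would trip the LP64 check */
	return (0);
}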


1684                         if (align > 1) {
1685                                 add_pad = P2ROUNDUP(padding, align);
1686                                 len += add_pad;
1687                                 MOBJ_STAT_ADD(dyn_pad_align);
1688                         } else {
1689                                 MOBJ_STAT_ADD(dyn_pad_noalign);
1690                                 len += padding; /* at beginning */
1691                         }
1692                         len += padding; /* at end of mapping */
1693                 }
1694                 /*
1695                  * At this point, if lvp is non-NULL, then above we
1696                  * already found it in the cache but did not get
1697                  * the start address since we were not going to use lib_va.
1698                  * Since we know that lib_va will not be used, it's safe
1699                  * to call mmapobj_alloc_start_addr and know that lvp
1700                  * will not be modified.
1701                  */
1702                 ASSERT(lvp ? use_lib_va == 0 : 1);
1703                 start_addr = mmapobj_alloc_start_addr(&lvp, len,
1704                     use_lib_va, align, &vattr);


1705                 if (start_addr == NULL) {
1706                         if (lvp) {
1707                                 lib_va_release(lvp);
1708                         }
1709                         MOBJ_STAT_ADD(alloc_start_fail);
1710                         return (ENOMEM);
1711                 }
1712                 /*
1713                  * If we can't cache it, no need to hang on to it.
1714                  * Setting lv_num_segs to non-zero will make that
1715                  * field active and since there are too many segments
1716                  * to cache, all future users will not try to use lv_mps.
1717                  */
1718                 if (lvp != NULL && loadable > LIBVA_CACHED_SEGS && use_lib_va) {
1719                         lvp->lv_num_segs = loadable;
1720                         lib_va_release(lvp);
1721                         lvp = NULL;
1722                         MOBJ_STAT_ADD(lvp_nocache);
1723                 }
1724                 /*
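
A worked example of the padding arithmetic above: the leading pad is rounded
up to the object's alignment (the align > 1 branch) while the trailing pad is
taken as-is, so the reserved range covers pad, object, and pad.  The macro
matches the usual <sys/sysmacros.h> definition; the sizes are invented for
illustration.

#include <stdio.h>
#include <stddef.h>

#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))	/* as in sys/sysmacros.h */

int
main(void)
{
	size_t padding = 0x1800;	/* requested pad (illustrative) */
	size_t align = 0x10000;		/* strongest p_align of the object */
	size_t lib_len = 0x42000;	/* span of the PT_LOAD segments */
	size_t len = lib_len;

	len += P2ROUNDUP(padding, align);	/* pad before first mapping */
	len += padding;				/* pad after last mapping */

	/* Prints: reserve 0x53800 bytes for a 0x42000 byte object */
	printf("reserve 0x%zx bytes for a 0x%zx byte object\n", len, lib_len);
	return (0);
}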


2009 
2010         /* Make sure we only wait for memory if it's a reasonable request */
2011         if (phsizep > mmapobj_alloc_threshold) {
2012                 MOBJ_STAT_ADD(phsize_large);
2013                 if ((phbasep = kmem_alloc(phsizep, KM_NOSLEEP)) == NULL) {
2014                         MOBJ_STAT_ADD(phsize_xtralarge);
2015                         return (ENOMEM);
2016                 }
2017         } else {
2018                 phbasep = kmem_alloc(phsizep, KM_SLEEP);
2019         }
2020 
2021         if ((error = vn_rdwr(UIO_READ, vp, phbasep, phsizep,
2022             (offset_t)phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
2023             fcred, NULL)) != 0) {
2024                 kmem_free(phbasep, phsizep);
2025                 return (error);
2026         }
2027 
2028         /* Now process the phdr's */
2029         error = process_phdr(ehdrp, phbasep, nphdrs, mrp, vp, num_mapped,
2030             padding, fcred);
2031         kmem_free(phbasep, phsizep);
2032         return (error);
2033 }
2034 
2035 #if defined(__sparc)
2036 /*
2037  * Hack to support 64 bit kernels running AOUT 4.x programs.
2038  * This is the sizeof (struct nlist) for a 32 bit kernel.
2039  * Since AOUT programs are 32 bit only, they will never use the 64 bit
2040  * sizeof (struct nlist) and thus creating a #define is the simplest
2041  * way around this since this is a format which is not being updated.
2042  * This will be used in the place of sizeof (struct nlist) below.
2043  */
2044 #define NLIST_SIZE      (0xC)
2045 
2046 static int
2047 doaoutwork(vnode_t *vp, mmapobj_result_t *mrp,
2048     uint_t *num_mapped, struct exec *hdr, cred_t *fcred)
2049 {
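
The kmem_alloc() policy earlier in this hunk bounds how much trust is placed
in phsizep, which is derived from the file's own header: a reasonably sized
program-header table may sleep for memory (KM_SLEEP), while anything above
mmapobj_alloc_threshold must not be allowed to hang the caller and instead
fails fast with ENOMEM.  A generic kernel-style sketch of that pattern; the
names are stand-ins, not the actual code.

#include <sys/kmem.h>
#include <sys/errno.h>

/* Hypothetical tunable, standing in for mmapobj_alloc_threshold. */
static size_t bigbuf_threshold = 1024 * 1024;

static int
alloc_header_buf(size_t sz, void **bufp)
{
	if (sz > bigbuf_threshold) {
		/* Oversized request: don't block, just report failure. */
		if ((*bufp = kmem_alloc(sz, KM_NOSLEEP)) == NULL)
			return (ENOMEM);
	} else {
		/* Reasonable request: waiting for memory is acceptable. */
		*bufp = kmem_alloc(sz, KM_SLEEP);
	}
	return (0);
}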


2295          * header has to be aligned to the native size of ulong_t in order
2296          * to avoid an unaligned access when dereferencing the header as
2297          * a ulong_t.  Thus we allocate our array on the stack of type
2298          * ulong_t and then have header, which we dereference later as a char
2299          * array, point at lheader.
2300          */
2301         ulong_t lheader[(MAX_HEADER_SIZE / (sizeof (ulong_t))) + 1];
2302         caddr_t header = (caddr_t)&lheader;
2303 
2304         vattr.va_mask = AT_FSID | AT_NODEID | AT_CTIME | AT_MTIME | AT_SIZE;
2305         error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL);
2306         if (error) {
2307                 return (error);
2308         }
2309 
2310         /*
2311          * Check lib_va to see if we already have a full description
2312          * for this library.  This is the fast path and only used for
2313          * ET_DYN ELF files (dynamic libraries).
2314          */
2315         if (padding == 0 && (lvp = lib_va_find(&vattr)) != NULL) {

2316                 int num_segs;
2317 
2318                 model = get_udatamodel();
2319                 if ((model == DATAMODEL_ILP32 &&
2320                     lvp->lv_flags & LV_ELF64) ||
2321                     (model == DATAMODEL_LP64 &&
2322                     lvp->lv_flags & LV_ELF32)) {
2323                         lib_va_release(lvp);
2324                         MOBJ_STAT_ADD(fast_wrong_model);
2325                         return (ENOTSUP);
2326                 }
2327                 num_segs = lvp->lv_num_segs;
2328                 if (*num_mapped < num_segs) {
2329                         *num_mapped = num_segs;
2330                         lib_va_release(lvp);
2331                         MOBJ_STAT_ADD(fast_e2big);
2332                         return (E2BIG);
2333                 }
2334 
2335                 /*




  51 #include <sys/vmparam.h>
  52 #include <sys/mmapobj.h>
  53 #include <sys/atomic.h>
  54 
  55 /*
  56  * Theory statement:
  57  *
  58  * The main driving force behind mmapobj is to interpret and map ELF files
  59  * inside of the kernel instead of having the linker be responsible for this.
  60  *
  61  * mmapobj also supports the AOUT 4.x binary format as well as flat files in
  62  * a read only manner.
  63  *
  64  * When interpreting and mapping an ELF file, mmapobj will map each PT_LOAD
  65  * or PT_SUNWBSS segment according to the ELF standard.  Refer to the "Linker
  66  * and Libraries Guide" for more information about the standard and mapping
  67  * rules.
  68  *
  69  * Having mmapobj interpret and map objects will allow the kernel to make the
  70  * best decision for where to place the mappings for said objects.  Thus, we
  71  * can make optimizations inside of the kernel for specific platforms or cache
  72  * mapping information to make mapping objects faster.  The cache is ignored
  73  * if ASLR is enabled.
  74  *
  75  * The lib_va_hash will be one such optimization.  For each ELF object that
  76  * mmapobj is asked to interpret, we will attempt to cache the information
  77  * about the PT_LOAD and PT_SUNWBSS sections to speed up future mappings of
  78  * the same objects.  We will cache up to LIBVA_CACHED_SEGS (see below) program
  79  * headers which should cover a majority of the libraries out there without
  80  * wasting space.  In order to make sure that the cached information is valid,
  81  * we check the passed in vnode's mtime and ctime to make sure the vnode
  82  * has not been modified since the last time we used it.
  83  *
  84  * In addition, the lib_va_hash may contain a preferred starting VA for the
  85  * object which can be useful for platforms which support a shared context.
  86  * This will increase the likelihood that library text can be shared among
  87  * many different processes.  We limit the reserved VA space for 32 bit objects
  88  * in order to minimize fragmenting the process's address space.
  89  *
  90  * In addition to the above, the mmapobj interface allows for padding to be
  91  * requested before the first mapping and after the last mapping created.
  92  * When padding is requested, no additional optimizations will be made for
  93  * that request.
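
The sentence added above, "The cache is ignored if ASLR is enabled", is
carried out by the gating visible in the hunks below.  Condensed into a single
helper purely for illustration (the helper itself is not part of the change);
secflag_enabled() and PROC_SEC_ASLR are the interfaces the change actually
uses.

#include <sys/proc.h>
#include <sys/secflags.h>

static int
may_use_lib_va(proc_t *p, size_t padding)
{
	if (padding != 0)
		return (0);	/* padded requests never used lib_va */
	if (secflag_enabled(p, PROC_SEC_ASLR))
		return (0);	/* randomised processes skip the cache */
	return (1);
}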


 702 }
 703 
 704 /*
 705  * Get the starting address for a given file to be mapped and return it
 706  * to the caller.  If we're using lib_va and we need to allocate an address,
 707  * we will attempt to allocate it from the global reserved pool such that the
 708  * same address can be used in the future for this file.  If we can't use the
 709  * reserved address then we just get one that will fit in our address space.
 710  *
 711  * Returns the starting virtual address for the range to be mapped or NULL
 712  * if an error is encountered. If we successfully insert the requested info
 713  * into the lib_va hash, then *lvpp will be set to point to this lib_va
 714  * structure.  The structure will have a hold on it and thus lib_va_release
 715  * needs to be called on it by the caller.  This function will not fill out
 716  * lv_mps or lv_num_segs since it does not have enough information to do so.
 717  * The caller is responsible for doing this, making sure that any modifications
 718  * to lv_mps are visible before setting lv_num_segs.
 719  */
 720 static caddr_t
 721 mmapobj_alloc_start_addr(struct lib_va **lvpp, size_t len, int use_lib_va,
 722     int randomize, size_t align, vattr_t *vap)
 723 {
 724         proc_t *p = curproc;
 725         struct as *as = p->p_as;
 726         struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_USER, PROT_ALL);
 727         int error;
 728         model_t model;
 729         uint_t ma_flags = _MAP_LOW32;
 730         caddr_t base = NULL;
 731         vmem_t *model_vmem;
 732         size_t lib_va_start;
 733         size_t lib_va_end;
 734         size_t lib_va_len;
 735 
 736         ASSERT(lvpp != NULL);
 737         ASSERT((randomize & use_lib_va) != 1);
 738 
 739         MOBJ_STAT_ADD(alloc_start);
 740         model = get_udatamodel();
 741 
 742         if (model == DATAMODEL_LP64) {
 743                 ma_flags = 0;
 744                 model_vmem = lib_va_64_arena;
 745         } else {
 746                 ASSERT(model == DATAMODEL_ILP32);
 747                 model_vmem = lib_va_32_arena;
 748         }
 749 
 750         if (align > 1) {
 751                 ma_flags |= MAP_ALIGN;
 752         }
 753 
 754         if (randomize != 0)
 755                 ma_flags |= _MAP_RANDOMIZE;
 756 
 757         if (use_lib_va) {
 758                 /*
 759                  * The first time through, we need to set up the lib_va arenas.
 760                  * We call map_addr to find a suitable range of memory to map
 761                  * the given library, and we will set the highest address
 762                  * in our vmem arena to the end of this address range.
 763                  * We allow up to half of the address space to be used
 764                  * for lib_va addresses but we do not prevent any allocations
 765                  * in this range from other allocation paths.
 766                  */
 767                 if (lib_va_64_arena == NULL && model == DATAMODEL_LP64) {
 768                         mutex_enter(&lib_va_init_mutex);
 769                         if (lib_va_64_arena == NULL) {
 770                                 base = (caddr_t)align;
 771                                 as_rangelock(as);
 772                                 map_addr(&base, len, 0, 1, ma_flags);
 773                                 as_rangeunlock(as);
 774                                 if (base == NULL) {
 775                                         mutex_exit(&lib_va_init_mutex);
 776                                         MOBJ_STAT_ADD(lib_va_create_failure);


 850                  * Check for collision on insertion and free up our VA space.
 851                  * This is expected to be rare, so we'll just reset base to
 852                  * NULL instead of looking it up in the lib_va hash.
 853                  */
 854                 if (*lvpp == NULL) {
 855                         if (base != NULL) {
 856                                 vmem_xfree(model_vmem, base, len);
 857                                 base = NULL;
 858                                 MOBJ_STAT_ADD(add_collision);
 859                         }
 860                 }
 861         }
 862 
 863 nolibva:
 864         as_rangelock(as);
 865 
 866         /*
 867          * If we don't have an expected base address, or the one that we want
 868          * to use is not available or acceptable, go get an acceptable
 869          * address range.
 870          *
 871          * If ASLR is enabled, we should never have used the cache, and should
 872          * also start our real work here, in the consequent of the next
 873          * condition.
 874          */
 875         if (randomize != 0)
 876                 ASSERT(base == NULL);
 877 
 878         if (base == NULL || as_gap(as, len, &base, &len, 0, NULL) ||
 879             valid_usr_range(base, len, PROT_ALL, as, as->a_userlimit) !=
 880             RANGE_OKAY || OVERLAPS_STACK(base + len, p)) {
 881                 MOBJ_STAT_ADD(get_addr);
 882                 base = (caddr_t)align;
 883                 map_addr(&base, len, 0, 1, ma_flags);
 884         }
 885 
 886         /*
 887          * Need to reserve the address space we're going to use.
 888          * Don't reserve swap space since we'll be mapping over this.
 889          */
 890         if (base != NULL) {
 891                 /* Don't reserve swap space since we'll be mapping over this */
 892                 crargs.flags |= MAP_NORESERVE;
 893                 error = as_map(as, base, len, segvn_create, &crargs);
 894                 if (error) {
 895                         base = NULL;
 896                 }
 897         }
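
The new ASSERT((randomize & use_lib_va) != 1) near the top of this function
relies on both arguments being 0/1 flags: their bitwise AND is 1 exactly when
both are set, so the assertion forbids asking for a randomised placement of a
cached lib_va address while still allowing either flag, or neither, on its
own.  A standalone check of that reading:

#include <assert.h>

int
main(void)
{
	int randomize, use_lib_va;

	/* (randomize & use_lib_va) == 1 only when both flags are set ... */
	for (randomize = 0; randomize <= 1; randomize++)
		for (use_lib_va = 0; use_lib_va <= 1; use_lib_va++)
			assert(((randomize & use_lib_va) == 1) ==
			    (randomize == 1 && use_lib_va == 1));

	/* ... so "randomize a cached lib_va address" is the forbidden mix. */
	return (0);
}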


1521                                 }
1522                         } else {
1523                                 AS_LOCK_EXIT(as, &as->a_lock);
1524                                 as_rangeunlock(as);
1525                                 mmapobj_unmap_exec(mrp, i, start_addr);
1526                                 MOBJ_STAT_ADD(exec_addr_in_use);
1527                                 return (EADDRINUSE);
1528                         }
1529                 }
1530         }
1531         as_rangeunlock(as);
1532         return (0);
1533 }
1534 
1535 /*
1536  * Walk through the ELF program headers and extract all useful information
1537  * for PT_LOAD and PT_SUNWBSS segments into mrp.
1538  * Return 0 on success or error on failure.
1539  */
1540 static int
1541 process_phdrs(Ehdr *ehdrp, caddr_t phdrbase, int nphdrs, mmapobj_result_t *mrp,
1542     vnode_t *vp, uint_t *num_mapped, size_t padding, cred_t *fcred)
1543 {
1544         int i;
1545         caddr_t start_addr = NULL;
1546         caddr_t vaddr;
1547         size_t len = 0;
1548         size_t lib_len = 0;
1549         int ret;
1550         int prot;
1551         struct lib_va *lvp = NULL;
1552         vattr_t vattr;
1553         struct as *as = curproc->p_as;
1554         int error;
1555         int loadable = 0;
1556         int current = 0;
1557         int use_lib_va = 1;
1558         size_t align = 0;
1559         size_t add_pad = 0;
1560         int hdr_seen = 0;
1561         ushort_t e_type = ehdrp->e_type;     /* same offset 32 and 64 bit */


1577          * For 64bit processes, 8 byte alignment is required.
1578          * If the alignment isn't correct, we need to return failure
1579          * since it could cause an alignment error panic while walking
1580          * the phdr array.
1581          */
1582         if (model == DATAMODEL_LP64) {
1583                 hsize = ehdrp->e_phentsize;
1584                 if (hsize & 7) {
1585                         MOBJ_STAT_ADD(phent_align64);
1586                         return (ENOTSUP);
1587                 }
1588         } else {
1589                 ASSERT(model == DATAMODEL_ILP32);
1590                 hsize = ((Elf32_Ehdr *)ehdrp)->e_phentsize;
1591                 if (hsize & 3) {
1592                         MOBJ_STAT_ADD(phent_align32);
1593                         return (ENOTSUP);
1594                 }
1595         }
1596 
1597         if ((padding != 0) || secflag_enabled(curproc, PROC_SEC_ASLR)) {
1598                 use_lib_va = 0;
1599         }
1600         if (e_type == ET_DYN) {
1601                 vattr.va_mask = AT_FSID | AT_NODEID | AT_CTIME | AT_MTIME;
1602                 error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL);
1603                 if (error) {
1604                         return (error);
1605                 }
1606                 /* Check to see if we already have a description for this lib */
1607                 if (!secflag_enabled(curproc, PROC_SEC_ASLR))
1608                         lvp = lib_va_find(&vattr);
1609 
1610                 if (lvp != NULL) {
1611                         MOBJ_STAT_ADD(lvp_found);
1612                         if (use_lib_va) {
1613                                 start_addr = mmapobj_lookup_start_addr(lvp);
1614                                 if (start_addr == NULL) {
1615                                         lib_va_release(lvp);
1616                                         return (ENOMEM);
1617                                 }
1618                         }
1619 
1620                         /*
1621                          * loadable may be zero if the original allocator
1622                          * of lvp hasn't finished setting it up but the rest
1623                          * of the fields will be accurate.
1624                          */
1625                         loadable = lvp->lv_num_segs;
1626                         len = lvp->lv_len;
1627                         align = lvp->lv_align;


1698                         if (align > 1) {
1699                                 add_pad = P2ROUNDUP(padding, align);
1700                                 len += add_pad;
1701                                 MOBJ_STAT_ADD(dyn_pad_align);
1702                         } else {
1703                                 MOBJ_STAT_ADD(dyn_pad_noalign);
1704                                 len += padding; /* at beginning */
1705                         }
1706                         len += padding; /* at end of mapping */
1707                 }
1708                 /*
1709                  * At this point, if lvp is non-NULL, then above we
1710                  * already found it in the cache but did not get
1711                  * the start address since we were not going to use lib_va.
1712                  * Since we know that lib_va will not be used, it's safe
1713                  * to call mmapobj_alloc_start_addr and know that lvp
1714                  * will not be modified.
1715                  */
1716                 ASSERT(lvp ? use_lib_va == 0 : 1);
1717                 start_addr = mmapobj_alloc_start_addr(&lvp, len,
1718                     use_lib_va,
1719                     secflag_enabled(curproc, PROC_SEC_ASLR),
1720                     align, &vattr);
1721                 if (start_addr == NULL) {
1722                         if (lvp) {
1723                                 lib_va_release(lvp);
1724                         }
1725                         MOBJ_STAT_ADD(alloc_start_fail);
1726                         return (ENOMEM);
1727                 }
1728                 /*
1729                  * If we can't cache it, no need to hang on to it.
1730                  * Setting lv_num_segs to non-zero will make that
1731                  * field active and since there are too many segments
1732                  * to cache, all future users will not try to use lv_mps.
1733                  */
1734                 if (lvp != NULL && loadable > LIBVA_CACHED_SEGS && use_lib_va) {
1735                         lvp->lv_num_segs = loadable;
1736                         lib_va_release(lvp);
1737                         lvp = NULL;
1738                         MOBJ_STAT_ADD(lvp_nocache);
1739                 }
1740                 /*


2025 
2026         /* Make sure we only wait for memory if it's a reasonable request */
2027         if (phsizep > mmapobj_alloc_threshold) {
2028                 MOBJ_STAT_ADD(phsize_large);
2029                 if ((phbasep = kmem_alloc(phsizep, KM_NOSLEEP)) == NULL) {
2030                         MOBJ_STAT_ADD(phsize_xtralarge);
2031                         return (ENOMEM);
2032                 }
2033         } else {
2034                 phbasep = kmem_alloc(phsizep, KM_SLEEP);
2035         }
2036 
2037         if ((error = vn_rdwr(UIO_READ, vp, phbasep, phsizep,
2038             (offset_t)phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
2039             fcred, NULL)) != 0) {
2040                 kmem_free(phbasep, phsizep);
2041                 return (error);
2042         }
2043 
2044         /* Now process the phdr's */
2045         error = process_phdrs(ehdrp, phbasep, nphdrs, mrp, vp, num_mapped,
2046             padding, fcred);
2047         kmem_free(phbasep, phsizep);
2048         return (error);
2049 }
2050 
2051 #if defined(__sparc)
2052 /*
2053  * Hack to support 64 bit kernels running AOUT 4.x programs.
2054  * This is the sizeof (struct nlist) for a 32 bit kernel.
2055  * Since AOUT programs are 32 bit only, they will never use the 64 bit
2056  * sizeof (struct nlist) and thus creating a #define is the simplest
2057  * way around this since this is a format which is not being updated.
2058  * This will be used in the place of sizeof (struct nlist) below.
2059  */
2060 #define NLIST_SIZE      (0xC)
2061 
2062 static int
2063 doaoutwork(vnode_t *vp, mmapobj_result_t *mrp,
2064     uint_t *num_mapped, struct exec *hdr, cred_t *fcred)
2065 {


2311          * header has to be aligned to the native size of ulong_t in order
2312          * to avoid an unaligned access when dereferencing the header as
2313          * a ulong_t.  Thus we allocate our array on the stack of type
2314          * ulong_t and then have header, which we dereference later as a char
2315          * array point at lheader.
2316          */
2317         ulong_t lheader[(MAX_HEADER_SIZE / (sizeof (ulong_t))) + 1];
2318         caddr_t header = (caddr_t)&lheader;
2319 
2320         vattr.va_mask = AT_FSID | AT_NODEID | AT_CTIME | AT_MTIME | AT_SIZE;
2321         error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL);
2322         if (error) {
2323                 return (error);
2324         }
2325 
2326         /*
2327          * Check lib_va to see if we already have a full description
2328          * for this library.  This is the fast path and only used for
2329          * ET_DYN ELF files (dynamic libraries).
2330          */
2331         if (padding == 0 && !secflag_enabled(curproc, PROC_SEC_ASLR) &&
2332             ((lvp = lib_va_find(&vattr)) != NULL)) {
2333                 int num_segs;
2334 
2335                 model = get_udatamodel();
2336                 if ((model == DATAMODEL_ILP32 &&
2337                     lvp->lv_flags & LV_ELF64) ||
2338                     (model == DATAMODEL_LP64 &&
2339                     lvp->lv_flags & LV_ELF32)) {
2340                         lib_va_release(lvp);
2341                         MOBJ_STAT_ADD(fast_wrong_model);
2342                         return (ENOTSUP);
2343                 }
2344                 num_segs = lvp->lv_num_segs;
2345                 if (*num_mapped < num_segs) {
2346                         *num_mapped = num_segs;
2347                         lib_va_release(lvp);
2348                         MOBJ_STAT_ADD(fast_e2big);
2349                         return (E2BIG);
2350                 }
2351 
2352                 /*
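
The E2BIG path above implies a retry contract for userland callers:
*num_mapped (the syscall's elements argument) is rewritten to the number of
mmapobj_result_t slots actually required.  A caller-side sketch of that retry
loop; the mmapobj() prototype and the MMOBJ_INTERPRET flag are written from
memory and should be checked against mmapobj(2), and everything else here,
including the map_object() wrapper, is illustrative.

#include <sys/types.h>
#include <sys/mman.h>
#include <errno.h>
#include <stdlib.h>

static mmapobj_result_t *
map_object(int fd, uint_t *nsegp)
{
	uint_t n = 16;		/* optimistic first guess */
	mmapobj_result_t *mrp = calloc(n, sizeof (*mrp));

	while (mrp != NULL &&
	    mmapobj(fd, MMOBJ_INTERPRET, mrp, &n, NULL) != 0) {
		mmapobj_result_t *bigger;

		if (errno != E2BIG) {	/* real failure; give up */
			free(mrp);
			return (NULL);
		}
		/* n now holds the required element count; grow and retry. */
		bigger = realloc(mrp, n * sizeof (*mrp));
		if (bigger == NULL) {
			free(mrp);
			return (NULL);
		}
		mrp = bigger;
	}
	if (mrp != NULL)
		*nsegp = n;
	return (mrp);
}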