7029 want per-process exploit mitigation features (secflags)
7030 want basic address space layout randomization (aslr)
7031 noexec_user_stack should be a secflag
7032 want a means to forbid mappings around NULL.
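These features are wired into the x86 VM machine-dependent code shown below (old file first, then the updated file). As a rough, purely illustrative model of the per-process flag check the new code relies on -- plain user-space C, where the names are stand-ins for the kernel's actual secflags machinery (secflag_enabled() and the PROC_SEC_* constants):

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative stand-ins for per-process exploit-mitigation flags. */
    #define SEC_ASLR            (1u << 0)   /* randomize mappings */
    #define SEC_FORBIDNULLMAP   (1u << 1)   /* refuse mappings near NULL */
    #define SEC_NOEXECSTACK     (1u << 2)   /* non-executable stack */

    struct proc_model {
            uint32_t effective;             /* flags in force for the process */
    };

    /* Model of a secflag_enabled()-style predicate. */
    static int
    flag_enabled(const struct proc_model *p, uint32_t flag)
    {
            return ((p->effective & flag) != 0);
    }

    int
    main(void)
    {
            struct proc_model p = { SEC_ASLR | SEC_FORBIDNULLMAP };

            printf("aslr=%d forbidnullmap=%d noexecstack=%d\n",
                flag_enabled(&p, SEC_ASLR),
                flag_enabled(&p, SEC_FORBIDNULLMAP),
                flag_enabled(&p, SEC_NOEXECSTACK));
            return (0);
    }

In the diff itself, the NULL-mapping check is gated by exactly this sort of predicate; the randomization path is reached via the _MAP_RANDOMIZE mapping flag set elsewhere when the corresponding secflag is in force.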


  42 #include <sys/param.h>
  43 #include <sys/systm.h>
  44 #include <sys/user.h>
  45 #include <sys/proc.h>
  46 #include <sys/kmem.h>
  47 #include <sys/vmem.h>
  48 #include <sys/buf.h>
  49 #include <sys/cpuvar.h>
  50 #include <sys/lgrp.h>
  51 #include <sys/disp.h>
  52 #include <sys/vm.h>
  53 #include <sys/mman.h>
  54 #include <sys/vnode.h>
  55 #include <sys/cred.h>
  56 #include <sys/exec.h>
  57 #include <sys/exechdr.h>
  58 #include <sys/debug.h>
  59 #include <sys/vmsystm.h>
  60 #include <sys/swap.h>
  61 #include <sys/dumphdr.h>

  62 
  63 #include <vm/hat.h>
  64 #include <vm/as.h>
  65 #include <vm/seg.h>
  66 #include <vm/seg_kp.h>
  67 #include <vm/seg_vn.h>
  68 #include <vm/page.h>
  69 #include <vm/seg_kmem.h>
  70 #include <vm/seg_kpm.h>
  71 #include <vm/vm_dep.h>
  72 
  73 #include <sys/cpu.h>
  74 #include <sys/vm_machparam.h>
  75 #include <sys/memlist.h>
  76 #include <sys/bootconf.h> /* XXX the memlist stuff belongs in memlist_plat.h */
  77 #include <vm/hat_i86.h>
  78 #include <sys/x86_archext.h>
  79 #include <sys/elf_386.h>
  80 #include <sys/cmn_err.h>
  81 #include <sys/archsystm.h>
  82 #include <sys/machsystm.h>

  83 
  84 #include <sys/vtrace.h>
  85 #include <sys/ddidmareq.h>
  86 #include <sys/promif.h>
  87 #include <sys/memnode.h>
  88 #include <sys/stack.h>
  89 #include <util/qsort.h>
  90 #include <sys/taskq.h>
  91 
  92 #ifdef __xpv
  93 
  94 #include <sys/hypervisor.h>
  95 #include <sys/xen_mmu.h>
  96 #include <sys/balloon_impl.h>
  97 
  98 /*
  99  * domain 0 pages usable for DMA are kept pre-allocated and kept in
 100  * distinct lists, ordered by increasing mfn.
 101  */
 102 static kmutex_t io_pool_lock;


 620 }
 621 
 622 void
 623 map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
 624 {
 625         struct proc *p = curproc;
 626         caddr_t userlimit = (flags & _MAP_LOW32) ?
 627             (caddr_t)_userlimit32 : p->p_as->a_userlimit;
 628 
 629         map_addr_proc(addrp, len, off, vacalign, userlimit, curproc, flags);
 630 }
 631 
 632 /*ARGSUSED*/
 633 int
 634 map_addr_vacalign_check(caddr_t addr, u_offset_t off)
 635 {
 636         return (0);
 637 }
 638 
 639 /*
 640  * map_addr_proc() is the routine called when the system is to
 641  * choose an address for the user.  We will pick an address
 642  * range which is the highest available below userlimit.
 643  *
 644  * Every mapping will have a redzone of a single page on either side of
 645  * the request. This is done to leave one page unmapped between segments.
 646  * This is not required, but it's useful for the user because if their
 647  * program strays across a segment boundary, it will catch a fault
 648  * immediately making debugging a little easier.  Currently the redzone
 649  * is mandatory.
 650  *
 651  * addrp is a value/result parameter.
 652  *      On input it is a hint from the user to be used in a completely
 653  *      machine dependent fashion.  We decide to completely ignore this hint.
 654  *      If MAP_ALIGN was specified, addrp contains the minimal alignment, which
 655  *      must be some "power of two" multiple of pagesize.
 656  *
 657  *      On output it is NULL if no address can be found in the current
 658  *      process's address space or else an address that is currently
 659  *      not mapped for len bytes with a page of red zone on either side.


 735                  * For 32-bit processes, only those which have specified
 736                  * MAP_ALIGN and an addr will be aligned on a larger page size.
 737                  * Not doing so can potentially waste up to 1G of process
 738                  * address space.
 739                  */
 740                 int lvl = (p->p_model == DATAMODEL_ILP32) ? 1 :
 741                     mmu.umax_page_level;
 742 
 743                 while (lvl && len < LEVEL_SIZE(lvl))
 744                         --lvl;
 745 
 746                 align_amount = LEVEL_SIZE(lvl);
 747         }
 748         if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount))
 749                 align_amount = (uintptr_t)*addrp;
 750 
 751         ASSERT(ISP2(align_amount));
 752         ASSERT(align_amount == 0 || align_amount >= PAGESIZE);
 753 
 754         off = off & (align_amount - 1);

 755         /*
 756          * Look for a large enough hole starting below userlimit.
 757          * After finding it, use the upper part.
 758          */
 759         if (as_gap_aligned(as, len, &base, &slen, AH_HI, NULL, align_amount,
 760             PAGESIZE, off) == 0) {
 761                 caddr_t as_addr;
 762 
 763                 /*
 764                  * addr is the highest possible address to use since we have
 765                  * a PAGESIZE redzone at the beginning and end.
 766                  */
 767                 addr = base + slen - (PAGESIZE + len);
 768                 as_addr = addr;
 769                 /*
 770                  * Round address DOWN to the alignment amount and
 771                  * add the offset in.
 772                  * If addr is greater than as_addr, len would not be large
 773                  * enough to include the redzone, so we must adjust down
 774                  * by the alignment amount.
 775                  */
 776                 addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1)));
 777                 addr += (uintptr_t)off;
 778                 if (addr > as_addr) {
 779                         addr -= align_amount;
 780                 }
 781
 782                 ASSERT(addr > base);
 783                 ASSERT(addr + len < base + slen);
 784                 ASSERT(((uintptr_t)addr & (align_amount - 1)) ==
 785                     ((uintptr_t)(off)));
 786                 *addrp = addr;
 787         } else {
 788                 *addrp = NULL;  /* no more virtual space */
 789         }
 790 }
 791 
 792 int valid_va_range_aligned_wraparound;
 793 
 794 /*
 795  * Determine whether [*basep, *basep + *lenp) contains a mappable range of
 796  * addresses at least "minlen" long, where the base of the range is at "off"
 797  * phase from an "align" boundary and there is space for a "redzone"-sized
 798  * redzone on either side of the range.  On success, 1 is returned and *basep
 799  * and *lenp are adjusted to describe the acceptable range (including
 800  * the redzone).  On failure, 0 is returned.
 801  */


 887         }
 888 
 889         *basep = (caddr_t)lo;
 890         *lenp = hi - lo;
 891         return (1);
 892 }
 893 
 894 /*
 895  * Determine whether [*basep, *basep + *lenp) contains a mappable range of
 896  * addresses at least "minlen" long.  On success, 1 is returned and *basep
 897  * and *lenp are adjusted to describe the acceptable range.  On failure, 0
 898  * is returned.
 899  */
 900 int
 901 valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
 902 {
 903         return (valid_va_range_aligned(basep, lenp, minlen, dir, 0, 0, 0));
 904 }
 905 
 906 /*
 907  * Determine whether [addr, addr+len] are valid user addresses.
 908  */
 909 /*ARGSUSED*/
 910 int
 911 valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
 912     caddr_t userlimit)
 913 {
 914         caddr_t eaddr = addr + len;
 915 
 916         if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
 917                 return (RANGE_BADADDR);
 918
 919 #if defined(__amd64)
 920         /*
 921          * Check for the VA hole
 922          */
 923         if (eaddr > (caddr_t)hole_start && addr < (caddr_t)hole_end)
 924                 return (RANGE_BADADDR);
 925 #endif
 926 
 927         return (RANGE_OKAY);
 928 }
 929 
 930 /*
 931  * Return 1 if the page frame is onboard memory, else 0.
 932  */
 933 int
 934 pf_is_memory(pfn_t pf)
 935 {
 936         if (pfn_is_foreign(pf))
 937                 return (0);
 938         return (address_in_memlist(phys_install, pfn_to_pa(pf), 1));


3909 
3910         hat_mempte_release(cpup->cpu_caddr2, cpup->cpu_caddr2pte);
3911         cpup->cpu_caddr2pte = 0;
3912         vmem_free(heap_arena, cpup->cpu_caddr2, mmu_ptob(1));
3913         cpup->cpu_caddr2 = 0;
3914 
3915         hat_mempte_release(cpup->cpu_caddr1, cpup->cpu_caddr1pte);
3916         cpup->cpu_caddr1pte = 0;
3917         vmem_free(heap_arena, cpup->cpu_caddr1, mmu_ptob(1));
3918         cpup->cpu_caddr1 = 0;
3919 }
3920 
3921 /*
3922  * Function for flushing D-cache when performing module relocations
3923  * to an alternate mapping.  Unnecessary on Intel / AMD platforms.
3924  */
3925 void
3926 dcache_flushall()
3927 {}
3928 
3929 size_t
3930 exec_get_spslew(void)
3931 {
3932         return (0);
3933 }
3934 
3935 /*
3936  * Allocate a memory page.  The argument 'seed' can be any pseudo-random
3937  * number to vary where the pages come from.  This is quite a hacked up
3938  * method -- it works for now, but really needs to be fixed up a bit.
3939  *
3940  * We currently use page_create_va() on the kvp with fake offsets,
3941  * segments and virt address.  This is pretty bogus, but was copied from the
3942  * old hat_i86.c code.  A better approach would be to specify either mnode
3943  * random or mnode local and take a page from whatever color has the MOST
3944  * available - this would have a minimal impact on page coloring.
3945  */
3946 page_t *
3947 page_get_physical(uintptr_t seed)
3948 {
3949         page_t *pp;
3950         u_offset_t offset;
3951         static struct seg tmpseg;
3952         static uintptr_t ctr = 0;
3953 
3954         /*




  42 #include <sys/param.h>
  43 #include <sys/systm.h>
  44 #include <sys/user.h>
  45 #include <sys/proc.h>
  46 #include <sys/kmem.h>
  47 #include <sys/vmem.h>
  48 #include <sys/buf.h>
  49 #include <sys/cpuvar.h>
  50 #include <sys/lgrp.h>
  51 #include <sys/disp.h>
  52 #include <sys/vm.h>
  53 #include <sys/mman.h>
  54 #include <sys/vnode.h>
  55 #include <sys/cred.h>
  56 #include <sys/exec.h>
  57 #include <sys/exechdr.h>
  58 #include <sys/debug.h>
  59 #include <sys/vmsystm.h>
  60 #include <sys/swap.h>
  61 #include <sys/dumphdr.h>
  62 #include <sys/random.h>
  63 
  64 #include <vm/hat.h>
  65 #include <vm/as.h>
  66 #include <vm/seg.h>
  67 #include <vm/seg_kp.h>
  68 #include <vm/seg_vn.h>
  69 #include <vm/page.h>
  70 #include <vm/seg_kmem.h>
  71 #include <vm/seg_kpm.h>
  72 #include <vm/vm_dep.h>
  73 
  74 #include <sys/cpu.h>
  75 #include <sys/vm_machparam.h>
  76 #include <sys/memlist.h>
  77 #include <sys/bootconf.h> /* XXX the memlist stuff belongs in memlist_plat.h */
  78 #include <vm/hat_i86.h>
  79 #include <sys/x86_archext.h>
  80 #include <sys/elf_386.h>
  81 #include <sys/cmn_err.h>
  82 #include <sys/archsystm.h>
  83 #include <sys/machsystm.h>
  84 #include <sys/secflags.h>
  85 
  86 #include <sys/vtrace.h>
  87 #include <sys/ddidmareq.h>
  88 #include <sys/promif.h>
  89 #include <sys/memnode.h>
  90 #include <sys/stack.h>
  91 #include <util/qsort.h>
  92 #include <sys/taskq.h>
  93 
  94 #ifdef __xpv
  95 
  96 #include <sys/hypervisor.h>
  97 #include <sys/xen_mmu.h>
  98 #include <sys/balloon_impl.h>
  99 
 100 /*
 101  * domain 0 pages usable for DMA are kept pre-allocated and kept in
 102  * distinct lists, ordered by increasing mfn.
 103  */
 104 static kmutex_t io_pool_lock;


 622 }
 623 
 624 void
 625 map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
 626 {
 627         struct proc *p = curproc;
 628         caddr_t userlimit = (flags & _MAP_LOW32) ?
 629             (caddr_t)_userlimit32 : p->p_as->a_userlimit;
 630 
 631         map_addr_proc(addrp, len, off, vacalign, userlimit, curproc, flags);
 632 }
 633 
 634 /*ARGSUSED*/
 635 int
 636 map_addr_vacalign_check(caddr_t addr, u_offset_t off)
 637 {
 638         return (0);
 639 }
 640 
 641 /*
 642  * The maximum amount a randomized mapping will be slewed.  We should perhaps
 643  * arrange things so these tunables can be separate for mmap, mmapobj, and
 644  * ld.so
 645  */
 646 size_t aslr_max_map_skew = 256 * 1024 * 1024; /* 256MB */
 647 
 648 /*
 649  * map_addr_proc() is the routine called when the system is to
 650  * choose an address for the user.  We will pick an address
 651  * range which is the highest available below userlimit.
 652  *
 653  * Every mapping will have a redzone of a single page on either side of
 654  * the request. This is done to leave one page unmapped between segments.
 655  * This is not required, but it's useful for the user because if their
 656  * program strays across a segment boundary, it will catch a fault
 657  * immediately making debugging a little easier.  Currently the redzone
 658  * is mandatory.
 659  *
 660  * addrp is a value/result parameter.
 661  *      On input it is a hint from the user to be used in a completely
 662  *      machine dependent fashion.  We decide to completely ignore this hint.
 663  *      If MAP_ALIGN was specified, addrp contains the minimal alignment, which
 664  *      must be some "power of two" multiple of pagesize.
 665  *
 666  *      On output it is NULL if no address can be found in the current
 667  *      process's address space or else an address that is currently
 668  *      not mapped for len bytes with a page of red zone on either side.
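As a worked, user-space-only illustration of the placement arithmetic described above and carried out in the body of map_addr_proc() below (the gap, PAGESIZE, and a 64-bit host are assumptions invented for the example):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGESIZE        4096UL

    int
    main(void)
    {
            /* Pretend as_gap_aligned() found this hole below userlimit. */
            uintptr_t base = 0x7fff00000000UL;      /* start of the gap */
            size_t slen = 64UL * 1024 * 1024;       /* gap length: 64MB */
            size_t len = 1UL * 1024 * 1024;         /* requested length: 1MB */
            uintptr_t align = 2UL * 1024 * 1024;    /* 2MB alignment */
            uintptr_t off = 0;                      /* phase within alignment */

            /* Highest usable start, leaving a one-page redzone at the top. */
            uintptr_t addr = base + slen - (PAGESIZE + len);
            uintptr_t as_addr = addr;

            /* Round down to the alignment and add the phase back in. */
            addr = (addr & ~(align - 1)) + off;
            if (addr > as_addr)
                    addr -= align;  /* adding `off` overshot; back off */

            printf("gap [%#lx, %#lx) -> chose %#lx\n",
                (unsigned long)base, (unsigned long)(base + slen),
                (unsigned long)addr);
            return (0);
    }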


 744                  * For 32-bit processes, only those which have specified
 745                  * MAP_ALIGN and an addr will be aligned on a larger page size.
 746                  * Not doing so can potentially waste up to 1G of process
 747                  * address space.
 748                  */
 749                 int lvl = (p->p_model == DATAMODEL_ILP32) ? 1 :
 750                     mmu.umax_page_level;
 751 
 752                 while (lvl && len < LEVEL_SIZE(lvl))
 753                         --lvl;
 754 
 755                 align_amount = LEVEL_SIZE(lvl);
 756         }
 757         if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount))
 758                 align_amount = (uintptr_t)*addrp;
 759 
 760         ASSERT(ISP2(align_amount));
 761         ASSERT(align_amount == 0 || align_amount >= PAGESIZE);
 762 
 763         off = off & (align_amount - 1);
 764 
 765         /*
 766          * Look for a large enough hole starting below userlimit.
 767          * After finding it, use the upper part.
 768          */
 769         if (as_gap_aligned(as, len, &base, &slen, AH_HI, NULL, align_amount,
 770             PAGESIZE, off) == 0) {
 771                 caddr_t as_addr;
 772 
 773                 /*
 774                  * addr is the highest possible address to use since we have
 775                  * a PAGESIZE redzone at the beginning and end.
 776                  */
 777                 addr = base + slen - (PAGESIZE + len);
 778                 as_addr = addr;
 779                 /*
 780                  * Round address DOWN to the alignment amount and
 781                  * add the offset in.
 782                  * If addr is greater than as_addr, len would not be large
 783                  * enough to include the redzone, so we must adjust down
 784                  * by the alignment amount.
 785                  */
 786                 addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1)));
 787                 addr += (uintptr_t)off;
 788                 if (addr > as_addr) {
 789                         addr -= align_amount;
 790                 }
 791 
 792                 /*
 793                  * If randomization is requested, slew the allocation
 794                  * backwards, within the same gap, by a random amount.
 795                  */
 796                 if (flags & _MAP_RANDOMIZE) {
 797                         uint32_t slew;
 798 
 799                         (void) random_get_pseudo_bytes((uint8_t *)&slew,
 800                             sizeof (slew));
 801 
 802                         slew = slew % MIN(aslr_max_map_skew, (addr - base));
 803                         addr -= P2ALIGN(slew, align_amount);
 804                 }
 805 
 806                 ASSERT(addr > base);
 807                 ASSERT(addr + len < base + slen);
 808                 ASSERT(((uintptr_t)addr & (align_amount - 1)) ==
 809                     ((uintptr_t)(off)));
 810                 *addrp = addr;
 811         } else {
 812                 *addrp = NULL;  /* no more virtual space */
 813         }
 814 }
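The _MAP_RANDOMIZE branch above slews the chosen address back toward the bottom of the gap by a random, alignment-preserving amount, capped by aslr_max_map_skew. A small user-space sketch of that arithmetic (rand() stands in for random_get_pseudo_bytes(), the addresses are invented, and a 64-bit host is assumed):

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    #define MIN(a, b)       ((a) < (b) ? (a) : (b))
    /* Round x down to a multiple of the (power-of-two) alignment. */
    #define P2ALIGN(x, a)   ((x) & ~((uintptr_t)(a) - 1))

    int
    main(void)
    {
            size_t aslr_max_map_skew = 256UL * 1024 * 1024; /* 256MB cap */
            uintptr_t base = 0x7fff00000000UL;      /* bottom of the gap */
            uintptr_t addr = 0x7fff03e00000UL;      /* highest-fit choice */
            uintptr_t align = 2UL * 1024 * 1024;    /* 2MB alignment */

            /* Random slew, bounded so the result stays inside the gap. */
            uintptr_t slew = (uintptr_t)rand() %
                MIN(aslr_max_map_skew, addr - base);

            /* Move backwards by a whole multiple of the alignment. */
            addr -= P2ALIGN(slew, align);

            printf("slewed down by %#lx to %#lx\n",
                (unsigned long)P2ALIGN(slew, align), (unsigned long)addr);
            return (0);
    }

Slewing by a multiple of align_amount is what keeps the ASSERTs that follow the slew satisfied: the result still lies inside the gap and still sits at the requested phase from an alignment boundary.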
 815 
 816 int valid_va_range_aligned_wraparound;
 817 
 818 /*
 819  * Determine whether [*basep, *basep + *lenp) contains a mappable range of
 820  * addresses at least "minlen" long, where the base of the range is at "off"
 821  * phase from an "align" boundary and there is space for a "redzone"-sized
 822  * redzone on either side of the range.  On success, 1 is returned and *basep
 823  * and *lenp are adjusted to describe the acceptable range (including
 824  * the redzone).  On failure, 0 is returned.
 825  */
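To make the "off phase from an align boundary" contract concrete, here is a small user-space sketch that finds the lowest acceptable start inside a candidate range under those constraints (purely illustrative with invented numbers; the kernel's actual search, whose body is elided from this hunk, also handles the direction argument and overflow cases):

    #include <stdio.h>
    #include <stdint.h>

    /* Round x up to the next multiple of the (power-of-two) alignment. */
    #define P2ROUNDUP(x, a) (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

    int
    main(void)
    {
            uintptr_t base = 0x10000000UL;  /* candidate range start */
            size_t len = 1024 * 1024;       /* candidate range length */
            size_t minlen = 64 * 1024;      /* required usable length */
            uintptr_t align = 0x10000;      /* 64K alignment */
            uintptr_t off = 0x2000;         /* phase within the alignment */
            size_t redzone = 4096;          /* one-page redzone */

            /* Lowest start that clears the leading redzone... */
            uintptr_t lo = base + redzone;
            /* ...and sits at phase `off` from an `align` boundary. */
            lo = P2ROUNDUP(lo - off, align) + off;

            /* Leave room for minlen plus the trailing redzone. */
            if (lo + minlen + redzone <= base + len)
                    printf("usable range starts at %#lx\n",
                        (unsigned long)lo);
            else
                    printf("no acceptable range\n");
            return (0);
    }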


 911         }
 912 
 913         *basep = (caddr_t)lo;
 914         *lenp = hi - lo;
 915         return (1);
 916 }
 917 
 918 /*
 919  * Determine whether [*basep, *basep + *lenp) contains a mappable range of
 920  * addresses at least "minlen" long.  On success, 1 is returned and *basep
 921  * and *lenp are adjusted to describe the acceptable range.  On failure, 0
 922  * is returned.
 923  */
 924 int
 925 valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
 926 {
 927         return (valid_va_range_aligned(basep, lenp, minlen, dir, 0, 0, 0));
 928 }
 929 
 930 /*
 931  * Default to forbidding the first 64k of address space.  This protects most
 932  * reasonably sized structures from dereferences through NULL:
 933  *     ((foo_t *)0)->bar
 934  */
 935 uintptr_t forbidden_null_mapping_sz = 0x10000;
 936 
 937 /*
 938  * Determine whether [addr, addr+len] are valid user addresses.
 939  */
 940 /*ARGSUSED*/
 941 int
 942 valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
 943     caddr_t userlimit)
 944 {
 945         caddr_t eaddr = addr + len;
 946 
 947         if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
 948                 return (RANGE_BADADDR);
 949 
 950         if ((addr <= (caddr_t)forbidden_null_mapping_sz) &&
 951             secflag_enabled(as->a_proc, PROC_SEC_FORBIDNULLMAP))
 952                 return (RANGE_BADADDR);
 953 
 954 #if defined(__amd64)
 955         /*
 956          * Check for the VA hole
 957          */
 958         if (eaddr > (caddr_t)hole_start && addr < (caddr_t)hole_end)
 959                 return (RANGE_BADADDR);
 960 #endif
 961 
 962         return (RANGE_OKAY);
 963 }
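As a hedged illustration of what the new check means for applications: with PROC_SEC_FORBIDNULLMAP in force on a process (how the flag is administered is outside this excerpt), a fixed mapping inside the guarded first 64K should be refused by valid_usr_range() and the mmap below should fail. The address 0x1000, the 4K page size, and the expectation of failure are assumptions for the demo; the program simply reports what it gets.

    #include <stdio.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/mman.h>

    int
    main(void)
    {
            /* One page, fixed, inside the default 64K forbidden region. */
            void *want = (void *)0x1000;
            void *got = mmap(want, 4096, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);

            if (got == MAP_FAILED)
                    printf("low mapping refused: %s\n", strerror(errno));
            else
                    printf("low mapping succeeded at %p\n", got);
            return (0);
    }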
 964 
 965 /*
 966  * Return 1 if the page frame is onboard memory, else 0.
 967  */
 968 int
 969 pf_is_memory(pfn_t pf)
 970 {
 971         if (pfn_is_foreign(pf))
 972                 return (0);
 973         return (address_in_memlist(phys_install, pfn_to_pa(pf), 1));


3944 
3945         hat_mempte_release(cpup->cpu_caddr2, cpup->cpu_caddr2pte);
3946         cpup->cpu_caddr2pte = 0;
3947         vmem_free(heap_arena, cpup->cpu_caddr2, mmu_ptob(1));
3948         cpup->cpu_caddr2 = 0;
3949 
3950         hat_mempte_release(cpup->cpu_caddr1, cpup->cpu_caddr1pte);
3951         cpup->cpu_caddr1pte = 0;
3952         vmem_free(heap_arena, cpup->cpu_caddr1, mmu_ptob(1));
3953         cpup->cpu_caddr1 = 0;
3954 }
3955 
3956 /*
3957  * Function for flushing D-cache when performing module relocations
3958  * to an alternate mapping.  Unnecessary on Intel / AMD platforms.
3959  */
3960 void
3961 dcache_flushall()
3962 {}
3963
3964 /*
3965  * Allocate a memory page.  The argument 'seed' can be any pseudo-random
3966  * number to vary where the pages come from.  This is quite a hacked up
3967  * method -- it works for now, but really needs to be fixed up a bit.
3968  *
3969  * We currently use page_create_va() on the kvp with fake offsets,
3970  * segments and virt address.  This is pretty bogus, but was copied from the
3971  * old hat_i86.c code.  A better approach would be to specify either mnode
3972  * random or mnode local and take a page from whatever color has the MOST
3973  * available - this would have a minimal impact on page coloring.
3974  */
3975 page_t *
3976 page_get_physical(uintptr_t seed)
3977 {
3978         page_t *pp;
3979         u_offset_t offset;
3980         static struct seg tmpseg;
3981         static uintptr_t ctr = 0;
3982 
3983         /*