Print this page
uts: Allow for address space randomisation.
Randomise the base addresses of shared objects, non-fixed mappings, the
stack and the heap.  Introduce a service, svc:/system/process-security,
and a tool, psecflags(1), to control and observe it.


  52 #include <sys/lgrp.h>
  53 #include <sys/vtrace.h>
  54 #include <sys/exec.h>
  55 #include <sys/exechdr.h>
  56 #include <sys/kmem.h>
  57 #include <sys/prsystm.h>
  58 #include <sys/modctl.h>
  59 #include <sys/vmparam.h>
  60 #include <sys/door.h>
  61 #include <sys/schedctl.h>
  62 #include <sys/utrap.h>
  63 #include <sys/systeminfo.h>
  64 #include <sys/stack.h>
  65 #include <sys/rctl.h>
  66 #include <sys/dtrace.h>
  67 #include <sys/lwpchan_impl.h>
  68 #include <sys/pool.h>
  69 #include <sys/sdt.h>
  70 #include <sys/brand.h>
  71 #include <sys/klpd.h>

  72 
  73 #include <c2/audit.h>
  74 
  75 #include <vm/hat.h>
  76 #include <vm/anon.h>
  77 #include <vm/as.h>
  78 #include <vm/seg.h>
  79 #include <vm/seg_vn.h>
  80 
  81 #define PRIV_RESET              0x01    /* needs to reset privs */
  82 #define PRIV_SETID              0x02    /* needs to change uids */
  83 #define PRIV_SETUGID            0x04    /* is setuid/setgid/forced privs */
  84 #define PRIV_INCREASE           0x08    /* child runs with more privs */
  85 #define MAC_FLAGS               0x10    /* need to adjust MAC flags */
  86 #define PRIV_FORCED             0x20    /* has forced privileges */
  87 
  88 static int execsetid(struct vnode *, struct vattr *, uid_t *, uid_t *,
  89     priv_set_t *, cred_t *, const char *);
  90 static int hold_execsw(struct execsw *);
  91 
  92 uint_t auxv_hwcap = 0;  /* auxv AT_SUN_HWCAP value; determined on the fly */
  93 uint_t auxv_hwcap_2 = 0;        /* AT_SUN_HWCAP2 */
  94 #if defined(_SYSCALL32_IMPL)
  95 uint_t auxv_hwcap32 = 0;        /* 32-bit version of auxv_hwcap */
  96 uint_t auxv_hwcap32_2 = 0;      /* 32-bit version of auxv_hwcap2 */
  97 #endif
  98 
  99 #define PSUIDFLAGS              (SNOCD|SUGID)
 100 
 101 /*














 102  * exece() - system call wrapper around exec_common()
 103  */
 104 int
 105 exece(const char *fname, const char **argp, const char **envp)
 106 {
 107         int rc = exec_common(fname, argp, envp, EBA_NONE);
 108 
 109         /* A nonzero code from exec_common() is handed to set_errno(). */
 110         return ((rc != 0) ? (set_errno(rc)) : 0);
 111 }
 112 
 113 int
 114 exec_common(const char *fname, const char **argp, const char **envp,
 115     int brand_action)
 116 {
 117         vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL;
 118         proc_t *p = ttoproc(curthread);
 119         klwp_t *lwp = ttolwp(curthread);
 120         struct user *up = PTOU(p);
 121         long execsz;            /* temporary count of exec size */


 643                                 priv_intersect(&CR_OPPRIV(cred), &fset);
 644                         }
 645                         priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred));
 646                         CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred);
 647                         if (privflags & PRIV_FORCED) {
 648                                 priv_set_PA(cred);
 649                                 priv_union(&fset, &CR_EPRIV(cred));
 650                                 priv_union(&fset, &CR_PPRIV(cred));
 651                         }
 652                         priv_adjust_PA(cred);
 653                 }
 654         } else if (level == 0 && args->pfcred != NULL) {
 655                 newcred = cred = args->pfcred;
 656                 privflags |= PRIV_INCREASE;
 657                 /* pfcred is not forced to adhere to these settings */
 658                 priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred));
 659                 CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred);
 660                 priv_adjust_PA(cred);
 661         }
 662 




 663         /* SunOS 4.x buy-back */
 664         if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) &&
 665             (vattr.va_mode & (VSUID|VSGID))) {
 666                 char path[MAXNAMELEN];
 667                 refstr_t *mntpt = NULL;
 668                 int ret = -1;
 669 
 670                 bzero(path, sizeof (path));
 671                 zone_hold(pp->p_zone);
 672 
 673                 ret = vnodetopath(pp->p_zone->zone_rootvp, vp, path,
 674                     sizeof (path), cred);
 675 
 676                 /* fallback to mountpoint if a path can't be found */
 677                 if ((ret != 0) || (ret == 0 && path[0] == '\0'))
 678                         mntpt = vfs_getmntpoint(vp->v_vfsp);
 679 
 680                 if (mntpt == NULL)
 681                         zcmn_err(pp->p_zone->zone_id, CE_NOTE,
 682                             "!uid %d: setuid execution not allowed, "


1757                                     AT_SUN_EMULATOR, (long)&ustrp[*--offp])
1758                 } else {
1759                         auxv32_t **a = (auxv32_t **)auxvpp;
1760                         ADDAUX(*a,
1761                             AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp])
1762                         ADDAUX(*a,
1763                             AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp])
1764                         if (args->brandname != NULL)
1765                                 ADDAUX(*a, AT_SUN_BRANDNAME,
1766                                     (int)(uintptr_t)&ustrp[*--offp])
1767                         if (args->emulator != NULL)
1768                                 ADDAUX(*a, AT_SUN_EMULATOR,
1769                                     (int)(uintptr_t)&ustrp[*--offp])
1770                 }
1771         }
1772 
1773         return (0);
1774 }
1775 
1776 /*





































1777  * Initialize a new user stack with the specified arguments and environment.
1778  * The initial user stack layout is as follows:
1779  *
1780  *      User Stack
1781  *      +---------------+ <--- curproc->p_usrstack
1782  *      |               |
1783  *      | slew          |
1784  *      |               |
1785  *      +---------------+
1786  *      | NULL          |
1787  *      +---------------+
1788  *      |               |
1789  *      | auxv strings  |
1790  *      |               |
1791  *      +---------------+
1792  *      |               |
1793  *      | envp strings  |
1794  *      |               |
1795  *      +---------------+
1796  *      |               |


1986         p->p_datprot = args->dat_prot;
1987 
1988         /*
1989          * Reset resource controls such that all controls are again active as
1990          * well as appropriate to the potentially new address model for the
1991          * process.
1992          */
1993         e.rcep_p.proc = p;
1994         e.rcep_t = RCENTITY_PROCESS;
1995         rctl_set_reset(p->p_rctls, p, &e);
1996 
1997         /* Too early to call map_pgsz for the heap */
1998         if (use_stk_lpg) {
1999                 p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, p, 0, 0, 0));
2000         }
2001 
2002         mutex_enter(&p->p_lock);
2003         p->p_flag |= SAUTOLPG;       /* kernel controls page sizes */
2004         mutex_exit(&p->p_lock);
2005 
2006         /*
2007          * Some platforms may choose to randomize real stack start by adding a
2008          * small slew (not more than a few hundred bytes) to the top of the
2009          * stack. This helps avoid cache thrashing when identical processes
2010          * simultaneously share caches that don't provide enough associativity
2011          * (e.g. sun4v systems). In this case stack slewing makes the same hot
2012          * stack variables in different processes to live in different cache
2013          * sets increasing effective associativity.
2014          */
2015         sp_slew = exec_get_spslew();
2016         ASSERT(P2PHASE(sp_slew, args->stk_align) == 0);


2017         exec_set_sp(size + sp_slew);
2018 
2019         as = as_alloc();
2020         p->p_as = as;
2021         as->a_proc = p;
2022         if (p->p_model == DATAMODEL_ILP32 || args->addr32)
2023                 as->a_userlimit = (caddr_t)USERLIMIT32;
2024         (void) hat_setup(as->a_hat, HAT_ALLOC);
2025         hat_join_srd(as->a_hat, args->ex_vp);
2026 
2027         /*
2028          * Finally, write out the contents of the new stack.
2029          */
2030         error = stk_copyout(args, usrstack - sp_slew, auxvpp, up);
2031         kmem_free(args->stk_base, args->stk_size);
2032         return (error);
2033 }


  52 #include <sys/lgrp.h>
  53 #include <sys/vtrace.h>
  54 #include <sys/exec.h>
  55 #include <sys/exechdr.h>
  56 #include <sys/kmem.h>
  57 #include <sys/prsystm.h>
  58 #include <sys/modctl.h>
  59 #include <sys/vmparam.h>
  60 #include <sys/door.h>
  61 #include <sys/schedctl.h>
  62 #include <sys/utrap.h>
  63 #include <sys/systeminfo.h>
  64 #include <sys/stack.h>
  65 #include <sys/rctl.h>
  66 #include <sys/dtrace.h>
  67 #include <sys/lwpchan_impl.h>
  68 #include <sys/pool.h>
  69 #include <sys/sdt.h>
  70 #include <sys/brand.h>
  71 #include <sys/klpd.h>
  72 #include <sys/random.h>
  73 
  74 #include <c2/audit.h>
  75 
  76 #include <vm/hat.h>
  77 #include <vm/anon.h>
  78 #include <vm/as.h>
  79 #include <vm/seg.h>
  80 #include <vm/seg_vn.h>
  81 
  82 #define PRIV_RESET              0x01    /* needs to reset privs */
  83 #define PRIV_SETID              0x02    /* needs to change uids */
  84 #define PRIV_SETUGID            0x04    /* is setuid/setgid/forced privs */
  85 #define PRIV_INCREASE           0x08    /* child runs with more privs */
  86 #define MAC_FLAGS               0x10    /* need to adjust MAC flags */
  87 #define PRIV_FORCED             0x20    /* has forced privileges */
  88 
  89 static int execsetid(struct vnode *, struct vattr *, uid_t *, uid_t *,
  90     priv_set_t *, cred_t *, const char *);
  91 static int hold_execsw(struct execsw *);
  92 
  93 uint_t auxv_hwcap = 0;  /* auxv AT_SUN_HWCAP value; determined on the fly */
  94 uint_t auxv_hwcap_2 = 0;        /* AT_SUN_HWCAP2 */
  95 #if defined(_SYSCALL32_IMPL)
  96 uint_t auxv_hwcap32 = 0;        /* 32-bit version of auxv_hwcap */
  97 uint_t auxv_hwcap32_2 = 0;      /* 32-bit version of auxv_hwcap2 */
  98 #endif
  99 
 100 #define PSUIDFLAGS              (SNOCD|SUGID)
 101 
 102 /*
 103  * These are consumed by the specific exec modules, but defined here because:
 104  *
 105  * 1) The exec modules are unloadable, which would make this near useless.
 106  *
 107  * 2) We want them to be common across all of them, should more than ELF come
 108  *    to support them.
 109  *
 110  * All must be powers of 2.
 111  */
 112 volatile size_t aslr_max_brk_skew = 16 * 1024 * 1024; /* 16MB */
 113 #pragma weak exec_stackgap = aslr_max_stack_skew      /* Old, compatible name */
 114 volatile size_t aslr_max_stack_skew = 64 * 1024;      /* 64KB */
 115 
 116 /*
 117  * exece() - system call wrapper around exec_common()
 118  */
 119 int
 120 exece(const char *fname, const char **argp, const char **envp)
 121 {
 122         int rc = exec_common(fname, argp, envp, EBA_NONE);
 123 
 124         /* A nonzero code from exec_common() is handed to set_errno(). */
 125         return ((rc != 0) ? (set_errno(rc)) : 0);
 126 }
 127 
 128 int
 129 exec_common(const char *fname, const char **argp, const char **envp,
 130     int brand_action)
 131 {
 132         vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL;
 133         proc_t *p = ttoproc(curthread);
 134         klwp_t *lwp = ttolwp(curthread);
 135         struct user *up = PTOU(p);
 136         long execsz;            /* temporary count of exec size */


 658                                 priv_intersect(&CR_OPPRIV(cred), &fset);
 659                         }
 660                         priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred));
 661                         CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred);
 662                         if (privflags & PRIV_FORCED) {
 663                                 priv_set_PA(cred);
 664                                 priv_union(&fset, &CR_EPRIV(cred));
 665                                 priv_union(&fset, &CR_PPRIV(cred));
 666                         }
 667                         priv_adjust_PA(cred);
 668                 }
 669         } else if (level == 0 && args->pfcred != NULL) {
 670                 newcred = cred = args->pfcred;
 671                 privflags |= PRIV_INCREASE;
 672                 /* pfcred is not forced to adhere to these settings */
 673                 priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred));
 674                 CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred);
 675                 priv_adjust_PA(cred);
 676         }
 677 
 678         /* The new image gets the inheritable secflags as its secflags */
 679         /* XXX: This probably means we have the wrong secflags when exec fails */
 680         secflag_promote(pp);
 681 
 682         /* SunOS 4.x buy-back */
 683         if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) &&
 684             (vattr.va_mode & (VSUID|VSGID))) {
 685                 char path[MAXNAMELEN];
 686                 refstr_t *mntpt = NULL;
 687                 int ret = -1;
 688 
 689                 bzero(path, sizeof (path));
 690                 zone_hold(pp->p_zone);
 691 
 692                 ret = vnodetopath(pp->p_zone->zone_rootvp, vp, path,
 693                     sizeof (path), cred);
 694 
 695                 /* fallback to mountpoint if a path can't be found */
 696                 if ((ret != 0) || (ret == 0 && path[0] == '\0'))
 697                         mntpt = vfs_getmntpoint(vp->v_vfsp);
 698 
 699                 if (mntpt == NULL)
 700                         zcmn_err(pp->p_zone->zone_id, CE_NOTE,
 701                             "!uid %d: setuid execution not allowed, "


1776                                     AT_SUN_EMULATOR, (long)&ustrp[*--offp])
1777                 } else {
1778                         auxv32_t **a = (auxv32_t **)auxvpp;
1779                         ADDAUX(*a,
1780                             AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp])
1781                         ADDAUX(*a,
1782                             AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp])
1783                         if (args->brandname != NULL)
1784                                 ADDAUX(*a, AT_SUN_BRANDNAME,
1785                                     (int)(uintptr_t)&ustrp[*--offp])
1786                         if (args->emulator != NULL)
1787                                 ADDAUX(*a, AT_SUN_EMULATOR,
1788                                     (int)(uintptr_t)&ustrp[*--offp])
1789                 }
1790         }
1791 
1792         return (0);
1793 }
1794 
1795 /*
1796  * Return the amount by which to slew the initial %sp below the (constant)
1797  * stack base.  With ASLR enabled this is a random SA()-aligned amount derived
1798  * from aslr_max_stack_skew, which makes life slightly more complicated for
1799  * buffer overflows hoping to overwrite the return address.  If no random
1800  * bytes are delivered, the offset stays 0.
1801  *
1802  * Without ASLR (or with aslr_max_stack_skew == 0) the result is 0, except on
1803  * sun4v, where successive calls cycle through stack colors so the same hot
1804  * stack variables of identical processes live in different cache sets.
1805  */
1806 size_t
1807 exec_get_spslew(void)
1808 {
1809 #ifdef sun4v
1810         static uint_t sp_color_stride = 16;
1811         static uint_t sp_color_mask = 0x1f;
1812         static uint_t sp_current_color = (uint_t)-1;
1813 #endif
1814         size_t max_skew = aslr_max_stack_skew;  /* snapshot the volatile */
1815         size_t off = 0;         /* stays 0 if no random bytes arrive */
1816 
1817         ASSERT(ISP2(max_skew));
1818         if ((max_skew == 0) || !secflag_enabled(curproc, PROC_SEC_ASLR)) {
1819                 /* No random slew: 0, or the next stack color on sun4v. */
1820 #ifdef sun4v
1821                 uint_t spcolor = atomic_inc_32_nv(&sp_current_color);
1822                 return ((size_t)((spcolor & sp_color_mask) * SA(sp_color_stride)));
1823 #else
1824                 return (0);
1825 #endif
1826         }
1827 
1828         (void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
1829         return (SA(P2PHASE(off, max_skew)));
1830 }
1831 
1832 /*
1833  * Initialize a new user stack with the specified arguments and environment.
1834  * The initial user stack layout is as follows:
1835  *
1836  *      User Stack
1837  *      +---------------+ <--- curproc->p_usrstack
1838  *      |               |
1839  *      | slew          |
1840  *      |               |
1841  *      +---------------+
1842  *      | NULL          |
1843  *      +---------------+
1844  *      |               |
1845  *      | auxv strings  |
1846  *      |               |
1847  *      +---------------+
1848  *      |               |
1849  *      | envp strings  |
1850  *      |               |
1851  *      +---------------+
1852  *      |               |


2042         p->p_datprot = args->dat_prot;
2043 
2044         /*
2045          * Reset resource controls such that all controls are again active as
2046          * well as appropriate to the potentially new address model for the
2047          * process.
2048          */
2049         e.rcep_p.proc = p;
2050         e.rcep_t = RCENTITY_PROCESS;
2051         rctl_set_reset(p->p_rctls, p, &e);
2052 
2053         /* Too early to call map_pgsz for the heap */
2054         if (use_stk_lpg) {
2055                 p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, p, 0, 0, 0));
2056         }
2057 
2058         mutex_enter(&p->p_lock);
2059         p->p_flag |= SAUTOLPG;       /* kernel controls page sizes */
2060         mutex_exit(&p->p_lock);
2061 









2062         sp_slew = exec_get_spslew();
2063         ASSERT(P2PHASE(sp_slew, args->stk_align) == 0);
2064         /* Be certain we don't underflow */
2065         VERIFY((curproc->p_usrstack - (size + sp_slew)) < curproc->p_usrstack);
2066         exec_set_sp(size + sp_slew);
2067 
2068         as = as_alloc();
2069         p->p_as = as;
2070         as->a_proc = p;
2071         if (p->p_model == DATAMODEL_ILP32 || args->addr32)
2072                 as->a_userlimit = (caddr_t)USERLIMIT32;
2073         (void) hat_setup(as->a_hat, HAT_ALLOC);
2074         hat_join_srd(as->a_hat, args->ex_vp);
2075 
2076         /*
2077          * Finally, write out the contents of the new stack.
2078          */
2079         error = stk_copyout(args, usrstack - sp_slew, auxvpp, up);
2080         kmem_free(args->stk_base, args->stk_size);
2081         return (error);
2082 }