Print this page
uts: Allow for address space randomisation.
Randomise the base addresses of shared objects, non-fixed mappings, the
stack and the heap.  Introduce a service, svc:/system/process-security,
and a tool psecflags(1) to control and observe it

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/exec.c
          +++ new/usr/src/uts/common/os/exec.c
↓ open down ↓ 61 lines elided ↑ open up ↑
  62   62  #include <sys/utrap.h>
  63   63  #include <sys/systeminfo.h>
  64   64  #include <sys/stack.h>
  65   65  #include <sys/rctl.h>
  66   66  #include <sys/dtrace.h>
  67   67  #include <sys/lwpchan_impl.h>
  68   68  #include <sys/pool.h>
  69   69  #include <sys/sdt.h>
  70   70  #include <sys/brand.h>
  71   71  #include <sys/klpd.h>
       72 +#include <sys/random.h>
  72   73  
  73   74  #include <c2/audit.h>
  74   75  
  75   76  #include <vm/hat.h>
  76   77  #include <vm/anon.h>
  77   78  #include <vm/as.h>
  78   79  #include <vm/seg.h>
  79   80  #include <vm/seg_vn.h>
  80   81  
  81   82  #define PRIV_RESET              0x01    /* needs to reset privs */
↓ open down ↓ 10 lines elided ↑ open up ↑
  92   93  uint_t auxv_hwcap = 0;  /* auxv AT_SUN_HWCAP value; determined on the fly */
  93   94  uint_t auxv_hwcap_2 = 0;        /* AT_SUN_HWCAP2 */
  94   95  #if defined(_SYSCALL32_IMPL)
  95   96  uint_t auxv_hwcap32 = 0;        /* 32-bit version of auxv_hwcap */
  96   97  uint_t auxv_hwcap32_2 = 0;      /* 32-bit version of auxv_hwcap2 */
  97   98  #endif
  98   99  
  99  100  #define PSUIDFLAGS              (SNOCD|SUGID)
 100  101  
 101  102  /*
      103 + * Consumed within the specific exec modules, but defined here because:
      104 + *
      105 + * 1) The exec modules are unloadable, which would make this near useless.
      106 + *
      107 + * 2) We want them to be common across all of them, should more than ELF come
      108 + *    to support them.
      109 + *
      110 + * All must be powers of 2.
      111 + */
      112 +volatile size_t aslr_max_brk_skew = 16 * 1024 * 1024; /* 16MB */
      113 +#pragma weak exec_stackgap = aslr_max_stack_skew      /* Old, compatible name */
      114 +volatile size_t aslr_max_stack_skew = 64 * 1024;      /* 64KB */
      115 +
      116 +/*
 102  117   * exece() - system call wrapper around exec_common()
 103  118   */
 104  119  int
 105  120  exece(const char *fname, const char **argp, const char **envp)
 106  121  {
 107  122          int error;      /* exec_common() result; nonzero is an error */
 108  123  
 109  124          error = exec_common(fname, argp, envp, EBA_NONE);
 110  125          return (error ? (set_errno(error)) : 0);        /* 0 if no error */
 111  126  }
↓ open down ↓ 541 lines elided ↑ open up ↑
 653  668                  }
 654  669          } else if (level == 0 && args->pfcred != NULL) {
 655  670                  newcred = cred = args->pfcred;
 656  671                  privflags |= PRIV_INCREASE;
 657  672                  /* pfcred is not forced to adhere to these settings */
 658  673                  priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred));
 659  674                  CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred);
 660  675                  priv_adjust_PA(cred);
 661  676          }
 662  677  
      678 +        /* The new image gets the inheritable secflags as its secflags */
      679 +        /* XXX: This probably means we have the wrong secflags when exec fails */
      680 +        secflag_promote(pp);
      681 +
 663  682          /* SunOS 4.x buy-back */
 664  683          if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) &&
 665  684              (vattr.va_mode & (VSUID|VSGID))) {
 666  685                  char path[MAXNAMELEN];
 667  686                  refstr_t *mntpt = NULL;
 668  687                  int ret = -1;
 669  688  
 670  689                  bzero(path, sizeof (path));
 671  690                  zone_hold(pp->p_zone);
 672  691  
↓ open down ↓ 1094 lines elided ↑ open up ↑
1767 1786                          if (args->emulator != NULL)
1768 1787                                  ADDAUX(*a, AT_SUN_EMULATOR,
1769 1788                                      (int)(uintptr_t)&ustrp[*--offp])
1770 1789                  }
1771 1790          }
1772 1791  
1773 1792          return (0);
1774 1793  }
1775 1794  
1776 1795  /*
     1796 + * Though the actual stack base is constant, when ASLR is enabled slew the %sp
     1797 + * by a random aligned amount in [0,aslr_max_stack_skew).  Mostly, this makes
     1798 + * it slightly harder for buffer overflows to overwrite the return address.
     1799 + *
     1800 + * Without ASLR (or with a zero maximum skew) the slew is 0, except on sun4v,
     1801 + * where a small rotating slew is applied: identical processes sharing caches
     1802 + * without enough associativity would otherwise thrash, and slewing makes the
     1803 + * same hot stack variables in different processes live in different cache
     1804 + * sets, increasing effective associativity.
     1805 + */
     1806 +size_t
     1807 +exec_get_spslew(void)
     1808 +{
     1809 +#ifdef sun4v
     1810 +        static uint_t sp_color_stride = 16;
     1811 +        static uint_t sp_color_mask = 0x1f;
     1812 +        static uint_t sp_current_color = (uint_t)-1;
     1813 +#endif
     1814 +        size_t off = 0;         /* defined even if the RNG call fails */
     1815 +        size_t max_skew = aslr_max_stack_skew;  /* read tunable once */
     1816 +
     1817 +        ASSERT(ISP2(max_skew));
     1818 +
     1819 +        if (max_skew == 0 || !secflag_enabled(curproc, PROC_SEC_ASLR)) {
     1820 +#ifdef sun4v
     1821 +                uint_t spcolor = atomic_inc_32_nv(&sp_current_color);
     1822 +                return ((size_t)((spcolor & sp_color_mask) * SA(sp_color_stride)));
     1823 +#else
     1824 +                return (0);
     1825 +#endif
     1826 +        }
     1827 +
     1828 +        (void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
     1829 +        return (SA(P2PHASE(off, max_skew)));
     1830 +}
     1831 +
     1832 +/*
1777 1833   * Initialize a new user stack with the specified arguments and environment.
1778 1834   * The initial user stack layout is as follows:
1779 1835   *
1780 1836   *      User Stack
1781 1837   *      +---------------+ <--- curproc->p_usrstack
1782 1838   *      |               |
1783 1839   *      | slew          |
1784 1840   *      |               |
1785 1841   *      +---------------+
1786 1842   *      | NULL          |
↓ open down ↓ 209 lines elided ↑ open up ↑
1996 2052  
1997 2053          /* Too early to call map_pgsz for the heap */
1998 2054          if (use_stk_lpg) {
1999 2055                  p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, p, 0, 0, 0));
2000 2056          }
2001 2057  
2002 2058          mutex_enter(&p->p_lock);
2003 2059          p->p_flag |= SAUTOLPG;  /* kernel controls page sizes */
2004 2060          mutex_exit(&p->p_lock);
2005 2061  
2006      -        /*
2007      -         * Some platforms may choose to randomize real stack start by adding a
2008      -         * small slew (not more than a few hundred bytes) to the top of the
2009      -         * stack. This helps avoid cache thrashing when identical processes
2010      -         * simultaneously share caches that don't provide enough associativity
2011      -         * (e.g. sun4v systems). In this case stack slewing makes the same hot
2012      -         * stack variables in different processes to live in different cache
2013      -         * sets increasing effective associativity.
2014      -         */
2015 2062          sp_slew = exec_get_spslew();
2016 2063          ASSERT(P2PHASE(sp_slew, args->stk_align) == 0);
     2064 +        /* Be certain we don't underflow */
     2065 +        VERIFY((curproc->p_usrstack - (size + sp_slew)) < curproc->p_usrstack);
2017 2066          exec_set_sp(size + sp_slew);
2018 2067  
2019 2068          as = as_alloc();
2020 2069          p->p_as = as;
2021 2070          as->a_proc = p;
2022 2071          if (p->p_model == DATAMODEL_ILP32 || args->addr32)
2023 2072                  as->a_userlimit = (caddr_t)USERLIMIT32;
2024 2073          (void) hat_setup(as->a_hat, HAT_ALLOC);
2025 2074          hat_join_srd(as->a_hat, args->ex_vp);
2026 2075  
2027 2076          /*
2028 2077           * Finally, write out the contents of the new stack.
2029 2078           */
2030 2079          error = stk_copyout(args, usrstack - sp_slew, auxvpp, up);
2031 2080          kmem_free(args->stk_base, args->stk_size);
2032 2081          return (error);
2033 2082  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX