Print this page
7029 want per-process exploit mitigation features (secflags)
7030 want basic address space layout randomization (aslr)
7031 noexec_user_stack should be a secflag
7032 want a means to forbid mappings around NULL.

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/exec.c
          +++ new/usr/src/uts/common/os/exec.c
↓ open down ↓ 61 lines elided ↑ open up ↑
  62   62  #include <sys/utrap.h>
  63   63  #include <sys/systeminfo.h>
  64   64  #include <sys/stack.h>
  65   65  #include <sys/rctl.h>
  66   66  #include <sys/dtrace.h>
  67   67  #include <sys/lwpchan_impl.h>
  68   68  #include <sys/pool.h>
  69   69  #include <sys/sdt.h>
  70   70  #include <sys/brand.h>
  71   71  #include <sys/klpd.h>
       72 +#include <sys/random.h>
  72   73  
  73   74  #include <c2/audit.h>
  74   75  
  75   76  #include <vm/hat.h>
  76   77  #include <vm/anon.h>
  77   78  #include <vm/as.h>
  78   79  #include <vm/seg.h>
  79   80  #include <vm/seg_vn.h>
  80   81  
  81   82  #define PRIV_RESET              0x01    /* needs to reset privs */
↓ open down ↓ 10 lines elided ↑ open up ↑
  92   93  uint_t auxv_hwcap = 0;  /* auxv AT_SUN_HWCAP value; determined on the fly */
  93   94  uint_t auxv_hwcap_2 = 0;        /* AT_SUN_HWCAP2 */
  94   95  #if defined(_SYSCALL32_IMPL)
  95   96  uint_t auxv_hwcap32 = 0;        /* 32-bit version of auxv_hwcap */
  96   97  uint_t auxv_hwcap32_2 = 0;      /* 32-bit version of auxv_hwcap2 */
  97   98  #endif
  98   99  
  99  100  #define PSUIDFLAGS              (SNOCD|SUGID)
 100  101  
 101  102  /*
      103 + * ASLR skew limits.  These are consumed within the specific exec modules,
      104 + * but are defined here, in common code, because
      105 + *
      106 + * 1) The exec modules are unloadable, which would make this near useless.
      107 + *
      108 + * 2) We want them to be common across all exec modules, should more than
      109 + *    ELF come to support them.
      110 + *
      111 + * All must be powers of 2; exec_get_spslew() ASSERTs this for the stack skew.
      112 + */
      113 +size_t aslr_max_brk_skew = 16 * 1024 * 1024; /* 16MB */
      114 +#pragma weak exec_stackgap = aslr_max_stack_skew /* Old, compatible name */
      115 +size_t aslr_max_stack_skew = 64 * 1024; /* 64KB; 0 disables random slew */
      116 +
      117 +/*
 102  118   * exece() - system call wrapper around exec_common(), called with EBA_NONE
 103  119   */
 104  120  int
 105  121  exece(const char *fname, const char **argp, const char **envp)
 106  122  {
 107  123          int error;
 108  124  
 109  125          error = exec_common(fname, argp, envp, EBA_NONE);
 110  126          return (error ? (set_errno(error)) : 0); /* 0, or set_errno() */
 111  127  }
↓ open down ↓ 441 lines elided ↑ open up ↑
 553  569          int suidflags = 0;
 554  570          ssize_t resid;
 555  571          uid_t uid, gid;
 556  572          struct vattr vattr;
 557  573          char magbuf[MAGIC_BYTES];
 558  574          int setid;
 559  575          cred_t *oldcred, *newcred = NULL;
 560  576          int privflags = 0;
 561  577          int setidfl;
 562  578          priv_set_t fset;
      579 +        secflagset_t old_secflags;
      580 +
      581 +        secflags_copy(&old_secflags, &pp->p_secflags.psf_effective);
 563  582  
 564  583          /*
 565  584           * If the SNOCD or SUGID flag is set, turn it off and remember the
 566  585           * previous setting so we can restore it if we encounter an error.
 567  586           */
 568  587          if (level == 0 && (pp->p_flag & PSUIDFLAGS)) {
 569  588                  mutex_enter(&pp->p_lock);
 570  589                  suidflags = pp->p_flag & PSUIDFLAGS;
 571  590                  pp->p_flag &= ~PSUIDFLAGS;
 572  591                  mutex_exit(&pp->p_lock);
↓ open down ↓ 80 lines elided ↑ open up ↑
 653  672                  }
 654  673          } else if (level == 0 && args->pfcred != NULL) {
 655  674                  newcred = cred = args->pfcred;
 656  675                  privflags |= PRIV_INCREASE;
 657  676                  /* pfcred is not forced to adhere to these settings */
 658  677                  priv_intersect(&CR_LPRIV(cred), &CR_IPRIV(cred));
 659  678                  CR_EPRIV(cred) = CR_PPRIV(cred) = CR_IPRIV(cred);
 660  679                  priv_adjust_PA(cred);
 661  680          }
 662  681  
      682 +        /* The new image gets the inheritable secflags as its secflags */
      683 +        secflags_promote(pp);
      684 +
 663  685          /* SunOS 4.x buy-back */
 664  686          if ((vp->v_vfsp->vfs_flag & VFS_NOSETUID) &&
 665  687              (vattr.va_mode & (VSUID|VSGID))) {
 666  688                  char path[MAXNAMELEN];
 667  689                  refstr_t *mntpt = NULL;
 668  690                  int ret = -1;
 669  691  
 670  692                  bzero(path, sizeof (path));
 671  693                  zone_hold(pp->p_zone);
 672  694  
↓ open down ↓ 40 lines elided ↑ open up ↑
 713  735           * execsetid() told us whether or not we had to change the
 714  736           * credentials of the process.  In privflags, it told us
 715  737           * whether we gained any privileges or executed a set-uid executable.
 716  738           */
 717  739          setid = (privflags & (PRIV_SETUGID|PRIV_INCREASE|PRIV_FORCED));
 718  740  
 719  741          /*
 720  742           * Use /etc/system variable to determine if the stack
 721  743           * should be marked as executable by default.
 722  744           */
 723      -        if (noexec_user_stack)
      745 +        if ((noexec_user_stack != 0) ||
      746 +            secflag_enabled(pp, PROC_SEC_NOEXECSTACK))
 724  747                  args->stk_prot &= ~PROT_EXEC;
 725  748  
 726  749          args->execswp = eswp; /* Save execsw pointer in uarg for exec_func */
 727  750          args->ex_vp = vp;
 728  751  
 729  752          /*
 730  753           * Traditionally, the setid flags told the sub processes whether
 731  754           * the file just executed was set-uid or set-gid; this caused
 732  755           * some confusion as the 'setid' flag did not match the SUGID
 733  756           * process flag which is only set when the uids/gids do not match.
↓ open down ↓ 65 lines elided ↑ open up ↑
 799  822                          /*
 800  823                           * DTrace accesses t_cred in probe context.  t_cred
 801  824                           * must always be either NULL, or point to a valid,
 802  825                           * allocated cred structure.
 803  826                           */
 804  827                          oldcred = curthread->t_cred;
 805  828                          curthread->t_cred = cred;
 806  829                          crfree(oldcred);
 807  830  
 808  831                          if (priv_basic_test >= 0 &&
 809      -                            !PRIV_ISASSERT(&CR_IPRIV(newcred),
      832 +                            !PRIV_ISMEMBER(&CR_IPRIV(newcred),
 810  833                              priv_basic_test)) {
 811  834                                  pid_t pid = pp->p_pid;
 812  835                                  char *fn = PTOU(pp)->u_comm;
 813  836  
 814  837                                  cmn_err(CE_WARN, "%s[%d]: exec: basic_test "
 815  838                                      "privilege removed from E/I", fn, pid);
 816  839                          }
 817  840                  }
 818  841                  /*
 819  842                   * On emerging from a successful exec(), the saved
↓ open down ↓ 49 lines elided ↑ open up ↑
 869  892  
 870  893  bad:
 871  894          (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, cred, NULL);
 872  895  
 873  896  bad_noclose:
 874  897          if (newcred != NULL)
 875  898                  crfree(newcred);
 876  899          if (error == 0)
 877  900                  error = ENOEXEC;
 878  901  
      902 +        mutex_enter(&pp->p_lock);
 879  903          if (suidflags) {
 880      -                mutex_enter(&pp->p_lock);
 881  904                  pp->p_flag |= suidflags;
 882      -                mutex_exit(&pp->p_lock);
 883  905          }
      906 +        /*
      907 +         * Restore the effective secflags, to maintain the invariant that they
      908 +         * never change for a given process.
      909 +         */
      910 +        secflags_copy(&pp->p_secflags.psf_effective, &old_secflags);
      911 +        mutex_exit(&pp->p_lock);
      912 +
 884  913          return (error);
 885  914  }
 886  915  
 887  916  extern char *execswnames[];
 888  917  
 889  918  struct execsw *
 890  919  allocate_execsw(char *name, char *magic, size_t magic_size)
 891  920  {
 892  921          int i, j;
 893  922          char *ename;
↓ open down ↓ 886 lines elided ↑ open up ↑
1780 1809                          if (args->emulator != NULL)
1781 1810                                  ADDAUX(*a, AT_SUN_EMULATOR,
1782 1811                                      (int)(uintptr_t)&ustrp[*--offp])
1783 1812                  }
1784 1813          }
1785 1814  
1786 1815          return (0);
1787 1816  }
1788 1817  
1789 1818  /*
     1819 + * Return the slew to apply to the initial %sp (the actual stack base stays
     1820 + * constant).  With PROC_SEC_ASLR enabled and a non-zero aslr_max_stack_skew
     1821 + * it is a random SA()-aligned amount in [0,aslr_max_stack_skew), making life
     1822 + * harder for buffer overflows hoping to overwrite the return address.  In
     1823 + * all other cases it is 0, except on sun4v, where a rotating "color" offset
     1824 + * avoids cache thrashing when identical processes share caches with too
     1825 + * little associativity: their hot stack variables then live in different
     1826 + * cache sets, increasing effective associativity.  NOTE(review): the result
     1827 + * of random_get_pseudo_bytes() is ignored; confirm `off' is always filled.
     1828 + */
     1829 +size_t
     1830 +exec_get_spslew(void)
     1831 +{
     1832 +#ifdef sun4v
     1833 +        static uint_t sp_color_stride = 16;     /* scaled by SA() below */
     1834 +        static uint_t sp_color_mask = 0x1f;     /* keeps 32 distinct colors */
     1835 +        static uint_t sp_current_color = (uint_t)-1;
     1836 +#endif
     1837 +        size_t off;
     1838 +
     1839 +        ASSERT(ISP2(aslr_max_stack_skew));
     1840 +
     1841 +        if ((aslr_max_stack_skew == 0) ||
     1842 +            !secflag_enabled(curproc, PROC_SEC_ASLR)) {
     1843 +#ifdef sun4v
     1844 +                uint_t spcolor = atomic_inc_32_nv(&sp_current_color);
     1845 +                return ((size_t)((spcolor & sp_color_mask) *
     1846 +                    SA(sp_color_stride)));
     1847 +#else
     1848 +                return (0);     /* no ASLR and no cache coloring */
     1849 +#endif
     1850 +        }
     1851 +
     1852 +        (void) random_get_pseudo_bytes((uint8_t *)&off, sizeof (off));
     1853 +        return (SA(P2PHASE(off, aslr_max_stack_skew)));
     1854 +}
     1855 +
     1856 +/*
1790 1857   * Initialize a new user stack with the specified arguments and environment.
1791 1858   * The initial user stack layout is as follows:
1792 1859   *
1793 1860   *      User Stack
1794 1861   *      +---------------+ <--- curproc->p_usrstack
1795 1862   *      |               |
1796 1863   *      | slew          |
1797 1864   *      |               |
1798 1865   *      +---------------+
1799 1866   *      | NULL          |
↓ open down ↓ 209 lines elided ↑ open up ↑
2009 2076  
2010 2077          /* Too early to call map_pgsz for the heap */
2011 2078          if (use_stk_lpg) {
2012 2079                  p->p_stkpageszc = page_szc(map_pgsz(MAPPGSZ_STK, p, 0, 0, 0));
2013 2080          }
2014 2081  
2015 2082          mutex_enter(&p->p_lock);
2016 2083          p->p_flag |= SAUTOLPG;  /* kernel controls page sizes */
2017 2084          mutex_exit(&p->p_lock);
2018 2085  
2019      -        /*
2020      -         * Some platforms may choose to randomize real stack start by adding a
2021      -         * small slew (not more than a few hundred bytes) to the top of the
2022      -         * stack. This helps avoid cache thrashing when identical processes
2023      -         * simultaneously share caches that don't provide enough associativity
2024      -         * (e.g. sun4v systems). In this case stack slewing makes the same hot
2025      -         * stack variables in different processes to live in different cache
2026      -         * sets increasing effective associativity.
2027      -         */
2028 2086          sp_slew = exec_get_spslew();
2029 2087          ASSERT(P2PHASE(sp_slew, args->stk_align) == 0);
     2088 +        /* Be certain we don't underflow */
     2089 +        VERIFY((curproc->p_usrstack - (size + sp_slew)) < curproc->p_usrstack);
2030 2090          exec_set_sp(size + sp_slew);
2031 2091  
2032 2092          as = as_alloc();
2033 2093          p->p_as = as;
2034 2094          as->a_proc = p;
2035 2095          if (p->p_model == DATAMODEL_ILP32 || args->addr32)
2036 2096                  as->a_userlimit = (caddr_t)USERLIMIT32;
2037 2097          (void) hat_setup(as->a_hat, HAT_ALLOC);
2038 2098          hat_join_srd(as->a_hat, args->ex_vp);
2039 2099  
2040 2100          /*
2041 2101           * Finally, write out the contents of the new stack.
2042 2102           */
2043 2103          error = stk_copyout(args, usrstack - sp_slew, auxvpp, up);
2044 2104          kmem_free(args->stk_base, args->stk_size);
2045 2105          return (error);
2046 2106  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX