il-aslr Wdiff usr/src/uts/common/os/mmapobj.c

Print this page

uts: Allow for address space randomisation.
Randomise the base addresses of shared objects, non-fixed mappings, the
stack and the heap.  Introduce a service, svc:/system/process-security,
and a tool psecflags(1) to control and observe it

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/os/mmapobj.c
          +++ new/usr/src/uts/common/os/mmapobj.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   * Copyright 2014 Joyent, Inc.  All rights reserved.
  25   25   */
  26   26  
  27   27  #include <sys/types.h>
  28   28  #include <sys/sysmacros.h>
  29   29  #include <sys/kmem.h>
  30   30  #include <sys/param.h>
  31   31  #include <sys/systm.h>
  32   32  #include <sys/errno.h>
  33   33  #include <sys/mman.h>
  34   34  #include <sys/cmn_err.h>
  35   35  #include <sys/cred.h>
  36   36  #include <sys/vmsystm.h>
  37   37  #include <sys/machsystm.h>
  38   38  #include <sys/debug.h>
  39   39  #include <vm/as.h>
  40   40  #include <vm/seg.h>
  41   41  #include <sys/vmparam.h>
  42   42  #include <sys/vfs.h>
  43   43  #include <sys/elf.h>
  44   44  #include <sys/machelf.h>
  45   45  #include <sys/corectl.h>
  46   46  #include <sys/exec.h>
  47   47  #include <sys/exechdr.h>
  48   48  #include <sys/autoconf.h>
  49   49  #include <sys/mem.h>
  50   50  #include <vm/seg_dev.h>
  51   51  #include <sys/vmparam.h>
  52   52  #include <sys/mmapobj.h>
  53   53  #include <sys/atomic.h>
  54   54  
  55   55  /*
  56   56   * Theory statement:
  57   57   *
  58   58   * The main driving force behind mmapobj is to interpret and map ELF files
  59   59   * inside of the kernel instead of having the linker be responsible for this.
  60   60   *

↓ open down ↓

60 lines elided

↑ open up ↑

  61   61   * mmapobj also supports the AOUT 4.x binary format as well as flat files in
  62   62   * a read only manner.
  63   63   *
  64   64   * When interpreting and mapping an ELF file, mmapobj will map each PT_LOAD
  65   65   * or PT_SUNWBSS segment according to the ELF standard.  Refer to the "Linker
  66   66   * and Libraries Guide" for more information about the standard and mapping
  67   67   * rules.
  68   68   *
  69   69   * Having mmapobj interpret and map objects will allow the kernel to make the
  70   70   * best decision for where to place the mappings for said objects.  Thus, we
  71      - * can make optimizations inside of the kernel for specific platforms or
  72      - * cache mapping information to make mapping objects faster.
       71 + * can make optimizations inside of the kernel for specific platforms or cache
       72 + * mapping information to make mapping objects faster.  The cache is ignored
       73 + * if ASLR is enabled.
  73   74   *
  74   75   * The lib_va_hash will be one such optimization.  For each ELF object that
  75   76   * mmapobj is asked to interpret, we will attempt to cache the information
  76   77   * about the PT_LOAD and PT_SUNWBSS sections to speed up future mappings of
  77   78   * the same objects.  We will cache up to LIBVA_CACHED_SEGS (see below) program
  78   79   * headers which should cover a majority of the libraries out there without
  79   80   * wasting space.  In order to make sure that the cached information is valid,
  80   81   * we check the passed in vnode's mtime and ctime to make sure the vnode
  81   82   * has not been modified since the last time we used it.
  82   83   *

  83   84   * In addition, the lib_va_hash may contain a preferred starting VA for the
  84   85   * object which can be useful for platforms which support a shared context.
  85   86   * This will increase the likelihood that library text can be shared among
  86   87   * many different processes.  We limit the reserved VA space for 32 bit objects
  87   88   * in order to minimize fragmenting the process's address space.
  88   89   *
  89   90   * In addition to the above, the mmapobj interface allows for padding to be
  90   91   * requested before the first mapping and after the last mapping created.
  91   92   * When padding is requested, no additional optimizations will be made for
  92   93   * that request.
  93   94   */
  94   95  
  95   96  /*
  96   97   * Threshold to prevent allocating too much kernel memory to read in the
  97   98   * program headers for an object.  If the headers require more memory than
  98   99   * this threshold, we use a KM_NOSLEEP allocation to hold all of the program
  99  100   * headers, and that allocation may fail.  If less memory than this is
 100  101   * needed, then we use a KM_SLEEP allocation and are willing to wait for the
 101  102   * memory if we need to.
 102  103   */
 103  104  size_t mmapobj_alloc_threshold = 65536;
 104  105  
 105  106  /* Debug stats for test coverage; the counters exist only in DEBUG builds */
 106  107  #ifdef DEBUG
 107  108  struct mobj_stats {
 108  109          uint_t  mobjs_unmap_called;
 109  110          uint_t  mobjs_remap_devnull;
 110  111          uint_t  mobjs_lookup_start;
 111  112          uint_t  mobjs_alloc_start;
 112  113          uint_t  mobjs_alloc_vmem;
 113  114          uint_t  mobjs_add_collision;
 114  115          uint_t  mobjs_get_addr;
 115  116          uint_t  mobjs_map_flat_no_padding;
 116  117          uint_t  mobjs_map_flat_padding;
 117  118          uint_t  mobjs_map_ptload_text;
 118  119          uint_t  mobjs_map_ptload_initdata;
 119  120          uint_t  mobjs_map_ptload_preread;
 120  121          uint_t  mobjs_map_ptload_unaligned_text;
 121  122          uint_t  mobjs_map_ptload_unaligned_map_fail;
 122  123          uint_t  mobjs_map_ptload_unaligned_read_fail;
 123  124          uint_t  mobjs_zfoddiff;
 124  125          uint_t  mobjs_zfoddiff_nowrite;
 125  126          uint_t  mobjs_zfodextra;
 126  127          uint_t  mobjs_ptload_failed;
 127  128          uint_t  mobjs_map_elf_no_holes;
 128  129          uint_t  mobjs_unmap_hole;
 129  130          uint_t  mobjs_nomem_header;
 130  131          uint_t  mobjs_inval_header;
 131  132          uint_t  mobjs_overlap_header;
 132  133          uint_t  mobjs_np2_align;
 133  134          uint_t  mobjs_np2_align_overflow;
 134  135          uint_t  mobjs_exec_padding;
 135  136          uint_t  mobjs_exec_addr_mapped;
 136  137          uint_t  mobjs_exec_addr_devnull;
 137  138          uint_t  mobjs_exec_addr_in_use;
 138  139          uint_t  mobjs_lvp_found;
 139  140          uint_t  mobjs_no_loadable_yet;
 140  141          uint_t  mobjs_nothing_to_map;
 141  142          uint_t  mobjs_e2big;
 142  143          uint_t  mobjs_dyn_pad_align;
 143  144          uint_t  mobjs_dyn_pad_noalign;
 144  145          uint_t  mobjs_alloc_start_fail;
 145  146          uint_t  mobjs_lvp_nocache;
 146  147          uint_t  mobjs_extra_padding;
 147  148          uint_t  mobjs_lvp_not_needed;
 148  149          uint_t  mobjs_no_mem_map_sz;
 149  150          uint_t  mobjs_check_exec_failed;
 150  151          uint_t  mobjs_lvp_used;
 151  152          uint_t  mobjs_wrong_model;
 152  153          uint_t  mobjs_noexec_fs;
 153  154          uint_t  mobjs_e2big_et_rel;
 154  155          uint_t  mobjs_et_rel_mapped;
 155  156          uint_t  mobjs_unknown_elf_type;
 156  157          uint_t  mobjs_phent32_too_small;
 157  158          uint_t  mobjs_phent64_too_small;
 158  159          uint_t  mobjs_inval_elf_class;
 159  160          uint_t  mobjs_too_many_phdrs;
 160  161          uint_t  mobjs_no_phsize;
 161  162          uint_t  mobjs_phsize_large;
 162  163          uint_t  mobjs_phsize_xtralarge;
 163  164          uint_t  mobjs_fast_wrong_model;
 164  165          uint_t  mobjs_fast_e2big;
 165  166          uint_t  mobjs_fast;
 166  167          uint_t  mobjs_fast_success;
 167  168          uint_t  mobjs_fast_not_now;
 168  169          uint_t  mobjs_small_file;
 169  170          uint_t  mobjs_read_error;
 170  171          uint_t  mobjs_unsupported;
 171  172          uint_t  mobjs_flat_e2big;
 172  173          uint_t  mobjs_phent_align32;
 173  174          uint_t  mobjs_phent_align64;
 174  175          uint_t  mobjs_lib_va_find_hit;
 175  176          uint_t  mobjs_lib_va_find_delay_delete;
 176  177          uint_t  mobjs_lib_va_find_delete;
 177  178          uint_t  mobjs_lib_va_add_delay_delete;
 178  179          uint_t  mobjs_lib_va_add_delete;
 179  180          uint_t  mobjs_lib_va_create_failure;
 180  181          uint_t  mobjs_min_align;
 181  182  #if defined(__sparc)
 182  183          uint_t  mobjs_aout_uzero_fault;
 183  184          uint_t  mobjs_aout_64bit_try;
 184  185          uint_t  mobjs_aout_noexec;
 185  186          uint_t  mobjs_aout_e2big;
 186  187          uint_t  mobjs_aout_lib;
 187  188          uint_t  mobjs_aout_fixed;
 188  189          uint_t  mobjs_aout_zfoddiff;
 189  190          uint_t  mobjs_aout_map_bss;
 190  191          uint_t  mobjs_aout_bss_fail;
 191  192          uint_t  mobjs_aout_nlist;
 192  193          uint_t  mobjs_aout_addr_in_use;
 193  194  #endif
 194  195  } mobj_stats;
 195  196  
           /*
            * MOBJ_STAT_ADD(stat) bumps mobj_stats.mobjs_<stat> with a plain
            * post-increment.  In non-DEBUG builds it expands to nothing.
            */
 196  197  #define MOBJ_STAT_ADD(stat)             ((mobj_stats.mobjs_##stat)++)
 197  198  #else
 198  199  #define MOBJ_STAT_ADD(stat)
 199  200  #endif
 200  201  
 201  202  /*
 202  203   * Check if addr is at or above the address space reserved for the stack.
 203  204   * The stack is at the top of the address space for all sparc processes
 204  205   * and 64 bit x86 processes.  For 32 bit x86, the stack is not at the top
 205  206   * of the address space and thus this check will always return false for
 206  207   * 32 bit x86 processes.
            *
            * The lower bound of the reserved range is p_usrstack minus p_stk_ctl
            * rounded up to a page boundary.
            *
            * NOTE(review): addr and p are not parenthesized in the expansions below,
            * so an expression argument (e.g. base + len) relies on operator
            * precedence to be evaluated as intended.
 207  208   */
 208  209  #if defined(__sparc)
 209  210  #define OVERLAPS_STACK(addr, p)                                         \
 210  211          (addr >= (p->p_usrstack - ((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK)))
 211  212  #elif defined(__amd64)
 212  213  #define OVERLAPS_STACK(addr, p)                                         \
 213  214          ((p->p_model == DATAMODEL_LP64) &&                              \
 214  215          (addr >= (p->p_usrstack - ((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK))))
 215  216  #elif defined(__i386)
 216  217  #define OVERLAPS_STACK(addr, p) 0
 217  218  #endif
 218  219  
 219  220  /* lv_flags values - bitmap */
 220  221  #define LV_ELF32        0x1             /* 32 bit ELF file */
 221  222  #define LV_ELF64        0x2             /* 64 bit ELF file */
 222  223  #define LV_DEL          0x4             /* delete when lv_refcnt hits zero */
 223  224  
 224  225  /*
           * One cached entry in the lib_va hash, identified by the file's node id
           * and filesystem id and validated by its ctime and mtime.
           *
 225  226   * Note: lv_num_segs will denote how many segments this file has and will
 226  227   * only be set after the lv_mps array has been filled out.
 227  228   * lv_mps can only be valid if lv_num_segs is non-zero.
 228  229   */
 229  230  struct lib_va {
 230  231          struct lib_va           *lv_next;       /* next entry on hash chain */
 231  232          caddr_t                 lv_base_va;     /* start va for library */
 232  233          ssize_t                 lv_len;         /* total va span of library */
 233  234          size_t                  lv_align;       /* minimum alignment */
 234  235          uint64_t                lv_nodeid;      /* filesystem node id */
 235  236          uint64_t                lv_fsid;        /* filesystem id */
 236  237          timestruc_t             lv_ctime;       /* last time file was changed */
 237  238          timestruc_t             lv_mtime;       /* or modified */
 238  239          mmapobj_result_t        lv_mps[LIBVA_CACHED_SEGS]; /* cached pheaders */
 239  240          int                     lv_num_segs;    /* # segs for this file */
 240  241          int                     lv_flags;       /* LV_* bitmap, see above */
 241  242          uint_t                  lv_refcnt;      /* number of holds on struct */
 242  243  };
 243  244  
           /*
            * lib_va_hash has LIB_VA_SIZE buckets; LIB_VA_SIZE must be a power of 2
            * so that LIB_VA_MASK can reduce a hash value to a bucket index.
            * Each mutex in lib_va_hash_mutex covers (1 << LIB_VA_MUTEX_SHIFT)
            * consecutive buckets.
            */
 244  245  #define LIB_VA_SIZE     1024
 245  246  #define LIB_VA_MASK     (LIB_VA_SIZE - 1)
 246  247  #define LIB_VA_MUTEX_SHIFT      3
 247  248  
 248  249  #if (LIB_VA_SIZE & (LIB_VA_SIZE - 1))
 249  250  #error  "LIB_VA_SIZE is not a power of 2"
 250  251  #endif
 251  252  
 252  253  static struct lib_va *lib_va_hash[LIB_VA_SIZE];
 253  254  static kmutex_t lib_va_hash_mutex[LIB_VA_SIZE >> LIB_VA_MUTEX_SHIFT];
 254  255  
           /*
            * Mutex protecting hash bucket `index'.
            * NOTE(review): index is not parenthesized in the expansion.
            */
 255  256  #define LIB_VA_HASH_MUTEX(index)                                        \
 256  257          (&lib_va_hash_mutex[index >> LIB_VA_MUTEX_SHIFT])
 257  258  
           /* XOR the node id with two shifted copies of itself, then mask to a bucket */
 258  259  #define LIB_VA_HASH(nodeid)                                             \
 259  260          (((nodeid) ^ ((nodeid) << 7) ^ ((nodeid) << 13)) & LIB_VA_MASK)
 260  261  
           /*
            * In the LIB_VA_MATCH* macros arg1 is a struct lib_va * and arg2 is a
            * vattr_t *.  MATCH_ID compares node id and filesystem id; MATCH_TIME
            * compares ctime and mtime (both seconds and nanoseconds).
            */
 261  262  #define LIB_VA_MATCH_ID(arg1, arg2)                                     \
 262  263          ((arg1)->lv_nodeid == (arg2)->va_nodeid &&                      \
 263  264          (arg1)->lv_fsid == (arg2)->va_fsid)
 264  265  
 265  266  #define LIB_VA_MATCH_TIME(arg1, arg2)                                   \
 266  267          ((arg1)->lv_ctime.tv_sec == (arg2)->va_ctime.tv_sec &&          \
 267  268          (arg1)->lv_mtime.tv_sec == (arg2)->va_mtime.tv_sec &&           \
 268  269          (arg1)->lv_ctime.tv_nsec == (arg2)->va_ctime.tv_nsec &&         \
 269  270          (arg1)->lv_mtime.tv_nsec == (arg2)->va_mtime.tv_nsec)
 270  271  
 271  272  #define LIB_VA_MATCH(arg1, arg2)                                        \
 272  273          (LIB_VA_MATCH_ID(arg1, arg2) && LIB_VA_MATCH_TIME(arg1, arg2))
 273  274  
 274  275  /*
 275  276   * lib_va will be used for optimized allocation of address ranges for
 276  277   * libraries, such that subsequent mappings of the same library will attempt
 277  278   * to use the same VA as previous mappings of that library.
 278  279   * In order to map libraries at the same VA in many processes, we need to carve
 279  280   * out our own address space for them which is unique across many processes.
 280  281   * We use different arenas for 32 bit and 64 bit libraries.
 281  282   *
 282  283   * Since the 32 bit address space is relatively small, we limit the number of
 283  284   * libraries which try to use consistent virtual addresses to lib_threshold.
 284  285   * For 64 bit libraries there is no such limit since the address space is large.
 285  286   */
 286  287  static vmem_t *lib_va_32_arena;
 287  288  static vmem_t *lib_va_64_arena;
 288  289  uint_t lib_threshold = 20;      /* modifiable via /etc/system */
 289  290  
 290  291  static kmutex_t lib_va_init_mutex;      /* no need to initialize */
 291  292  
 292  293  /*
 293  294   * Number of 32 bit and 64 bit libraries in the lib_va hash that have a
            * reserved base VA.  Incremented in lib_va_add_hash() when base_va is
            * non-NULL and decremented in lib_va_free() when lv_base_va is non-NULL.
 294  295   */
 295  296  static uint_t libs_mapped_32 = 0;
 296  297  static uint_t libs_mapped_64 = 0;
 297  298  
 298  299  /*
 299  300   * Free up the resources associated with lvp as well as lvp itself.
 300  301   * We also decrement the number of libraries mapped via a lib_va
 301  302   * cached virtual address.
            *
            * lvp must have no remaining holds (lv_refcnt == 0 is asserted).
            * If lvp has a reserved base VA, that range is returned to the 64 bit
            * or 32 bit arena according to LV_ELF64 in lv_flags, and the matching
            * libs_mapped counter is decremented.  Entries without a base VA only
            * have their structure freed.
 302  303   */
 303  304  void
 304  305  lib_va_free(struct lib_va *lvp)
 305  306  {
 306  307          int is_64bit = lvp->lv_flags & LV_ELF64;
 307  308          ASSERT(lvp->lv_refcnt == 0);
 308  309  
 309  310          if (lvp->lv_base_va != NULL) {
 310  311                  vmem_xfree(is_64bit ? lib_va_64_arena : lib_va_32_arena,
 311  312                      lvp->lv_base_va, lvp->lv_len);
 312  313                  if (is_64bit) {
 313  314                          atomic_dec_32(&libs_mapped_64);
 314  315                  } else {
 315  316                          atomic_dec_32(&libs_mapped_32);
 316  317                  }
 317  318          }
 318  319          kmem_free(lvp, sizeof (struct lib_va));
 319  320  }
 320  321  
 321  322  /*
 322  323   * See if the file associated with the vap passed in is in the lib_va hash.
 323  324   * If it is and the file has not been modified since last use, then
 324  325   * return a pointer to that data.  Otherwise, return NULL if the file has
 325  326   * changed or the file was not found in the hash.
            *
            * On a hit, lv_refcnt is incremented while the hash mutex is held; the
            * hold is dropped with lib_va_release().
            *
            * If an entry with the same node id and fsid is found but its ctime or
            * mtime differs, it is unlinked from the hash chain.  It is freed right
            * away when it has no holds; otherwise it is marked LV_DEL so that the
            * last lib_va_release() frees it.
 326  327   */
 327  328  static struct lib_va *
 328  329  lib_va_find(vattr_t *vap)
 329  330  {
 330  331          struct lib_va *lvp;
 331  332          struct lib_va *del = NULL;
 332  333          struct lib_va **tmp;
 333  334          uint_t index;
 334  335          index = LIB_VA_HASH(vap->va_nodeid);
 335  336  
 336  337          mutex_enter(LIB_VA_HASH_MUTEX(index));
                   /* tmp always points at the link that references the current entry */
 337  338          tmp = &lib_va_hash[index];
 338  339          while (*tmp != NULL) {
 339  340                  lvp = *tmp;
 340  341                  if (LIB_VA_MATCH_ID(lvp, vap)) {
 341  342                          if (LIB_VA_MATCH_TIME(lvp, vap)) {
 342  343                                  ASSERT((lvp->lv_flags & LV_DEL) == 0);
 343  344                                  lvp->lv_refcnt++;
 344  345                                  MOBJ_STAT_ADD(lib_va_find_hit);
 345  346                          } else {
 346  347                                  /*
 347  348                                   * file was updated since last use.
 348  349                                   * need to remove it from list.
 349  350                                   */
 350  351                                  del = lvp;
 351  352                                  *tmp = del->lv_next;
 352  353                                  del->lv_next = NULL;
 353  354                                  /*
 354  355                                   * If we can't delete it now, mark it for later
 355  356                                   */
 356  357                                  if (del->lv_refcnt) {
 357  358                                          MOBJ_STAT_ADD(lib_va_find_delay_delete);
 358  359                                          del->lv_flags |= LV_DEL;
 359  360                                          del = NULL;
 360  361                                  }
 361  362                                  lvp = NULL;
 362  363                          }
 363  364                          mutex_exit(LIB_VA_HASH_MUTEX(index));
                                   /* The stale entry is freed after the mutex is dropped */
 364  365                          if (del) {
 365  366                                  ASSERT(del->lv_refcnt == 0);
 366  367                                  MOBJ_STAT_ADD(lib_va_find_delete);
 367  368                                  lib_va_free(del);
 368  369                          }
 369  370                          return (lvp);
 370  371                  }
 371  372                  tmp = &lvp->lv_next;
 372  373          }
 373  374          mutex_exit(LIB_VA_HASH_MUTEX(index));
 374  375          return (NULL);
 375  376  }
 376  377  
 377  378  /*
 378  379   * Add a new entry to the lib_va hash.
 379  380   * Search the hash while holding the appropriate mutex to make sure that the
 380  381   * data is not already in the cache.  If we find data that is in the cache
 381  382   * already and has not been modified since last use, we return NULL.  If it
 382  383   * has been modified since last use, we will remove that entry from
 383  384   * the hash and it will be deleted once its reference count reaches zero.
 384  385   * If there is no current entry in the hash we will add the new entry and
 385  386   * return it to the caller who is responsible for calling lib_va_release to
 386  387   * drop their reference count on it.
 387  388   *
 388  389   * lv_num_segs will be set to zero since the caller needs to add that
 389  390   * information to the data structure.
            *
            * base_va, len and align are stored in the new entry as lv_base_va,
            * lv_len and lv_align; vap supplies the node id, fsid, ctime and mtime.
            * The entry is tagged LV_ELF64 or LV_ELF32 from get_udatamodel(), and a
            * non-NULL base_va bumps the matching libs_mapped counter.
 390  391   */
 391  392  static struct lib_va *
 392  393  lib_va_add_hash(caddr_t base_va, ssize_t len, size_t align, vattr_t *vap)
 393  394  {
 394  395          struct lib_va *lvp;
 395  396          uint_t index;
 396  397          model_t model;
 397  398          struct lib_va **tmp;
 398  399          struct lib_va *del = NULL;
 399  400  
 400  401          model = get_udatamodel();
 401  402          index = LIB_VA_HASH(vap->va_nodeid);
 402  403  
                   /* The KM_SLEEP allocation is made before the hash mutex is taken */
 403  404          lvp = kmem_alloc(sizeof (struct lib_va), KM_SLEEP);
 404  405  
 405  406          mutex_enter(LIB_VA_HASH_MUTEX(index));
 406  407  
 407  408          /*
 408  409           * Make sure not adding same data a second time.
 409  410           * The hash chains should be relatively short and adding
 410  411           * is a relatively rare event, so it's worth the check.
 411  412           */
 412  413          tmp = &lib_va_hash[index];
 413  414          while (*tmp != NULL) {
 414  415                  if (LIB_VA_MATCH_ID(*tmp, vap)) {
 415  416                          if (LIB_VA_MATCH_TIME(*tmp, vap)) {
 416  417                                  mutex_exit(LIB_VA_HASH_MUTEX(index));
 417  418                                  kmem_free(lvp, sizeof (struct lib_va));
 418  419                                  return (NULL);
 419  420                          }
 420  421  
 421  422                          /*
 422  423                           * We have the same nodeid and fsid but the file has
 423  424                           * been modified since we last saw it.
 424  425                           * Need to remove the old node and add this new
 425  426                           * one.
 426  427                           * Could probably use a callback mechanism to make
 427  428                           * this cleaner.
 428  429                           */
 429  430                          ASSERT(del == NULL);
 430  431                          del = *tmp;
 431  432                          *tmp = del->lv_next;
 432  433                          del->lv_next = NULL;
 433  434  
 434  435                          /*
 435  436                           * Check to see if we can free it.  If lv_refcnt
 436  437                           * is greater than zero, then some other thread
 437  438                           * has a reference to the one we want to delete
 438  439                           * and we can not delete it.  All of this is done
 439  440                           * under the lib_va_hash_mutex lock so it is atomic.
 440  441                           */
 441  442                          if (del->lv_refcnt) {
 442  443                                  MOBJ_STAT_ADD(lib_va_add_delay_delete);
 443  444                                  del->lv_flags |= LV_DEL;
 444  445                                  del = NULL;
 445  446                          }
 446  447                          /* tmp is already advanced */
 447  448                          continue;
 448  449                  }
 449  450                  tmp = &((*tmp)->lv_next);
 450  451          }
 451  452  
 452  453          lvp->lv_base_va = base_va;
 453  454          lvp->lv_len = len;
 454  455          lvp->lv_align = align;
 455  456          lvp->lv_nodeid = vap->va_nodeid;
 456  457          lvp->lv_fsid = vap->va_fsid;
 457  458          lvp->lv_ctime.tv_sec = vap->va_ctime.tv_sec;
 458  459          lvp->lv_ctime.tv_nsec = vap->va_ctime.tv_nsec;
 459  460          lvp->lv_mtime.tv_sec = vap->va_mtime.tv_sec;
 460  461          lvp->lv_mtime.tv_nsec = vap->va_mtime.tv_nsec;
 461  462          lvp->lv_next = NULL;
                   /* The caller's hold; dropped via lib_va_release() */
 462  463          lvp->lv_refcnt = 1;
 463  464  
 464  465          /* Caller responsible for filling this and lv_mps out */
 465  466          lvp->lv_num_segs = 0;
 466  467  
 467  468          if (model == DATAMODEL_LP64) {
 468  469                  lvp->lv_flags = LV_ELF64;
 469  470          } else {
 470  471                  ASSERT(model == DATAMODEL_ILP32);
 471  472                  lvp->lv_flags = LV_ELF32;
 472  473          }
 473  474  
 474  475          if (base_va != NULL) {
 475  476                  if (model == DATAMODEL_LP64) {
 476  477                          atomic_inc_32(&libs_mapped_64);
 477  478                  } else {
 478  479                          ASSERT(model == DATAMODEL_ILP32);
 479  480                          atomic_inc_32(&libs_mapped_32);
 480  481                  }
 481  482          }
                   /* The search loop left tmp at the tail link, so this appends lvp */
 482  483          ASSERT(*tmp == NULL);
 483  484          *tmp = lvp;
 484  485          mutex_exit(LIB_VA_HASH_MUTEX(index));
                   /* Any replaced, unreferenced entry is freed after the mutex is dropped */
 485  486          if (del) {
 486  487                  ASSERT(del->lv_refcnt == 0);
 487  488                  MOBJ_STAT_ADD(lib_va_add_delete);
 488  489                  lib_va_free(del);
 489  490          }
 490  491          return (lvp);
 491  492  }
 492  493  
 493  494  /*
 494  495   * Release the hold on lvp which was acquired by lib_va_find or lib_va_add_hash.
 495  496   * In addition, if this is the last hold and lvp is marked for deletion,
 496  497   * free up its reserved address space and free the structure.
            *
            * The reference count is decremented and LV_DEL is checked under the hash
            * mutex for lvp's bucket; the actual free happens after the mutex is
            * dropped.
 497  498   */
 498  499  static void
 499  500  lib_va_release(struct lib_va *lvp)
 500  501  {
 501  502          uint_t index;
 502  503          int to_del = 0;
 503  504  
 504  505          ASSERT(lvp->lv_refcnt > 0);
 505  506  
 506  507          index = LIB_VA_HASH(lvp->lv_nodeid);
 507  508          mutex_enter(LIB_VA_HASH_MUTEX(index));
 508  509          if (--lvp->lv_refcnt == 0 && (lvp->lv_flags & LV_DEL)) {
 509  510                  to_del = 1;
 510  511          }
 511  512          mutex_exit(LIB_VA_HASH_MUTEX(index));
 512  513          if (to_del) {
                           /* LV_DEL entries were unlinked (lv_next cleared) when marked */
 513  514                  ASSERT(lvp->lv_next == 0);
 514  515                  lib_va_free(lvp);
 515  516          }
 516  517  }
 517  518  
 518  519  /*
 519  520   * Dummy function for mapping through /dev/null
 520  521   * Normally I would have used mmmmap in common/io/mem.c
 521  522   * but that is a static function, and for /dev/null, it
 522  523   * just returns -1.
            *
            * All three arguments are ignored and -1 is always returned.  It is
            * installed as the segdev mapfunc for the /dev/null mappings created in
            * mmapobj_unmap().
 523  524   */
 524  525  /* ARGSUSED */
 525  526  static int
 526  527  mmapobj_dummy(dev_t dev, off_t off, int prot)
 527  528  {
 528  529          return (-1);
 529  530  }
 530  531  
 531  532  /*
 532  533   * Called when an error occurred which requires mmapobj to return failure.
 533  534   * All mapped objects will be unmapped and /dev/null mappings will be
 534  535   * reclaimed if necessary.
 535  536   * num_mapped is the number of elements of mrp which have been mapped, and
 536  537   * num_segs is the total number of elements in mrp.
 537  538   * For e_type ET_EXEC, we need to unmap all of the elements in mrp since
 538  539   * we had already made reservations for them.
 539  540   * If num_mapped equals num_segs, then we know that we had fully mapped
 540  541   * the file and only need to clean up the segments described.
 541  542   * If they are not equal, then for ET_DYN we will unmap the range from the
 542  543   * end of the last mapped segment to the end of the last segment in mrp
 543  544   * since we would have made a reservation for that memory earlier.
 544  545   * If e_type is passed in as zero, num_mapped must equal num_segs.
            *
            * The mappings are changed in the current process's address space
            * (curproc->p_as).  Return values of as_unmap() and as_map() are ignored
            * (cast to void).
 545  546   */
 546  547  void
 547  548  mmapobj_unmap(mmapobj_result_t *mrp, int num_mapped, int num_segs,
 548  549      ushort_t e_type)
 549  550  {
 550  551          int i;
 551  552          struct as *as = curproc->p_as;
 552  553          caddr_t addr;
 553  554          size_t size;
 554  555  
                   /* ET_EXEC: every element of mrp is unmapped, not just the mapped ones */
 555  556          if (e_type == ET_EXEC) {
 556  557                  num_mapped = num_segs;
 557  558          }
 558  559  #ifdef DEBUG
 559  560          if (e_type == 0) {
 560  561                  ASSERT(num_mapped == num_segs);
 561  562          }
 562  563  #endif
 563  564  
 564  565          MOBJ_STAT_ADD(unmap_called);
 565  566          for (i = 0; i < num_mapped; i++) {
 566  567  
 567  568                  /*
 568  569                   * If we are going to have to create a mapping we need to
 569  570                   * make sure that no one else will use the address we
 570  571                   * need to remap between the time it is unmapped and
 571  572                   * mapped below.
 572  573                   */
 573  574                  if (mrp[i].mr_flags & MR_RESV) {
 574  575                          as_rangelock(as);
 575  576                  }
 576  577                  /* Always need to unmap what we mapped */
 577  578                  (void) as_unmap(as, mrp[i].mr_addr, mrp[i].mr_msize);
 578  579  
 579  580                  /* Need to reclaim /dev/null reservation from earlier */
 580  581                  if (mrp[i].mr_flags & MR_RESV) {
 581  582                          struct segdev_crargs dev_a;
 582  583  
 583  584                          ASSERT(e_type != ET_DYN);
 584  585                          /*
 585  586                           * Use seg_dev segment driver for /dev/null mapping.
 586  587                           */
 587  588                          dev_a.mapfunc = mmapobj_dummy;
 588  589                          dev_a.dev = makedevice(mm_major, M_NULL);
 589  590                          dev_a.offset = 0;
 590  591                          dev_a.type = 0;         /* neither PRIVATE nor SHARED */
 591  592                          dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
 592  593                          dev_a.hat_attr = 0;
 593  594                          dev_a.hat_flags = 0;
 594  595  
 595  596                          (void) as_map(as, mrp[i].mr_addr, mrp[i].mr_msize,
 596  597                              segdev_create, &dev_a);
 597  598                          MOBJ_STAT_ADD(remap_devnull);
                                   /* Pairs with the as_rangelock() taken before as_unmap() */
 598  599                          as_rangeunlock(as);
 599  600                  }
 600  601          }
 601  602  
 602  603          if (num_mapped != num_segs) {
 603  604                  ASSERT(e_type == ET_DYN);
 604  605                  /* Need to unmap any reservation made after last mapped seg */
 605  606                  if (num_mapped == 0) {
 606  607                          addr = mrp[0].mr_addr;
 607  608                  } else {
 608  609                          addr = mrp[num_mapped - 1].mr_addr +
 609  610                              mrp[num_mapped - 1].mr_msize;
 610  611                  }
                           /* size runs from addr to the end of the last segment in mrp */
 611  612                  size = (size_t)mrp[num_segs - 1].mr_addr +
 612  613                      mrp[num_segs - 1].mr_msize - (size_t)addr;
 613  614                  (void) as_unmap(as, addr, size);
 614  615  
 615  616                  /*
 616  617                   * Now we need to unmap the holes between mapped segs.
 617  618                   * Note that we have not mapped all of the segments and thus
 618  619                   * the holes between segments would not have been unmapped
 619  620                   * yet.  If num_mapped == num_segs, then all of the holes
 620  621                   * between segments would have already been unmapped.
 621  622                   */
 622  623  
 623  624                  for (i = 1; i < num_mapped; i++) {
 624  625                          addr = mrp[i - 1].mr_addr + mrp[i - 1].mr_msize;
 625  626                          size = mrp[i].mr_addr - addr;
 626  627                          (void) as_unmap(as, addr, size);
 627  628                  }
 628  629          }
 629  630  }
 630  631  
 631  632  /*
 632  633   * We need to add the start address into mrp so that the unmap function
 633  634   * has absolute addresses to use.
 634  635   */
 635  636  static void
 636  637  mmapobj_unmap_exec(mmapobj_result_t *mrp, int num_mapped, caddr_t start_addr)
 637  638  {
 638  639          int i;
 639  640  
 640  641          for (i = 0; i < num_mapped; i++) {
 641  642                  mrp[i].mr_addr += (size_t)start_addr;
 642  643          }
 643  644          mmapobj_unmap(mrp, num_mapped, num_mapped, ET_EXEC);
 644  645  }
 645  646  
 646  647  static caddr_t
 647  648  mmapobj_lookup_start_addr(struct lib_va *lvp)
 648  649  {
 649  650          proc_t *p = curproc;
 650  651          struct as *as = p->p_as;
 651  652          struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_USER, PROT_ALL);
 652  653          int error;
 653  654          uint_t ma_flags = _MAP_LOW32;
 654  655          caddr_t base = NULL;
 655  656          size_t len;
 656  657          size_t align;
 657  658  
 658  659          ASSERT(lvp != NULL);
 659  660          MOBJ_STAT_ADD(lookup_start);
 660  661  
 661  662          as_rangelock(as);
 662  663  
 663  664          base = lvp->lv_base_va;
 664  665          len = lvp->lv_len;
 665  666  
 666  667          /*
 667  668           * If we don't have an expected base address, or the one that we want
 668  669           * to use is not available or acceptable, go get an acceptable
 669  670           * address range.
 670  671           */
 671  672          if (base == NULL || as_gap(as, len, &base, &len, 0, NULL) ||
 672  673              valid_usr_range(base, len, PROT_ALL, as, as->a_userlimit) !=
 673  674              RANGE_OKAY || OVERLAPS_STACK(base + len, p)) {
 674  675                  if (lvp->lv_flags & LV_ELF64) {
 675  676                          ma_flags = 0;
 676  677                  }
 677  678  
 678  679                  align = lvp->lv_align;
 679  680                  if (align > 1) {
 680  681                          ma_flags |= MAP_ALIGN;
 681  682                  }
 682  683  
 683  684                  base = (caddr_t)align;
 684  685                  map_addr(&base, len, 0, 1, ma_flags);
 685  686          }
 686  687  
 687  688          /*
 688  689           * Need to reserve the address space we're going to use.
 689  690           * Don't reserve swap space since we'll be mapping over this.
 690  691           */
 691  692          if (base != NULL) {
 692  693                  crargs.flags |= MAP_NORESERVE;
 693  694                  error = as_map(as, base, len, segvn_create, &crargs);
 694  695                  if (error) {
 695  696                          base = NULL;
 696  697                  }
 697  698          }
 698  699  
 699  700          as_rangeunlock(as);
 700  701          return (base);
 701  702  }
 702  703  
 703  704  /*
 704  705   * Get the starting address for a given file to be mapped and return it
 705  706   * to the caller.  If we're using lib_va and we need to allocate an address,
 706  707   * we will attempt to allocate it from the global reserved pool such that the
 707  708   * same address can be used in the future for this file.  If we can't use the
 708  709   * reserved address then we just get one that will fit in our address space.
 709  710   *
 710  711   * Returns the starting virtual address for the range to be mapped or NULL

↓ open down ↓

628 lines elided

↑ open up ↑

 711  712   * if an error is encountered. If we successfully insert the requested info
 712  713   * into the lib_va hash, then *lvpp will be set to point to this lib_va
 713  714   * structure.  The structure will have a hold on it and thus lib_va_release
 714  715   * needs to be called on it by the caller.  This function will not fill out
 715  716   * lv_mps or lv_num_segs since it does not have enough information to do so.
 716  717   * The caller is responsible for doing this making sure that any modifications
 717  718   * to lv_mps are visible before setting lv_num_segs.
 718  719   */
 719  720  static caddr_t
 720  721  mmapobj_alloc_start_addr(struct lib_va **lvpp, size_t len, int use_lib_va,
 721      -    size_t align, vattr_t *vap)
      722 +    int randomize, size_t align, vattr_t *vap)
 722  723  {
 723  724          proc_t *p = curproc;
 724  725          struct as *as = p->p_as;
 725  726          struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_USER, PROT_ALL);
 726  727          int error;
 727  728          model_t model;
 728  729          uint_t ma_flags = _MAP_LOW32;
 729  730          caddr_t base = NULL;
 730  731          vmem_t *model_vmem;
 731  732          size_t lib_va_start;
 732  733          size_t lib_va_end;
 733  734          size_t lib_va_len;
 734  735  
 735  736          ASSERT(lvpp != NULL);
      737 +        ASSERT((randomize & use_lib_va) != 1);
 736  738  
 737  739          MOBJ_STAT_ADD(alloc_start);
 738  740          model = get_udatamodel();
 739  741  
 740  742          if (model == DATAMODEL_LP64) {
 741  743                  ma_flags = 0;
 742  744                  model_vmem = lib_va_64_arena;
 743  745          } else {
 744  746                  ASSERT(model == DATAMODEL_ILP32);
 745  747                  model_vmem = lib_va_32_arena;
 746  748          }
 747  749  
 748  750          if (align > 1) {
 749  751                  ma_flags |= MAP_ALIGN;
 750  752          }
      753 +
      754 +        if (randomize != 0)
      755 +                ma_flags |= _MAP_RANDOMIZE;
      756 +
 751  757          if (use_lib_va) {
 752  758                  /*
 753  759                   * The first time through, we need to setup the lib_va arenas.
 754  760                   * We call map_addr to find a suitable range of memory to map
 755  761                   * the given library, and we will set the highest address
 756  762                   * in our vmem arena to the end of this adddress range.
 757  763                   * We allow up to half of the address space to be used
 758  764                   * for lib_va addresses but we do not prevent any allocations
 759  765                   * in this range from other allocation paths.
 760  766                   */

 761  767                  if (lib_va_64_arena == NULL && model == DATAMODEL_LP64) {
 762  768                          mutex_enter(&lib_va_init_mutex);
 763  769                          if (lib_va_64_arena == NULL) {
 764  770                                  base = (caddr_t)align;
 765  771                                  as_rangelock(as);
 766  772                                  map_addr(&base, len, 0, 1, ma_flags);
 767  773                                  as_rangeunlock(as);
 768  774                                  if (base == NULL) {
 769  775                                          mutex_exit(&lib_va_init_mutex);
 770  776                                          MOBJ_STAT_ADD(lib_va_create_failure);
 771  777                                          goto nolibva;
 772  778                                  }
 773  779                                  lib_va_end = (size_t)base + len;
 774  780                                  lib_va_len = lib_va_end >> 1;
 775  781                                  lib_va_len = P2ROUNDUP(lib_va_len, PAGESIZE);
 776  782                                  lib_va_start = lib_va_end - lib_va_len;
 777  783  
 778  784                                  /*
 779  785                                   * Need to make sure we avoid the address hole.
 780  786                                   * We know lib_va_end is valid but we need to
 781  787                                   * make sure lib_va_start is as well.
 782  788                                   */
 783  789                                  if ((lib_va_end > (size_t)hole_end) &&
 784  790                                      (lib_va_start < (size_t)hole_end)) {
 785  791                                          lib_va_start = P2ROUNDUP(
 786  792                                              (size_t)hole_end, PAGESIZE);
 787  793                                          lib_va_len = lib_va_end - lib_va_start;
 788  794                                  }
 789  795                                  lib_va_64_arena = vmem_create("lib_va_64",
 790  796                                      (void *)lib_va_start, lib_va_len, PAGESIZE,
 791  797                                      NULL, NULL, NULL, 0,
 792  798                                      VM_NOSLEEP | VMC_IDENTIFIER);
 793  799                                  if (lib_va_64_arena == NULL) {
 794  800                                          mutex_exit(&lib_va_init_mutex);
 795  801                                          goto nolibva;
 796  802                                  }
 797  803                          }
 798  804                          model_vmem = lib_va_64_arena;
 799  805                          mutex_exit(&lib_va_init_mutex);
 800  806                  } else if (lib_va_32_arena == NULL &&
 801  807                      model == DATAMODEL_ILP32) {
 802  808                          mutex_enter(&lib_va_init_mutex);
 803  809                          if (lib_va_32_arena == NULL) {
 804  810                                  base = (caddr_t)align;
 805  811                                  as_rangelock(as);
 806  812                                  map_addr(&base, len, 0, 1, ma_flags);
 807  813                                  as_rangeunlock(as);
 808  814                                  if (base == NULL) {
 809  815                                          mutex_exit(&lib_va_init_mutex);
 810  816                                          MOBJ_STAT_ADD(lib_va_create_failure);
 811  817                                          goto nolibva;
 812  818                                  }
 813  819                                  lib_va_end = (size_t)base + len;
 814  820                                  lib_va_len = lib_va_end >> 1;
 815  821                                  lib_va_len = P2ROUNDUP(lib_va_len, PAGESIZE);
 816  822                                  lib_va_start = lib_va_end - lib_va_len;
 817  823                                  lib_va_32_arena = vmem_create("lib_va_32",
 818  824                                      (void *)lib_va_start, lib_va_len, PAGESIZE,
 819  825                                      NULL, NULL, NULL, 0,
 820  826                                      VM_NOSLEEP | VMC_IDENTIFIER);
 821  827                                  if (lib_va_32_arena == NULL) {
 822  828                                          mutex_exit(&lib_va_init_mutex);
 823  829                                          goto nolibva;
 824  830                                  }
 825  831                          }
 826  832                          model_vmem = lib_va_32_arena;
 827  833                          mutex_exit(&lib_va_init_mutex);
 828  834                  }
 829  835  
 830  836                  if (model == DATAMODEL_LP64 || libs_mapped_32 < lib_threshold) {
 831  837                          base = vmem_xalloc(model_vmem, len, align, 0, 0, NULL,
 832  838                              NULL, VM_NOSLEEP | VM_ENDALLOC);
 833  839                          MOBJ_STAT_ADD(alloc_vmem);
 834  840                  }
 835  841  
 836  842                  /*
 837  843                   * Even if the address fails to fit in our address space,
 838  844                   * or we can't use a reserved address,
 839  845                   * we should still save it off in lib_va_hash.
 840  846                   */
 841  847                  *lvpp = lib_va_add_hash(base, len, align, vap);
 842  848  
 843  849                  /*
 844  850                   * Check for collision on insertion and free up our VA space.
 845  851                   * This is expected to be rare, so we'll just reset base to
 846  852                   * NULL instead of looking it up in the lib_va hash.
 847  853                   */
 848  854                  if (*lvpp == NULL) {
 849  855                          if (base != NULL) {
 850  856                                  vmem_xfree(model_vmem, base, len);
 851  857                                  base = NULL;
 852  858                                  MOBJ_STAT_ADD(add_collision);
 853  859                          }

↓ open down ↓

93 lines elided

↑ open up ↑

 854  860                  }
 855  861          }
 856  862  
 857  863  nolibva:
 858  864          as_rangelock(as);
 859  865  
 860  866          /*
 861  867           * If we don't have an expected base address, or the one that we want
 862  868           * to use is not available or acceptable, go get an acceptable
 863  869           * address range.
      870 +         *
      871 +         * If ASLR is enabled, we should never have used the cache, and should
      872 +         * also start our real work here, in the consequent of the next
      873 +         * condition.
 864  874           */
      875 +        if (randomize != 0)
      876 +                ASSERT(base == NULL);
      877 +
 865  878          if (base == NULL || as_gap(as, len, &base, &len, 0, NULL) ||
 866  879              valid_usr_range(base, len, PROT_ALL, as, as->a_userlimit) !=
 867  880              RANGE_OKAY || OVERLAPS_STACK(base + len, p)) {
 868  881                  MOBJ_STAT_ADD(get_addr);
 869  882                  base = (caddr_t)align;
 870  883                  map_addr(&base, len, 0, 1, ma_flags);
 871  884          }
 872  885  
 873  886          /*
 874  887           * Need to reserve the address space we're going to use.

 875  888           * Don't reserve swap space since we'll be mapping over this.
 876  889           */
 877  890          if (base != NULL) {
 878  891                  /* Don't reserve swap space since we'll be mapping over this */
 879  892                  crargs.flags |= MAP_NORESERVE;
 880  893                  error = as_map(as, base, len, segvn_create, &crargs);
 881  894                  if (error) {
 882  895                          base = NULL;
 883  896                  }
 884  897          }
 885  898  
 886  899          as_rangeunlock(as);
 887  900          return (base);
 888  901  }
 889  902  
 890  903  /*
 891  904   * Map the file associated with vp into the address space as a single
 892  905   * read only private mapping.
 893  906   * Returns 0 for success, and non-zero for failure to map the file.
 894  907   */
 895  908  static int
 896  909  mmapobj_map_flat(vnode_t *vp, mmapobj_result_t *mrp, size_t padding,
 897  910      cred_t *fcred)
 898  911  {
 899  912          int error = 0;
 900  913          struct as *as = curproc->p_as;
 901  914          caddr_t addr = NULL;
 902  915          caddr_t start_addr;
 903  916          size_t len;
 904  917          size_t pad_len;
 905  918          int prot = PROT_USER | PROT_READ;
 906  919          uint_t ma_flags = _MAP_LOW32;
 907  920          vattr_t vattr;
 908  921          struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_USER, PROT_ALL);
 909  922  
 910  923          if (get_udatamodel() == DATAMODEL_LP64) {
 911  924                  ma_flags = 0;
 912  925          }
 913  926  
 914  927          vattr.va_mask = AT_SIZE;
 915  928          error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL);
 916  929          if (error) {
 917  930                  return (error);
 918  931          }
 919  932  
 920  933          len = vattr.va_size;
 921  934  
 922  935          ma_flags |= MAP_PRIVATE;
 923  936          if (padding == 0) {
 924  937                  MOBJ_STAT_ADD(map_flat_no_padding);
 925  938                  error = VOP_MAP(vp, 0, as, &addr, len, prot, PROT_ALL,
 926  939                      ma_flags, fcred, NULL);
 927  940                  if (error == 0) {
 928  941                          mrp[0].mr_addr = addr;
 929  942                          mrp[0].mr_msize = len;
 930  943                          mrp[0].mr_fsize = len;
 931  944                          mrp[0].mr_offset = 0;
 932  945                          mrp[0].mr_prot = prot;
 933  946                          mrp[0].mr_flags = 0;
 934  947                  }
 935  948                  return (error);
 936  949          }
 937  950  
 938  951          /* padding was requested so there's more work to be done */
 939  952          MOBJ_STAT_ADD(map_flat_padding);
 940  953  
 941  954          /* No need to reserve swap space now since it will be reserved later */
 942  955          crargs.flags |= MAP_NORESERVE;
 943  956  
 944  957          /* Need to setup padding which can only be in PAGESIZE increments. */
 945  958          ASSERT((padding & PAGEOFFSET) == 0);
 946  959          pad_len = len + (2 * padding);
 947  960  
 948  961          as_rangelock(as);
 949  962          map_addr(&addr, pad_len, 0, 1, ma_flags);
 950  963          error = as_map(as, addr, pad_len, segvn_create, &crargs);
 951  964          as_rangeunlock(as);
 952  965          if (error) {
 953  966                  return (error);
 954  967          }
 955  968          start_addr = addr;
 956  969          addr += padding;
 957  970          ma_flags |= MAP_FIXED;
 958  971          error = VOP_MAP(vp, 0, as, &addr, len, prot, PROT_ALL, ma_flags,
 959  972              fcred, NULL);
 960  973          if (error == 0) {
 961  974                  mrp[0].mr_addr = start_addr;
 962  975                  mrp[0].mr_msize = padding;
 963  976                  mrp[0].mr_fsize = 0;
 964  977                  mrp[0].mr_offset = 0;
 965  978                  mrp[0].mr_prot = 0;
 966  979                  mrp[0].mr_flags = MR_PADDING;
 967  980  
 968  981                  mrp[1].mr_addr = addr;
 969  982                  mrp[1].mr_msize = len;
 970  983                  mrp[1].mr_fsize = len;
 971  984                  mrp[1].mr_offset = 0;
 972  985                  mrp[1].mr_prot = prot;
 973  986                  mrp[1].mr_flags = 0;
 974  987  
 975  988                  mrp[2].mr_addr = addr + P2ROUNDUP(len, PAGESIZE);
 976  989                  mrp[2].mr_msize = padding;
 977  990                  mrp[2].mr_fsize = 0;
 978  991                  mrp[2].mr_offset = 0;
 979  992                  mrp[2].mr_prot = 0;
 980  993                  mrp[2].mr_flags = MR_PADDING;
 981  994          } else {
 982  995                  /* Need to cleanup the as_map from earlier */
 983  996                  (void) as_unmap(as, start_addr, pad_len);
 984  997          }
 985  998          return (error);
 986  999  }
 987 1000  
 988 1001  /*
 989 1002   * Map a PT_LOAD or PT_SUNWBSS section of an executable file into the user's
 990 1003   * address space.
 991 1004   * vp - vnode to be mapped in
 992 1005   * addr - start address
 993 1006   * len - length of vp to be mapped
 994 1007   * zfodlen - length of zero filled memory after len above
 995 1008   * offset - offset into file where mapping should start
 996 1009   * prot - protections for this mapping
 997 1010   * fcred - credentials for the file associated with vp at open time.
 998 1011   */
 999 1012  static int
1000 1013  mmapobj_map_ptload(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen,
1001 1014      off_t offset, int prot, cred_t *fcred)
1002 1015  {
1003 1016          int error = 0;
1004 1017          caddr_t zfodbase, oldaddr;
1005 1018          size_t oldlen;
1006 1019          size_t end;
1007 1020          size_t zfoddiff;
1008 1021          label_t ljb;
1009 1022          struct as *as = curproc->p_as;
1010 1023          model_t model;
1011 1024          int full_page;
1012 1025  
1013 1026          /*
1014 1027           * See if addr and offset are aligned such that we can map in
1015 1028           * full pages instead of partial pages.
1016 1029           */
1017 1030          full_page = (((uintptr_t)addr & PAGEOFFSET) ==
1018 1031              ((uintptr_t)offset & PAGEOFFSET));
1019 1032  
1020 1033          model = get_udatamodel();
1021 1034  
1022 1035          oldaddr = addr;
1023 1036          addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1024 1037          if (len) {
1025 1038                  spgcnt_t availm, npages;
1026 1039                  int preread;
1027 1040                  uint_t mflag = MAP_PRIVATE | MAP_FIXED;
1028 1041  
1029 1042                  if (model == DATAMODEL_ILP32) {
1030 1043                          mflag |= _MAP_LOW32;
1031 1044                  }
1032 1045                  /* We may need to map in extra bytes */
1033 1046                  oldlen = len;
1034 1047                  len += ((size_t)oldaddr & PAGEOFFSET);
1035 1048  
1036 1049                  if (full_page) {
1037 1050                          offset = (off_t)((uintptr_t)offset & PAGEMASK);
1038 1051                          if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_EXEC) {
1039 1052                                  mflag |= MAP_TEXT;
1040 1053                                  MOBJ_STAT_ADD(map_ptload_text);
1041 1054                          } else {
1042 1055                                  mflag |= MAP_INITDATA;
1043 1056                                  MOBJ_STAT_ADD(map_ptload_initdata);
1044 1057                          }
1045 1058  
1046 1059                          /*
1047 1060                           * maxprot is passed as PROT_ALL so that mdb can
1048 1061                           * write to this segment.
1049 1062                           */
1050 1063                          if (error = VOP_MAP(vp, (offset_t)offset, as, &addr,
1051 1064                              len, prot, PROT_ALL, mflag, fcred, NULL)) {
1052 1065                                  return (error);
1053 1066                          }
1054 1067  
1055 1068                          /*
1056 1069                           * If the segment can fit and is relatively small, then
1057 1070                           * we prefault the entire segment in.  This is based
1058 1071                           * on the model that says the best working set of a
1059 1072                           * small program is all of its pages.
1060 1073                           * We only do this if freemem will not drop below
1061 1074                           * lotsfree since we don't want to induce paging.
1062 1075                           */
1063 1076                          npages = (spgcnt_t)btopr(len);
1064 1077                          availm = freemem - lotsfree;
1065 1078                          preread = (npages < availm && len < PGTHRESH) ? 1 : 0;
1066 1079  
1067 1080                          /*
1068 1081                           * If we aren't prefaulting the segment,
1069 1082                           * increment "deficit", if necessary to ensure
1070 1083                           * that pages will become available when this
1071 1084                           * process starts executing.
1072 1085                           */
1073 1086                          if (preread == 0 && npages > availm &&
1074 1087                              deficit < lotsfree) {
1075 1088                                  deficit += MIN((pgcnt_t)(npages - availm),
1076 1089                                      lotsfree - deficit);
1077 1090                          }
1078 1091  
1079 1092                          if (preread) {
1080 1093                                  (void) as_faulta(as, addr, len);
1081 1094                                  MOBJ_STAT_ADD(map_ptload_preread);
1082 1095                          }
1083 1096                  } else {
1084 1097                          /*
1085 1098                           * addr and offset were not aligned such that we could
1086 1099                           * use VOP_MAP, thus we need to as_map the memory we
1087 1100                           * need and then read the data in from disk.
1088 1101                           * This code path is a corner case which should never
1089 1102                           * be taken, but hand crafted binaries could trigger
1090 1103                           * this logic and it needs to work correctly.
1091 1104                           */
1092 1105                          MOBJ_STAT_ADD(map_ptload_unaligned_text);
1093 1106                          as_rangelock(as);
1094 1107                          (void) as_unmap(as, addr, len);
1095 1108  
1096 1109                          /*
1097 1110                           * We use zfod_argsp because we need to be able to
1098 1111                           * write to the mapping and then we'll change the
1099 1112                           * protections later if they are incorrect.
1100 1113                           */
1101 1114                          error = as_map(as, addr, len, segvn_create, zfod_argsp);
1102 1115                          as_rangeunlock(as);
1103 1116                          if (error) {
1104 1117                                  MOBJ_STAT_ADD(map_ptload_unaligned_map_fail);
1105 1118                                  return (error);
1106 1119                          }
1107 1120  
1108 1121                          /* Now read in the data from disk */
1109 1122                          error = vn_rdwr(UIO_READ, vp, oldaddr, oldlen, offset,
1110 1123                              UIO_USERSPACE, 0, (rlim64_t)0, fcred, NULL);
1111 1124                          if (error) {
1112 1125                                  MOBJ_STAT_ADD(map_ptload_unaligned_read_fail);
1113 1126                                  return (error);
1114 1127                          }
1115 1128  
1116 1129                          /*
1117 1130                           * Now set protections.
1118 1131                           */
1119 1132                          if (prot != PROT_ZFOD) {
1120 1133                                  (void) as_setprot(as, addr, len, prot);
1121 1134                          }
1122 1135                  }
1123 1136          }
1124 1137  
1125 1138          if (zfodlen) {
1126 1139                  end = (size_t)addr + len;
1127 1140                  zfodbase = (caddr_t)P2ROUNDUP(end, PAGESIZE);
1128 1141                  zfoddiff = (uintptr_t)zfodbase - end;
1129 1142                  if (zfoddiff) {
1130 1143                          /*
1131 1144                           * Before we go to zero the remaining space on the last
1132 1145                           * page, make sure we have write permission.
1133 1146                           *
1134 1147                           * We need to be careful how we zero-fill the last page
1135 1148                           * if the protection does not include PROT_WRITE. Using
1136 1149                           * as_setprot() can cause the VM segment code to call
1137 1150                           * segvn_vpage(), which must allocate a page struct for
1138 1151                           * each page in the segment. If we have a very large
1139 1152                           * segment, this may fail, so we check for that, even
1140 1153                           * though we ignore other return values from as_setprot.
1141 1154                           */
1142 1155                          MOBJ_STAT_ADD(zfoddiff);
1143 1156                          if ((prot & PROT_WRITE) == 0) {
1144 1157                                  if (as_setprot(as, (caddr_t)end, zfoddiff,
1145 1158                                      prot | PROT_WRITE) == ENOMEM)
1146 1159                                          return (ENOMEM);
1147 1160                                  MOBJ_STAT_ADD(zfoddiff_nowrite);
1148 1161                          }
1149 1162                          if (on_fault(&ljb)) {
1150 1163                                  no_fault();
1151 1164                                  if ((prot & PROT_WRITE) == 0) {
1152 1165                                          (void) as_setprot(as, (caddr_t)end,
1153 1166                                              zfoddiff, prot);
1154 1167                                  }
1155 1168                                  return (EFAULT);
1156 1169                          }
1157 1170                          uzero((void *)end, zfoddiff);
1158 1171                          no_fault();
1159 1172  
1160 1173                          /*
1161 1174                           * Remove write protection to return to original state
1162 1175                           */
1163 1176                          if ((prot & PROT_WRITE) == 0) {
1164 1177                                  (void) as_setprot(as, (caddr_t)end,
1165 1178                                      zfoddiff, prot);
1166 1179                          }
1167 1180                  }
1168 1181                  if (zfodlen > zfoddiff) {
1169 1182                          struct segvn_crargs crargs =
1170 1183                              SEGVN_ZFOD_ARGS(prot, PROT_ALL);
1171 1184  
1172 1185                          MOBJ_STAT_ADD(zfodextra);
1173 1186                          zfodlen -= zfoddiff;
1174 1187                          crargs.szc = AS_MAP_NO_LPOOB;
1175 1188  
1176 1189  
1177 1190                          as_rangelock(as);
1178 1191                          (void) as_unmap(as, (caddr_t)zfodbase, zfodlen);
1179 1192                          error = as_map(as, (caddr_t)zfodbase,
1180 1193                              zfodlen, segvn_create, &crargs);
1181 1194                          as_rangeunlock(as);
1182 1195                          if (error) {
1183 1196                                  return (error);
1184 1197                          }
1185 1198                  }
1186 1199          }
1187 1200          return (0);
1188 1201  }
1189 1202  
1190 1203  /*
1191 1204   * Map the ELF file represented by vp into the users address space.  The
1192 1205   * first mapping will start at start_addr and there will be num_elements
1193 1206   * mappings.  The mappings are described by the data in mrp which may be
1194 1207   * modified upon returning from this function.
1195 1208   * Returns 0 for success or errno for failure.
1196 1209   */
1197 1210  static int
1198 1211  mmapobj_map_elf(struct vnode *vp, caddr_t start_addr, mmapobj_result_t *mrp,
1199 1212      int num_elements, cred_t *fcred, ushort_t e_type)
1200 1213  {
1201 1214          int i;
1202 1215          int ret;
1203 1216          caddr_t lo;
1204 1217          caddr_t hi;
1205 1218          struct as *as = curproc->p_as;
1206 1219  
1207 1220          for (i = 0; i < num_elements; i++) {
1208 1221                  caddr_t addr;
1209 1222                  size_t p_memsz;
1210 1223                  size_t p_filesz;
1211 1224                  size_t zfodlen;
1212 1225                  offset_t p_offset;
1213 1226                  size_t dif;
1214 1227                  int prot;
1215 1228  
1216 1229                  /* Always need to adjust mr_addr */
1217 1230                  addr = start_addr + (size_t)(mrp[i].mr_addr);
1218 1231                  mrp[i].mr_addr =
1219 1232                      (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1220 1233  
1221 1234                  /* Padding has already been mapped */
1222 1235                  if (MR_GET_TYPE(mrp[i].mr_flags) == MR_PADDING) {
1223 1236                          continue;
1224 1237                  }
1225 1238                  p_memsz = mrp[i].mr_msize;
1226 1239                  p_filesz = mrp[i].mr_fsize;
1227 1240                  zfodlen = p_memsz - p_filesz;
1228 1241                  p_offset = mrp[i].mr_offset;
1229 1242                  dif = (uintptr_t)(addr) & PAGEOFFSET;
1230 1243                  prot = mrp[i].mr_prot | PROT_USER;
1231 1244                  ret = mmapobj_map_ptload(vp, addr, p_filesz, zfodlen,
1232 1245                      p_offset, prot, fcred);
1233 1246                  if (ret != 0) {
1234 1247                          MOBJ_STAT_ADD(ptload_failed);
1235 1248                          mmapobj_unmap(mrp, i, num_elements, e_type);
1236 1249                          return (ret);
1237 1250                  }
1238 1251  
1239 1252                  /* Need to cleanup mrp to reflect the actual values used */
1240 1253                  mrp[i].mr_msize += dif;
1241 1254                  mrp[i].mr_offset = (size_t)addr & PAGEOFFSET;
1242 1255          }
1243 1256  
1244 1257          /* Also need to unmap any holes created above */
1245 1258          if (num_elements == 1) {
1246 1259                  MOBJ_STAT_ADD(map_elf_no_holes);
1247 1260                  return (0);
1248 1261          }
1249 1262          if (e_type == ET_EXEC) {
1250 1263                  return (0);
1251 1264          }
1252 1265  
1253 1266          as_rangelock(as);
1254 1267          lo = start_addr;
1255 1268          hi = mrp[0].mr_addr;
1256 1269  
1257 1270          /* Remove holes made by the rest of the segments */
1258 1271          for (i = 0; i < num_elements - 1; i++) {
1259 1272                  lo = (caddr_t)P2ROUNDUP((size_t)(mrp[i].mr_addr) +
1260 1273                      mrp[i].mr_msize, PAGESIZE);
1261 1274                  hi = mrp[i + 1].mr_addr;
1262 1275                  if (lo < hi) {
1263 1276                          /*
1264 1277                           * If as_unmap fails we just use up a bit of extra
1265 1278                           * space
1266 1279                           */
1267 1280                          (void) as_unmap(as, (caddr_t)lo,
1268 1281                              (size_t)hi - (size_t)lo);
1269 1282                          MOBJ_STAT_ADD(unmap_hole);
1270 1283                  }
1271 1284          }
1272 1285          as_rangeunlock(as);
1273 1286  
1274 1287          return (0);
1275 1288  }
1276 1289  
1277 1290  /*
 * Ugly hack to get STRUCT_* macros to work below.
 *
 * The program-header walkers in this file declare their cursor with
 * STRUCT_HANDLE(myphdr, ...) and read fields as STRUCT_FGET(..., x.p_type),
 * so the native Phdr and the 32-bit Elf32_Phdr are each wrapped in a
 * one-member struct whose member is named "x".
 * NOTE(review): presumably the STRUCT_* macros locate the 32-bit layout by
 * appending "32" to the struct tag (hence "myphdr32") -- confirm against the
 * macro definitions.
 */
1278 1291  struct myphdr {
1279 1292          Phdr            x;      /* native version */
1280 1293  };
1281 1294  
1282 1295  struct myphdr32 {
1283 1296          Elf32_Phdr      x;      /* 32-bit version */
1284 1297  };
1285 1298  
1286 1299  /*
1287 1300   * Calculate and return the number of loadable segments in the ELF Phdr
1288 1301   * represented by phdrbase as well as the len of the total mapping and
1289 1302   * the max alignment that is needed for a given segment.  On success,
1290 1303   * 0 is returned, and *len, *loadable and *align have been filled out.
1291 1304   * On failure, errno will be returned, which in this case is ENOTSUP
1292 1305   * if we were passed an ELF file with overlapping segments.
1293 1306   */
1294 1307  static int
1295 1308  calc_loadable(Ehdr *ehdrp, caddr_t phdrbase, int nphdrs, size_t *len,
1296 1309      int *loadable, size_t *align)
1297 1310  {
1298 1311          int i;
1299 1312          int hsize;
1300 1313          model_t model;
1301 1314          ushort_t e_type = ehdrp->e_type;        /* same offset 32 and 64 bit */
1302 1315          uint_t p_type;
1303 1316          offset_t p_offset;
1304 1317          size_t p_memsz;
1305 1318          size_t p_align;
1306 1319          caddr_t vaddr;
1307 1320          int num_segs = 0;
1308 1321          caddr_t start_addr = NULL;
1309 1322          caddr_t p_end = NULL;
1310 1323          size_t max_align = 0;
1311 1324          size_t min_align = PAGESIZE;    /* needed for vmem_xalloc */
1312 1325          STRUCT_HANDLE(myphdr, mph);
1313 1326  #if defined(__sparc)
1314 1327          extern int vac_size;
1315 1328  
1316 1329          /*
1317 1330           * Want to prevent aliasing by making the start address at least be
1318 1331           * aligned to vac_size.
1319 1332           */
1320 1333          min_align = MAX(PAGESIZE, vac_size);
1321 1334  #endif
1322 1335  
1323 1336          model = get_udatamodel();
1324 1337          STRUCT_SET_HANDLE(mph, model, (struct myphdr *)phdrbase);
1325 1338  
1326 1339          /* hsize alignment should have been checked before calling this func */
1327 1340          if (model == DATAMODEL_LP64) {
1328 1341                  hsize = ehdrp->e_phentsize;
1329 1342                  if (hsize & 7) {
1330 1343                          return (ENOTSUP);
1331 1344                  }
1332 1345          } else {
1333 1346                  ASSERT(model == DATAMODEL_ILP32);
1334 1347                  hsize = ((Elf32_Ehdr *)ehdrp)->e_phentsize;
1335 1348                  if (hsize & 3) {
1336 1349                          return (ENOTSUP);
1337 1350                  }
1338 1351          }
1339 1352  
1340 1353          /*
1341 1354           * Determine the span of all loadable segments and calculate the
1342 1355           * number of loadable segments.
1343 1356           */
1344 1357          for (i = 0; i < nphdrs; i++) {
1345 1358                  p_type = STRUCT_FGET(mph, x.p_type);
1346 1359                  if (p_type == PT_LOAD || p_type == PT_SUNWBSS) {
1347 1360                          vaddr = (caddr_t)(uintptr_t)STRUCT_FGET(mph, x.p_vaddr);
1348 1361                          p_memsz = STRUCT_FGET(mph, x.p_memsz);
1349 1362  
1350 1363                          /*
1351 1364                           * Skip this header if it requests no memory to be
1352 1365                           * mapped.
1353 1366                           */
1354 1367                          if (p_memsz == 0) {
1355 1368                                  STRUCT_SET_HANDLE(mph, model,
1356 1369                                      (struct myphdr *)((size_t)STRUCT_BUF(mph) +
1357 1370                                      hsize));
1358 1371                                  MOBJ_STAT_ADD(nomem_header);
1359 1372                                  continue;
1360 1373                          }
1361 1374                          if (num_segs++ == 0) {
1362 1375                                  /*
1363 1376                                   * The p_vaddr of the first PT_LOAD segment
1364 1377                                   * must either be NULL or within the first
1365 1378                                   * page in order to be interpreted.
1366 1379                                   * Otherwise, its an invalid file.
1367 1380                                   */
1368 1381                                  if (e_type == ET_DYN &&
1369 1382                                      ((caddr_t)((uintptr_t)vaddr &
1370 1383                                      (uintptr_t)PAGEMASK) != NULL)) {
1371 1384                                          MOBJ_STAT_ADD(inval_header);
1372 1385                                          return (ENOTSUP);
1373 1386                                  }
1374 1387                                  start_addr = vaddr;
1375 1388                                  /*
1376 1389                                   * For the first segment, we need to map from
1377 1390                                   * the beginning of the file, so we will
1378 1391                                   * adjust the size of the mapping to include
1379 1392                                   * this memory.
1380 1393                                   */
1381 1394                                  p_offset = STRUCT_FGET(mph, x.p_offset);
1382 1395                          } else {
1383 1396                                  p_offset = 0;
1384 1397                          }
1385 1398                          /*
1386 1399                           * Check to make sure that this mapping wouldn't
1387 1400                           * overlap a previous mapping.
1388 1401                           */
1389 1402                          if (vaddr < p_end) {
1390 1403                                  MOBJ_STAT_ADD(overlap_header);
1391 1404                                  return (ENOTSUP);
1392 1405                          }
1393 1406  
1394 1407                          p_end = vaddr + p_memsz + p_offset;
1395 1408                          p_end = (caddr_t)P2ROUNDUP((size_t)p_end, PAGESIZE);
1396 1409  
1397 1410                          p_align = STRUCT_FGET(mph, x.p_align);
1398 1411                          if (p_align > 1 && p_align > max_align) {
1399 1412                                  max_align = p_align;
1400 1413                                  if (max_align < min_align) {
1401 1414                                          max_align = min_align;
1402 1415                                          MOBJ_STAT_ADD(min_align);
1403 1416                                  }
1404 1417                          }
1405 1418                  }
1406 1419                  STRUCT_SET_HANDLE(mph, model,
1407 1420                      (struct myphdr *)((size_t)STRUCT_BUF(mph) + hsize));
1408 1421          }
1409 1422  
1410 1423          /*
1411 1424           * The alignment should be a power of 2, if it isn't we forgive it
1412 1425           * and round up.  On overflow, we'll set the alignment to max_align
1413 1426           * rounded down to the nearest power of 2.
1414 1427           */
1415 1428          if (max_align > 0 && !ISP2(max_align)) {
1416 1429                  MOBJ_STAT_ADD(np2_align);
1417 1430                  *align = 2 * (1L << (highbit(max_align) - 1));
1418 1431                  if (*align < max_align ||
1419 1432                      (*align > UINT_MAX && model == DATAMODEL_ILP32)) {
1420 1433                          MOBJ_STAT_ADD(np2_align_overflow);
1421 1434                          *align = 1L << (highbit(max_align) - 1);
1422 1435                  }
1423 1436          } else {
1424 1437                  *align = max_align;
1425 1438          }
1426 1439  
1427 1440          ASSERT(*align >= PAGESIZE || *align == 0);
1428 1441  
1429 1442          *loadable = num_segs;
1430 1443          *len = p_end - start_addr;
1431 1444          return (0);
1432 1445  }
1433 1446  
1434 1447  /*
1435 1448   * Check the address space to see if the virtual addresses to be used are
1436 1449   * available.  If they are not, return errno for failure.  On success, 0
1437 1450   * will be returned, and the virtual addresses for each mmapobj_result_t
1438 1451   * will be reserved.  Note that a reservation could have earlier been made
1439 1452   * for a given segment via a /dev/null mapping.  If that is the case, then
1440 1453   * we can use that VA space for our mappings.
1441 1454   * Note: this function will only be used for ET_EXEC binaries.
1442 1455   */
1443 1456  int
1444 1457  check_exec_addrs(int loadable, mmapobj_result_t *mrp, caddr_t start_addr)
1445 1458  {
1446 1459          int i;
1447 1460          struct as *as = curproc->p_as;
1448 1461          struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
1449 1462          int ret;
1450 1463          caddr_t myaddr;
1451 1464          size_t mylen;
1452 1465          struct seg *seg;
1453 1466  
1454 1467          /* No need to reserve swap space now since it will be reserved later */
1455 1468          crargs.flags |= MAP_NORESERVE;
1456 1469          as_rangelock(as);
1457 1470          for (i = 0; i < loadable; i++) {
1458 1471  
1459 1472                  myaddr = start_addr + (size_t)mrp[i].mr_addr;
1460 1473                  mylen = mrp[i].mr_msize;
1461 1474  
1462 1475                  /* See if there is a hole in the as for this range */
1463 1476                  if (as_gap(as, mylen, &myaddr, &mylen, 0, NULL) == 0) {
1464 1477                          ASSERT(myaddr == start_addr + (size_t)mrp[i].mr_addr);
1465 1478                          ASSERT(mylen == mrp[i].mr_msize);
1466 1479  
1467 1480  #ifdef DEBUG
1468 1481                          if (MR_GET_TYPE(mrp[i].mr_flags) == MR_PADDING) {
1469 1482                                  MOBJ_STAT_ADD(exec_padding);
1470 1483                          }
1471 1484  #endif
1472 1485                          ret = as_map(as, myaddr, mylen, segvn_create, &crargs);
1473 1486                          if (ret) {
1474 1487                                  as_rangeunlock(as);
1475 1488                                  mmapobj_unmap_exec(mrp, i, start_addr);
1476 1489                                  return (ret);
1477 1490                          }
1478 1491                  } else {
1479 1492                          /*
1480 1493                           * There is a mapping that exists in the range
1481 1494                           * so check to see if it was a "reservation"
1482 1495                           * from /dev/null.  The mapping is from
1483 1496                           * /dev/null if the mapping comes from
1484 1497                           * segdev and the type is neither MAP_SHARED
1485 1498                           * nor MAP_PRIVATE.
1486 1499                           */
1487 1500                          AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
1488 1501                          seg = as_findseg(as, myaddr, 0);
1489 1502                          MOBJ_STAT_ADD(exec_addr_mapped);
1490 1503                          if (seg && seg->s_ops == &segdev_ops &&
1491 1504                              ((SEGOP_GETTYPE(seg, myaddr) &
1492 1505                              (MAP_SHARED | MAP_PRIVATE)) == 0) &&
1493 1506                              myaddr >= seg->s_base &&
1494 1507                              myaddr + mylen <=
1495 1508                              seg->s_base + seg->s_size) {
1496 1509                                  MOBJ_STAT_ADD(exec_addr_devnull);
1497 1510                                  AS_LOCK_EXIT(as, &as->a_lock);
1498 1511                                  (void) as_unmap(as, myaddr, mylen);
1499 1512                                  ret = as_map(as, myaddr, mylen, segvn_create,
1500 1513                                      &crargs);
1501 1514                                  mrp[i].mr_flags |= MR_RESV;
1502 1515                                  if (ret) {
1503 1516                                          as_rangeunlock(as);
1504 1517                                          /* Need to remap what we unmapped */
1505 1518                                          mmapobj_unmap_exec(mrp, i + 1,
1506 1519                                              start_addr);
1507 1520                                          return (ret);
1508 1521                                  }
1509 1522                          } else {
1510 1523                                  AS_LOCK_EXIT(as, &as->a_lock);
1511 1524                                  as_rangeunlock(as);
1512 1525                                  mmapobj_unmap_exec(mrp, i, start_addr);
1513 1526                                  MOBJ_STAT_ADD(exec_addr_in_use);
1514 1527                                  return (EADDRINUSE);
1515 1528                          }
1516 1529                  }
1517 1530          }

↓ open down ↓

643 lines elided

↑ open up ↑

1518 1531          as_rangeunlock(as);
1519 1532          return (0);
1520 1533  }
1521 1534  
1522 1535  /*
1523 1536   * Walk through the ELF program headers and extract all useful information
1524 1537   * for PT_LOAD and PT_SUNWBSS segments into mrp.
1525 1538   * Return 0 on success or error on failure.
1526 1539   */
1527 1540  static int
1528      -process_phdr(Ehdr *ehdrp, caddr_t phdrbase, int nphdrs, mmapobj_result_t *mrp,
     1541 +process_phdrs(Ehdr *ehdrp, caddr_t phdrbase, int nphdrs, mmapobj_result_t *mrp,
1529 1542      vnode_t *vp, uint_t *num_mapped, size_t padding, cred_t *fcred)
1530 1543  {
1531 1544          int i;
1532 1545          caddr_t start_addr = NULL;
1533 1546          caddr_t vaddr;
1534 1547          size_t len = 0;
1535 1548          size_t lib_len = 0;
1536 1549          int ret;
1537 1550          int prot;
1538 1551          struct lib_va *lvp = NULL;

1539 1552          vattr_t vattr;
1540 1553          struct as *as = curproc->p_as;
1541 1554          int error;
1542 1555          int loadable = 0;
1543 1556          int current = 0;
1544 1557          int use_lib_va = 1;
1545 1558          size_t align = 0;
1546 1559          size_t add_pad = 0;
1547 1560          int hdr_seen = 0;
1548 1561          ushort_t e_type = ehdrp->e_type;        /* same offset 32 and 64 bit */
1549 1562          uint_t p_type;
1550 1563          offset_t p_offset;
1551 1564          size_t p_memsz;
1552 1565          size_t p_filesz;
1553 1566          uint_t p_flags;
1554 1567          int hsize;
1555 1568          model_t model;
1556 1569          STRUCT_HANDLE(myphdr, mph);
1557 1570  
1558 1571          model = get_udatamodel();
1559 1572          STRUCT_SET_HANDLE(mph, model, (struct myphdr *)phdrbase);
1560 1573  
1561 1574          /*
1562 1575           * Need to make sure that hsize is aligned properly.
1563 1576           * For 32bit processes, 4 byte alignment is required.
1564 1577           * For 64bit processes, 8 byte alignment is required.
1565 1578           * If the alignment isn't correct, we need to return failure
1566 1579           * since it could cause an alignment error panic while walking
1567 1580           * the phdr array.
1568 1581           */
1569 1582          if (model == DATAMODEL_LP64) {
1570 1583                  hsize = ehdrp->e_phentsize;
1571 1584                  if (hsize & 7) {
1572 1585                          MOBJ_STAT_ADD(phent_align64);
1573 1586                          return (ENOTSUP);

↓ open down ↓

35 lines elided

↑ open up ↑

1574 1587                  }
1575 1588          } else {
1576 1589                  ASSERT(model == DATAMODEL_ILP32);
1577 1590                  hsize = ((Elf32_Ehdr *)ehdrp)->e_phentsize;
1578 1591                  if (hsize & 3) {
1579 1592                          MOBJ_STAT_ADD(phent_align32);
1580 1593                          return (ENOTSUP);
1581 1594                  }
1582 1595          }
1583 1596  
1584      -        if (padding != 0) {
     1597 +        if ((padding != 0) || secflag_enabled(curproc, PROC_SEC_ASLR)) {
1585 1598                  use_lib_va = 0;
1586 1599          }
1587 1600          if (e_type == ET_DYN) {
1588 1601                  vattr.va_mask = AT_FSID | AT_NODEID | AT_CTIME | AT_MTIME;
1589 1602                  error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL);
1590 1603                  if (error) {
1591 1604                          return (error);
1592 1605                  }
1593 1606                  /* Check to see if we already have a description for this lib */
1594      -                lvp = lib_va_find(&vattr);
     1607 +                if (!secflag_enabled(curproc, PROC_SEC_ASLR))
     1608 +                        lvp = lib_va_find(&vattr);
1595 1609  
1596 1610                  if (lvp != NULL) {
1597 1611                          MOBJ_STAT_ADD(lvp_found);
1598 1612                          if (use_lib_va) {
1599 1613                                  start_addr = mmapobj_lookup_start_addr(lvp);
1600 1614                                  if (start_addr == NULL) {
1601 1615                                          lib_va_release(lvp);
1602 1616                                          return (ENOMEM);
1603 1617                                  }
1604 1618                          }

1605 1619  
1606 1620                          /*
1607 1621                           * loadable may be zero if the original allocator
1608 1622                           * of lvp hasn't finished setting it up but the rest
1609 1623                           * of the fields will be accurate.
1610 1624                           */
1611 1625                          loadable = lvp->lv_num_segs;
1612 1626                          len = lvp->lv_len;
1613 1627                          align = lvp->lv_align;
1614 1628                  }
1615 1629          }
1616 1630  
1617 1631          /*
1618 1632           * Determine the span of all loadable segments and calculate the
1619 1633           * number of loadable segments, the total len spanned by the mappings
1620 1634           * and the max alignment, if we didn't get them above.
1621 1635           */
1622 1636          if (loadable == 0) {
1623 1637                  MOBJ_STAT_ADD(no_loadable_yet);
1624 1638                  ret = calc_loadable(ehdrp, phdrbase, nphdrs, &len,
1625 1639                      &loadable, &align);
1626 1640                  if (ret != 0) {
1627 1641                          /*
1628 1642                           * Since it'd be an invalid file, we shouldn't have
1629 1643                           * cached it previously.
1630 1644                           */
1631 1645                          ASSERT(lvp == NULL);
1632 1646                          return (ret);
1633 1647                  }
1634 1648  #ifdef DEBUG
1635 1649                  if (lvp) {
1636 1650                          ASSERT(len == lvp->lv_len);
1637 1651                          ASSERT(align == lvp->lv_align);
1638 1652                  }
1639 1653  #endif
1640 1654          }
1641 1655  
1642 1656          /* Make sure there's something to map. */
1643 1657          if (len == 0 || loadable == 0) {
1644 1658                  /*
1645 1659                   * Since it'd be an invalid file, we shouldn't have
1646 1660                   * cached it previously.
1647 1661                   */
1648 1662                  ASSERT(lvp == NULL);
1649 1663                  MOBJ_STAT_ADD(nothing_to_map);
1650 1664                  return (ENOTSUP);
1651 1665          }
1652 1666  
1653 1667          lib_len = len;
1654 1668          if (padding != 0) {
1655 1669                  loadable += 2;
1656 1670          }
1657 1671          if (loadable > *num_mapped) {
1658 1672                  *num_mapped = loadable;
1659 1673                  /* cleanup previous reservation */
1660 1674                  if (start_addr) {
1661 1675                          (void) as_unmap(as, start_addr, lib_len);
1662 1676                  }
1663 1677                  MOBJ_STAT_ADD(e2big);
1664 1678                  if (lvp) {
1665 1679                          lib_va_release(lvp);
1666 1680                  }
1667 1681                  return (E2BIG);
1668 1682          }
1669 1683  
1670 1684          /*
1671 1685           * We now know the size of the object to map and now we need to
1672 1686           * get the start address to map it at.  It's possible we already
1673 1687           * have it if we found all the info we need in the lib_va cache.
1674 1688           */
1675 1689          if (e_type == ET_DYN && start_addr == NULL) {
1676 1690                  /*
1677 1691                   * Need to make sure padding does not throw off
1678 1692                   * required alignment.  We can only specify an
1679 1693                   * alignment for the starting address to be mapped,
1680 1694                   * so we round padding up to the alignment and map
1681 1695                   * from there and then throw out the extra later.
1682 1696                   */
1683 1697                  if (padding != 0) {
1684 1698                          if (align > 1) {
1685 1699                                  add_pad = P2ROUNDUP(padding, align);
1686 1700                                  len += add_pad;
1687 1701                                  MOBJ_STAT_ADD(dyn_pad_align);
1688 1702                          } else {
1689 1703                                  MOBJ_STAT_ADD(dyn_pad_noalign);
1690 1704                                  len += padding; /* at beginning */
1691 1705                          }
1692 1706                          len += padding; /* at end of mapping */
1693 1707                  }

↓ open down ↓

89 lines elided

↑ open up ↑

1694 1708                  /*
1695 1709                   * At this point, if lvp is non-NULL, then above we
1696 1710                   * already found it in the cache but did not get
1697 1711                   * the start address since we were not going to use lib_va.
1698 1712                   * Since we know that lib_va will not be used, it's safe
1699 1713                   * to call mmapobj_alloc_start_addr and know that lvp
1700 1714                   * will not be modified.
1701 1715                   */
1702 1716                  ASSERT(lvp ? use_lib_va == 0 : 1);
1703 1717                  start_addr = mmapobj_alloc_start_addr(&lvp, len,
1704      -                    use_lib_va, align, &vattr);
     1718 +                    use_lib_va,
     1719 +                    secflag_enabled(curproc, PROC_SEC_ASLR),
     1720 +                    align, &vattr);
1705 1721                  if (start_addr == NULL) {
1706 1722                          if (lvp) {
1707 1723                                  lib_va_release(lvp);
1708 1724                          }
1709 1725                          MOBJ_STAT_ADD(alloc_start_fail);
1710 1726                          return (ENOMEM);
1711 1727                  }
1712 1728                  /*
1713 1729                   * If we can't cache it, no need to hang on to it.
1714 1730                   * Setting lv_num_segs to non-zero will make that

1715 1731                   * field active and since there are too many segments
1716 1732                   * to cache, all future users will not try to use lv_mps.
1717 1733                   */
1718 1734                  if (lvp != NULL && loadable > LIBVA_CACHED_SEGS && use_lib_va) {
1719 1735                          lvp->lv_num_segs = loadable;
1720 1736                          lib_va_release(lvp);
1721 1737                          lvp = NULL;
1722 1738                          MOBJ_STAT_ADD(lvp_nocache);
1723 1739                  }
1724 1740                  /*
1725 1741                   * Free the beginning of the mapping if the padding
1726 1742                   * was not aligned correctly.
1727 1743                   */
1728 1744                  if (padding != 0 && add_pad != padding) {
1729 1745                          (void) as_unmap(as, start_addr,
1730 1746                              add_pad - padding);
1731 1747                          start_addr += (add_pad - padding);
1732 1748                          MOBJ_STAT_ADD(extra_padding);
1733 1749                  }
1734 1750          }
1735 1751  
1736 1752          /*
1737 1753           * At this point, we have reserved the virtual address space
1738 1754           * for our mappings.  Now we need to start filling out the mrp
1739 1755           * array to describe all of the individual mappings we are going
1740 1756           * to return.
1741 1757           * For ET_EXEC there has been no memory reservation since we are
1742 1758           * using fixed addresses.  While filling in the mrp array below,
1743 1759           * we will have the first segment biased to start at addr 0
1744 1760           * and the rest will be biased by this same amount.  Thus if there
1745 1761           * is padding, the first padding will start at addr 0, and the next
1746 1762           * segment will start at the value of padding.
1747 1763           */
1748 1764  
1749 1765          /* We'll fill out padding later, so start filling in mrp at index 1 */
1750 1766          if (padding != 0) {
1751 1767                  current = 1;
1752 1768          }
1753 1769  
1754 1770          /* If we have no more need for lvp let it go now */
1755 1771          if (lvp != NULL && use_lib_va == 0) {
1756 1772                  lib_va_release(lvp);
1757 1773                  MOBJ_STAT_ADD(lvp_not_needed);
1758 1774                  lvp = NULL;
1759 1775          }
1760 1776  
1761 1777          /* Now fill out the mrp structs from the program headers */
1762 1778          STRUCT_SET_HANDLE(mph, model, (struct myphdr *)phdrbase);
1763 1779          for (i = 0; i < nphdrs; i++) {
1764 1780                  p_type = STRUCT_FGET(mph, x.p_type);
1765 1781                  if (p_type == PT_LOAD || p_type == PT_SUNWBSS) {
1766 1782                          vaddr = (caddr_t)(uintptr_t)STRUCT_FGET(mph, x.p_vaddr);
1767 1783                          p_memsz = STRUCT_FGET(mph, x.p_memsz);
1768 1784                          p_filesz = STRUCT_FGET(mph, x.p_filesz);
1769 1785                          p_offset = STRUCT_FGET(mph, x.p_offset);
1770 1786                          p_flags = STRUCT_FGET(mph, x.p_flags);
1771 1787  
1772 1788                          /*
1773 1789                           * Skip this header if it requests no memory to be
1774 1790                           * mapped.
1775 1791                           */
1776 1792                          if (p_memsz == 0) {
1777 1793                                  STRUCT_SET_HANDLE(mph, model,
1778 1794                                      (struct myphdr *)((size_t)STRUCT_BUF(mph) +
1779 1795                                      hsize));
1780 1796                                  MOBJ_STAT_ADD(no_mem_map_sz);
1781 1797                                  continue;
1782 1798                          }
1783 1799  
1784 1800                          prot = 0;
1785 1801                          if (p_flags & PF_R)
1786 1802                                  prot |= PROT_READ;
1787 1803                          if (p_flags & PF_W)
1788 1804                                  prot |= PROT_WRITE;
1789 1805                          if (p_flags & PF_X)
1790 1806                                  prot |= PROT_EXEC;
1791 1807  
1792 1808                          ASSERT(current < loadable);
1793 1809                          mrp[current].mr_msize = p_memsz;
1794 1810                          mrp[current].mr_fsize = p_filesz;
1795 1811                          mrp[current].mr_offset = p_offset;
1796 1812                          mrp[current].mr_prot = prot;
1797 1813  
1798 1814                          if (hdr_seen == 0 && p_filesz != 0) {
1799 1815                                  mrp[current].mr_flags = MR_HDR_ELF;
1800 1816                                  /*
1801 1817                                   * We modify mr_offset because we
1802 1818                                   * need to map the ELF header as well, and if
1803 1819                                   * we didn't then the header could be left out
1804 1820                                   * of the mapping that we will create later.
1805 1821                                   * Since we're removing the offset, we need to
1806 1822                                   * account for that in the other fields as well
1807 1823                                   * since we will be mapping the memory from 0
1808 1824                                   * to p_offset.
1809 1825                                   */
1810 1826                                  if (e_type == ET_DYN) {
1811 1827                                          mrp[current].mr_offset = 0;
1812 1828                                          mrp[current].mr_msize += p_offset;
1813 1829                                          mrp[current].mr_fsize += p_offset;
1814 1830                                  } else {
1815 1831                                          ASSERT(e_type == ET_EXEC);
1816 1832                                          /*
1817 1833                                           * Save off the start addr which will be
1818 1834                                           * our bias for the rest of the
1819 1835                                           * ET_EXEC mappings.
1820 1836                                           */
1821 1837                                          start_addr = vaddr - padding;
1822 1838                                  }
1823 1839                                  mrp[current].mr_addr = (caddr_t)padding;
1824 1840                                  hdr_seen = 1;
1825 1841                          } else {
1826 1842                                  if (e_type == ET_EXEC) {
1827 1843                                          /* bias mr_addr */
1828 1844                                          mrp[current].mr_addr =
1829 1845                                              vaddr - (size_t)start_addr;
1830 1846                                  } else {
1831 1847                                          mrp[current].mr_addr = vaddr + padding;
1832 1848                                  }
1833 1849                                  mrp[current].mr_flags = 0;
1834 1850                          }
1835 1851                          current++;
1836 1852                  }
1837 1853  
1838 1854                  /* Move to next phdr */
1839 1855                  STRUCT_SET_HANDLE(mph, model,
1840 1856                      (struct myphdr *)((size_t)STRUCT_BUF(mph) +
1841 1857                      hsize));
1842 1858          }
1843 1859  
1844 1860          /* Now fill out the padding segments */
1845 1861          if (padding != 0) {
1846 1862                  mrp[0].mr_addr = NULL;
1847 1863                  mrp[0].mr_msize = padding;
1848 1864                  mrp[0].mr_fsize = 0;
1849 1865                  mrp[0].mr_offset = 0;
1850 1866                  mrp[0].mr_prot = 0;
1851 1867                  mrp[0].mr_flags = MR_PADDING;
1852 1868  
1853 1869                  /* Setup padding for the last segment */
1854 1870                  ASSERT(current == loadable - 1);
1855 1871                  mrp[current].mr_addr = (caddr_t)lib_len + padding;
1856 1872                  mrp[current].mr_msize = padding;
1857 1873                  mrp[current].mr_fsize = 0;
1858 1874                  mrp[current].mr_offset = 0;
1859 1875                  mrp[current].mr_prot = 0;
1860 1876                  mrp[current].mr_flags = MR_PADDING;
1861 1877          }
1862 1878  
1863 1879          /*
1864 1880           * Need to make sure address ranges desired are not in use or
1865 1881           * are previously allocated reservations from /dev/null.  For
1866 1882           * ET_DYN, we already made sure our address range was free.
1867 1883           */
1868 1884          if (e_type == ET_EXEC) {
1869 1885                  ret = check_exec_addrs(loadable, mrp, start_addr);
1870 1886                  if (ret != 0) {
1871 1887                          ASSERT(lvp == NULL);
1872 1888                          MOBJ_STAT_ADD(check_exec_failed);
1873 1889                          return (ret);
1874 1890                  }
1875 1891          }
1876 1892  
1877 1893          /* Finish up our business with lvp. */
1878 1894          if (lvp) {
1879 1895                  ASSERT(e_type == ET_DYN);
1880 1896                  if (lvp->lv_num_segs == 0 && loadable <= LIBVA_CACHED_SEGS) {
1881 1897                          bcopy(mrp, lvp->lv_mps,
1882 1898                              loadable * sizeof (mmapobj_result_t));
1883 1899                          membar_producer();
1884 1900                  }
1885 1901                  /*
1886 1902                   * Setting lv_num_segs to a non-zero value indicates that
1887 1903                   * lv_mps is now valid and can be used by other threads.
1888 1904                   * So, the above stores need to finish before lv_num_segs
1889 1905                   * is updated. lv_mps is only valid if lv_num_segs is
1890 1906                   * non-zero and no greater than LIBVA_CACHED_SEGS.
1891 1907                   */
1892 1908                  lvp->lv_num_segs = loadable;
1893 1909                  lib_va_release(lvp);
1894 1910                  MOBJ_STAT_ADD(lvp_used);
1895 1911          }
1896 1912  
1897 1913          /* Now that we have mrp completely filled out go map it */
1898 1914          ret = mmapobj_map_elf(vp, start_addr, mrp, loadable, fcred, e_type);
1899 1915          if (ret == 0) {
1900 1916                  *num_mapped = loadable;
1901 1917          }
1902 1918  
1903 1919          return (ret);
1904 1920  }
1905 1921  
1906 1922  /*
1907 1923   * Take the ELF file passed in, and do the work of mapping it.
1908 1924   * num_mapped in - # elements in user buffer
1909 1925   * num_mapped out - # sections mapped and length of mrp array if
1910 1926   *                      no errors or E2BIG returned.
1911 1927   */
1912 1928  static int
1913 1929  doelfwork(Ehdr *ehdrp, vnode_t *vp, mmapobj_result_t *mrp,
1914 1930      uint_t *num_mapped, size_t padding, cred_t *fcred)
1915 1931  {
1916 1932          int error;
1917 1933          offset_t phoff;
1918 1934          int nphdrs;
1919 1935          unsigned char ei_class;
1920 1936          unsigned short phentsize;
1921 1937          ssize_t phsizep;
1922 1938          caddr_t phbasep;
1923 1939          int to_map;
1924 1940          model_t model;
1925 1941  
1926 1942          ei_class = ehdrp->e_ident[EI_CLASS];
1927 1943          model = get_udatamodel();
1928 1944          if ((model == DATAMODEL_ILP32 && ei_class == ELFCLASS64) ||
1929 1945              (model == DATAMODEL_LP64 && ei_class == ELFCLASS32)) {
1930 1946                  MOBJ_STAT_ADD(wrong_model);
1931 1947                  return (ENOTSUP);
1932 1948          }
1933 1949  
1934 1950          /* Can't execute code from "noexec" mounted filesystem. */
1935 1951          if (ehdrp->e_type == ET_EXEC &&
1936 1952              (vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0) {
1937 1953                  MOBJ_STAT_ADD(noexec_fs);
1938 1954                  return (EACCES);
1939 1955          }
1940 1956  
1941 1957          /*
1942 1958           * Relocatable and core files are mapped as a single flat file
1943 1959           * since no interpretation is done on them by mmapobj.
1944 1960           */
1945 1961          if (ehdrp->e_type == ET_REL || ehdrp->e_type == ET_CORE) {
1946 1962                  to_map = padding ? 3 : 1;
1947 1963                  if (*num_mapped < to_map) {
1948 1964                          *num_mapped = to_map;
1949 1965                          MOBJ_STAT_ADD(e2big_et_rel);
1950 1966                          return (E2BIG);
1951 1967                  }
1952 1968                  error = mmapobj_map_flat(vp, mrp, padding, fcred);
1953 1969                  if (error == 0) {
1954 1970                          *num_mapped = to_map;
1955 1971                          mrp[padding ? 1 : 0].mr_flags = MR_HDR_ELF;
1956 1972                          MOBJ_STAT_ADD(et_rel_mapped);
1957 1973                  }
1958 1974                  return (error);
1959 1975          }
1960 1976  
1961 1977          /* Check for an unknown ELF type */
1962 1978          if (ehdrp->e_type != ET_EXEC && ehdrp->e_type != ET_DYN) {
1963 1979                  MOBJ_STAT_ADD(unknown_elf_type);
1964 1980                  return (ENOTSUP);
1965 1981          }
1966 1982  
1967 1983          if (ei_class == ELFCLASS32) {
1968 1984                  Elf32_Ehdr *e32hdr = (Elf32_Ehdr *)ehdrp;
1969 1985                  ASSERT(model == DATAMODEL_ILP32);
1970 1986                  nphdrs = e32hdr->e_phnum;
1971 1987                  phentsize = e32hdr->e_phentsize;
1972 1988                  if (phentsize < sizeof (Elf32_Phdr)) {
1973 1989                          MOBJ_STAT_ADD(phent32_too_small);
1974 1990                          return (ENOTSUP);
1975 1991                  }
1976 1992                  phoff = e32hdr->e_phoff;
1977 1993          } else if (ei_class == ELFCLASS64) {
1978 1994                  Elf64_Ehdr *e64hdr = (Elf64_Ehdr *)ehdrp;
1979 1995                  ASSERT(model == DATAMODEL_LP64);
1980 1996                  nphdrs = e64hdr->e_phnum;
1981 1997                  phentsize = e64hdr->e_phentsize;
1982 1998                  if (phentsize < sizeof (Elf64_Phdr)) {
1983 1999                          MOBJ_STAT_ADD(phent64_too_small);
1984 2000                          return (ENOTSUP);
1985 2001                  }
1986 2002                  phoff = e64hdr->e_phoff;
1987 2003          } else {
1988 2004                  /* fallthrough case for an invalid ELF class */
1989 2005                  MOBJ_STAT_ADD(inval_elf_class);
1990 2006                  return (ENOTSUP);
1991 2007          }
1992 2008  
1993 2009          /*
1994 2010           * nphdrs should only have this value for core files which are handled
1995 2011           * above as a single mapping.  If other file types ever use this
1996 2012           * sentinel, then we'll add the support needed to handle this here.
1997 2013           */
1998 2014          if (nphdrs == PN_XNUM) {
1999 2015                  MOBJ_STAT_ADD(too_many_phdrs);
2000 2016                  return (ENOTSUP);
2001 2017          }
2002 2018  
2003 2019          phsizep = nphdrs * phentsize;
2004 2020  
2005 2021          if (phsizep == 0) {
2006 2022                  MOBJ_STAT_ADD(no_phsize);
2007 2023                  return (ENOTSUP);
2008 2024          }
2009 2025  
2010 2026          /* Make sure we only wait for memory if it's a reasonable request */
2011 2027          if (phsizep > mmapobj_alloc_threshold) {
2012 2028                  MOBJ_STAT_ADD(phsize_large);
2013 2029                  if ((phbasep = kmem_alloc(phsizep, KM_NOSLEEP)) == NULL) {
2014 2030                          MOBJ_STAT_ADD(phsize_xtralarge);
2015 2031                          return (ENOMEM);
2016 2032                  }
2017 2033          } else {
2018 2034                  phbasep = kmem_alloc(phsizep, KM_SLEEP);

↓ open down ↓

304 lines elided

↑ open up ↑

2019 2035          }
2020 2036  
2021 2037          if ((error = vn_rdwr(UIO_READ, vp, phbasep, phsizep,
2022 2038              (offset_t)phoff, UIO_SYSSPACE, 0, (rlim64_t)0,
2023 2039              fcred, NULL)) != 0) {
2024 2040                  kmem_free(phbasep, phsizep);
2025 2041                  return (error);
2026 2042          }
2027 2043  
2028 2044          /* Now process the phdr's */
2029      -        error = process_phdr(ehdrp, phbasep, nphdrs, mrp, vp, num_mapped,
     2045 +        error = process_phdrs(ehdrp, phbasep, nphdrs, mrp, vp, num_mapped,
2030 2046              padding, fcred);
2031 2047          kmem_free(phbasep, phsizep);
2032 2048          return (error);
2033 2049  }
2034 2050  
2035 2051  #if defined(__sparc)
2036 2052  /*
2037 2053   * Hack to support 64 bit kernels running AOUT 4.x programs.
2038 2054   * This is the sizeof (struct nlist) for a 32 bit kernel.
2039 2055   * Since AOUT programs are 32 bit only, they will never use the 64 bit

2040 2056   * sizeof (struct nlist) and thus creating a #define is the simplest
2041 2057   * way around this since this is a format which is not being updated.
2042 2058   * This will be used in the place of sizeof (struct nlist) below.
2043 2059   */
2044 2060  #define NLIST_SIZE      (0xC)
2045 2061  
2046 2062  static int
2047 2063  doaoutwork(vnode_t *vp, mmapobj_result_t *mrp,
2048 2064      uint_t *num_mapped, struct exec *hdr, cred_t *fcred)
2049 2065  {
2050 2066          int error;
2051 2067          size_t size;
2052 2068          size_t osize;
2053 2069          size_t nsize;   /* nlist size */
2054 2070          size_t msize;
2055 2071          size_t zfoddiff;
2056 2072          caddr_t addr;
2057 2073          caddr_t start_addr;
2058 2074          struct as *as = curproc->p_as;
2059 2075          int prot = PROT_USER | PROT_READ | PROT_EXEC;
2060 2076          uint_t mflag = MAP_PRIVATE | _MAP_LOW32;
2061 2077          offset_t off = 0;
2062 2078          int segnum = 0;
2063 2079          uint_t to_map;
2064 2080          int is_library = 0;
2065 2081          struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
2066 2082  
2067 2083          /* Only 32bit apps supported by this file format */
2068 2084          if (get_udatamodel() != DATAMODEL_ILP32) {
2069 2085                  MOBJ_STAT_ADD(aout_64bit_try);
2070 2086                  return (ENOTSUP);
2071 2087          }
2072 2088  
2073 2089          /* Check to see if this is a library */
2074 2090          if (hdr->a_magic == ZMAGIC && hdr->a_entry < PAGESIZE) {
2075 2091                  is_library = 1;
2076 2092          }
2077 2093  
2078 2094          /* Can't execute code from "noexec" mounted filesystem. */
2079 2095          if (((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0) && (is_library == 0)) {
2080 2096                  MOBJ_STAT_ADD(aout_noexec);
2081 2097                  return (EACCES);
2082 2098          }
2083 2099  
2084 2100          /*
2085 2101           * There are 2 ways to calculate the mapped size of executable:
2086 2102           * 1) rounded text size + data size + bss size.
2087 2103           * 2) starting offset for text + text size + data size + text relocation
2088 2104           *    size + data relocation size + room for nlist data structure.
2089 2105           *
2090 2106           * The larger of the two sizes will be used to map this binary.
2091 2107           */
2092 2108          osize = P2ROUNDUP(hdr->a_text, PAGESIZE) + hdr->a_data + hdr->a_bss;
2093 2109  
2094 2110          off = hdr->a_magic == ZMAGIC ? 0 : sizeof (struct exec);
2095 2111  
2096 2112          nsize = off + hdr->a_text + hdr->a_data + hdr->a_trsize +
2097 2113              hdr->a_drsize + NLIST_SIZE;
2098 2114  
2099 2115          size = MAX(osize, nsize);
2100 2116          if (size != nsize) {
2101 2117                  nsize = 0;
2102 2118          }
2103 2119  
2104 2120          /*
2105 2121           * 1 seg for text and 1 seg for initialized data.
2106 2122           * 1 seg for bss (if can't fit in leftover space of init data)
2107 2123           * 1 seg for nlist if needed.
2108 2124           */
2109 2125          to_map = 2 + (nsize ? 1 : 0) +
2110 2126              (hdr->a_bss > PAGESIZE - P2PHASE(hdr->a_data, PAGESIZE) ? 1 : 0);
2111 2127          if (*num_mapped < to_map) {
2112 2128                  *num_mapped = to_map;
2113 2129                  MOBJ_STAT_ADD(aout_e2big);
2114 2130                  return (E2BIG);
2115 2131          }
2116 2132  
2117 2133          /* Reserve address space for the whole mapping */
2118 2134          if (is_library) {
2119 2135                  /* We'll let VOP_MAP below pick our address for us */
2120 2136                  addr = NULL;
2121 2137                  MOBJ_STAT_ADD(aout_lib);
2122 2138          } else {
2123 2139                  /*
2124 2140                   * default start address for fixed binaries from AOUT 4.x
2125 2141                   * standard.
2126 2142                   */
2127 2143                  MOBJ_STAT_ADD(aout_fixed);
2128 2144                  mflag |= MAP_FIXED;
2129 2145                  addr = (caddr_t)0x2000;
2130 2146                  as_rangelock(as);
2131 2147                  if (as_gap(as, size, &addr, &size, 0, NULL) != 0) {
2132 2148                          as_rangeunlock(as);
2133 2149                          MOBJ_STAT_ADD(aout_addr_in_use);
2134 2150                          return (EADDRINUSE);
2135 2151                  }
2136 2152                  crargs.flags |= MAP_NORESERVE;
2137 2153                  error = as_map(as, addr, size, segvn_create, &crargs);
2138 2154                  ASSERT(addr == (caddr_t)0x2000);
2139 2155                  as_rangeunlock(as);
2140 2156          }
2141 2157  
2142 2158          start_addr = addr;
2143 2159          osize = size;
2144 2160  
2145 2161          /*
2146 2162           * Map as large as we need, backed by file, this will be text, and
2147 2163           * possibly the nlist segment.  We map over this mapping for bss and
2148 2164           * initialized data segments.
2149 2165           */
2150 2166          error = VOP_MAP(vp, off, as, &addr, size, prot, PROT_ALL,
2151 2167              mflag, fcred, NULL);
2152 2168          if (error) {
2153 2169                  if (!is_library) {
2154 2170                          (void) as_unmap(as, start_addr, osize);
2155 2171                  }
2156 2172                  return (error);
2157 2173          }
2158 2174  
2159 2175          /* pickup the value of start_addr and osize for libraries */
2160 2176          start_addr = addr;
2161 2177          osize = size;
2162 2178  
2163 2179          /*
2164 2180           * We have our initial reservation/allocation so we need to use fixed
2165 2181           * addresses from now on.
2166 2182           */
2167 2183          mflag |= MAP_FIXED;
2168 2184  
2169 2185          mrp[0].mr_addr = addr;
2170 2186          mrp[0].mr_msize = hdr->a_text;
2171 2187          mrp[0].mr_fsize = hdr->a_text;
2172 2188          mrp[0].mr_offset = 0;
2173 2189          mrp[0].mr_prot = PROT_READ | PROT_EXEC;
2174 2190          mrp[0].mr_flags = MR_HDR_AOUT;
2175 2191  
2176 2192  
2177 2193          /*
2178 2194           * Map initialized data. We are mapping over a portion of the
2179 2195           * previous mapping which will be unmapped in VOP_MAP below.
2180 2196           */
2181 2197          off = P2ROUNDUP((offset_t)(hdr->a_text), PAGESIZE);
2182 2198          msize = off;
2183 2199          addr += off;
2184 2200          size = hdr->a_data;
2185 2201          error = VOP_MAP(vp, off, as, &addr, size, PROT_ALL, PROT_ALL,
2186 2202              mflag, fcred, NULL);
2187 2203          if (error) {
2188 2204                  (void) as_unmap(as, start_addr, osize);
2189 2205                  return (error);
2190 2206          }
2191 2207          msize += size;
2192 2208          mrp[1].mr_addr = addr;
2193 2209          mrp[1].mr_msize = size;
2194 2210          mrp[1].mr_fsize = size;
2195 2211          mrp[1].mr_offset = 0;
2196 2212          mrp[1].mr_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
2197 2213          mrp[1].mr_flags = 0;
2198 2214  
2199 2215          /* Need to zero out remainder of page */
2200 2216          addr += hdr->a_data;
2201 2217          zfoddiff = P2PHASE((size_t)addr, PAGESIZE);
2202 2218          if (zfoddiff) {
2203 2219                  label_t ljb;
2204 2220  
2205 2221                  MOBJ_STAT_ADD(aout_zfoddiff);
2206 2222                  zfoddiff = PAGESIZE - zfoddiff;
2207 2223                  if (on_fault(&ljb)) {
2208 2224                          no_fault();
2209 2225                          MOBJ_STAT_ADD(aout_uzero_fault);
2210 2226                          (void) as_unmap(as, start_addr, osize);
2211 2227                          return (EFAULT);
2212 2228                  }
2213 2229                  uzero(addr, zfoddiff);
2214 2230                  no_fault();
2215 2231          }
2216 2232          msize += zfoddiff;
2217 2233          segnum = 2;
2218 2234  
2219 2235          /* Map bss */
2220 2236          if (hdr->a_bss > zfoddiff) {
2221 2237                  struct segvn_crargs crargs =
2222 2238                      SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
2223 2239                  MOBJ_STAT_ADD(aout_map_bss);
2224 2240                  addr += zfoddiff;
2225 2241                  size = hdr->a_bss - zfoddiff;
2226 2242                  as_rangelock(as);
2227 2243                  (void) as_unmap(as, addr, size);
2228 2244                  error = as_map(as, addr, size, segvn_create, &crargs);
2229 2245                  as_rangeunlock(as);
2230 2246                  msize += size;
2231 2247  
2232 2248                  if (error) {
2233 2249                          MOBJ_STAT_ADD(aout_bss_fail);
2234 2250                          (void) as_unmap(as, start_addr, osize);
2235 2251                          return (error);
2236 2252                  }
2237 2253                  mrp[2].mr_addr = addr;
2238 2254                  mrp[2].mr_msize = size;
2239 2255                  mrp[2].mr_fsize = 0;
2240 2256                  mrp[2].mr_offset = 0;
2241 2257                  mrp[2].mr_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
2242 2258                  mrp[2].mr_flags = 0;
2243 2259  
2244 2260                  addr += size;
2245 2261                  segnum = 3;
2246 2262          }
2247 2263  
2248 2264          /*
2249 2265           * If we have extra bits left over, we need to include that in how
2250 2266           * much we mapped to make sure the nlist logic is correct
2251 2267           */
2252 2268          msize = P2ROUNDUP(msize, PAGESIZE);
2253 2269  
2254 2270          if (nsize && msize < nsize) {
2255 2271                  MOBJ_STAT_ADD(aout_nlist);
2256 2272                  mrp[segnum].mr_addr = addr;
2257 2273                  mrp[segnum].mr_msize = nsize - msize;
2258 2274                  mrp[segnum].mr_fsize = 0;
2259 2275                  mrp[segnum].mr_offset = 0;
2260 2276                  mrp[segnum].mr_prot = PROT_READ | PROT_EXEC;
2261 2277                  mrp[segnum].mr_flags = 0;
2262 2278          }
2263 2279  
2264 2280          *num_mapped = to_map;
2265 2281          return (0);
2266 2282  }
2267 2283  #endif
2268 2284  
2269 2285  /*
2270 2286   * These are the two types of files that we can interpret and we want to read
2271 2287   * in enough info to cover both types when looking at the initial header.
2272 2288   */
2273 2289  #define MAX_HEADER_SIZE (MAX(sizeof (Ehdr), sizeof (struct exec)))
2274 2290  
2275 2291  /*
2276 2292   * Map vp passed in in an interpreted manner.  ELF and AOUT files will be
2277 2293   * interpreted and mapped appropriately for execution.
2278 2294   * num_mapped in - # elements in mrp
2279 2295   * num_mapped out - # sections mapped and length of mrp array if
2280 2296   *                  no errors or E2BIG returned.
2281 2297   *
2282 2298   * Returns 0 on success, errno value on failure.
2283 2299   */
2284 2300  static int
2285 2301  mmapobj_map_interpret(vnode_t *vp, mmapobj_result_t *mrp,
2286 2302      uint_t *num_mapped, size_t padding, cred_t *fcred)
2287 2303  {
2288 2304          int error = 0;
2289 2305          vattr_t vattr;
2290 2306          struct lib_va *lvp;
2291 2307          caddr_t start_addr;
2292 2308          model_t model;
2293 2309  
2294 2310          /*
2295 2311           * header has to be aligned to the native size of ulong_t in order
2296 2312           * to avoid an unaligned access when dereferencing the header as
2297 2313           * a ulong_t.  Thus we allocate our array on the stack of type
2298 2314           * ulong_t and then have header, which we dereference later as a char
2299 2315           * array point at lheader.
2300 2316           */
2301 2317          ulong_t lheader[(MAX_HEADER_SIZE / (sizeof (ulong_t))) + 1];
2302 2318          caddr_t header = (caddr_t)&lheader;
2303 2319  
2304 2320          vattr.va_mask = AT_FSID | AT_NODEID | AT_CTIME | AT_MTIME | AT_SIZE;

↓ open down ↓

265 lines elided

↑ open up ↑

2305 2321          error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL);
2306 2322          if (error) {
2307 2323                  return (error);
2308 2324          }
2309 2325  
2310 2326          /*
2311 2327           * Check lib_va to see if we already have a full description
2312 2328           * for this library.  This is the fast path and only used for
2313 2329           * ET_DYN ELF files (dynamic libraries).
2314 2330           */
2315      -        if (padding == 0 && (lvp = lib_va_find(&vattr)) != NULL) {
     2331 +        if (padding == 0 && !secflag_enabled(curproc, PROC_SEC_ASLR) &&
     2332 +            ((lvp = lib_va_find(&vattr)) != NULL)) {
2316 2333                  int num_segs;
2317 2334  
2318 2335                  model = get_udatamodel();
2319 2336                  if ((model == DATAMODEL_ILP32 &&
2320 2337                      lvp->lv_flags & LV_ELF64) ||
2321 2338                      (model == DATAMODEL_LP64 &&
2322 2339                      lvp->lv_flags & LV_ELF32)) {
2323 2340                          lib_va_release(lvp);
2324 2341                          MOBJ_STAT_ADD(fast_wrong_model);
2325 2342                          return (ENOTSUP);

2326 2343                  }
2327 2344                  num_segs = lvp->lv_num_segs;
2328 2345                  if (*num_mapped < num_segs) {
2329 2346                          *num_mapped = num_segs;
2330 2347                          lib_va_release(lvp);
2331 2348                          MOBJ_STAT_ADD(fast_e2big);
2332 2349                          return (E2BIG);
2333 2350                  }
2334 2351  
2335 2352                  /*
2336 2353                   * Check to see if we have all the mappable program headers
2337 2354                   * cached.
2338 2355                   */
2339 2356                  if (num_segs <= LIBVA_CACHED_SEGS && num_segs != 0) {
2340 2357                          MOBJ_STAT_ADD(fast);
2341 2358                          start_addr = mmapobj_lookup_start_addr(lvp);
2342 2359                          if (start_addr == NULL) {
2343 2360                                  lib_va_release(lvp);
2344 2361                                  return (ENOMEM);
2345 2362                          }
2346 2363  
2347 2364                          bcopy(lvp->lv_mps, mrp,
2348 2365                              num_segs * sizeof (mmapobj_result_t));
2349 2366  
2350 2367                          error = mmapobj_map_elf(vp, start_addr, mrp,
2351 2368                              num_segs, fcred, ET_DYN);
2352 2369  
2353 2370                          lib_va_release(lvp);
2354 2371                          if (error == 0) {
2355 2372                                  *num_mapped = num_segs;
2356 2373                                  MOBJ_STAT_ADD(fast_success);
2357 2374                          }
2358 2375                          return (error);
2359 2376                  }
2360 2377                  MOBJ_STAT_ADD(fast_not_now);
2361 2378  
2362 2379                  /* Release it for now since we'll look it up below */
2363 2380                  lib_va_release(lvp);
2364 2381          }
2365 2382  
2366 2383          /*
2367 2384           * Time to see if this is a file we can interpret.  If it's smaller
2368 2385           * than this, then we can't interpret it.
2369 2386           */
2370 2387          if (vattr.va_size < MAX_HEADER_SIZE) {
2371 2388                  MOBJ_STAT_ADD(small_file);
2372 2389                  return (ENOTSUP);
2373 2390          }
2374 2391  
2375 2392          if ((error = vn_rdwr(UIO_READ, vp, header, MAX_HEADER_SIZE, 0,
2376 2393              UIO_SYSSPACE, 0, (rlim64_t)0, fcred, NULL)) != 0) {
2377 2394                  MOBJ_STAT_ADD(read_error);
2378 2395                  return (error);
2379 2396          }
2380 2397  
2381 2398          /* Verify file type */
2382 2399          if (header[EI_MAG0] == ELFMAG0 && header[EI_MAG1] == ELFMAG1 &&
2383 2400              header[EI_MAG2] == ELFMAG2 && header[EI_MAG3] == ELFMAG3) {
2384 2401                  return (doelfwork((Ehdr *)lheader, vp, mrp, num_mapped,
2385 2402                      padding, fcred));
2386 2403          }
2387 2404  
2388 2405  #if defined(__sparc)
2389 2406          /* On sparc, check for 4.X AOUT format */
2390 2407          switch (((struct exec *)header)->a_magic) {
2391 2408          case OMAGIC:
2392 2409          case ZMAGIC:
2393 2410          case NMAGIC:
2394 2411                  return (doaoutwork(vp, mrp, num_mapped,
2395 2412                      (struct exec *)lheader, fcred));
2396 2413          }
2397 2414  #endif
2398 2415  
2399 2416          /* Unsupported type */
2400 2417          MOBJ_STAT_ADD(unsupported);
2401 2418          return (ENOTSUP);
2402 2419  }
2403 2420  
2404 2421  /*
2405 2422   * Given a vnode, map it as either a flat file or interpret it and map
2406 2423   * it according to the rules of the file type.
2407 2424   * *num_mapped will contain the size of the mmapobj_result_t array passed in.
2408 2425   * If padding is non-zero, the mappings will be padded by that amount
2409 2426   * rounded up to the nearest pagesize.
2410 2427   * If the mapping is successful, *num_mapped will contain the number of
2411 2428   * distinct mappings created, and mrp will point to the array of
2412 2429   * mmapobj_result_t's which describe these mappings.
2413 2430   *
2414 2431   * Returns 0 on success, or an errno value on failure.
2415 2432   * A special error case returns E2BIG when there are more than
2416 2433   * *num_mapped mappings to be created and *num_mapped will be set to the
2417 2434   * number of mappings needed.
2418 2435   */
2419 2436  int
2420 2437  mmapobj(vnode_t *vp, uint_t flags, mmapobj_result_t *mrp,
2421 2438      uint_t *num_mapped, size_t padding, cred_t *fcred)
2422 2439  {
2423 2440          int to_map;
2424 2441          int error = 0;
2425 2442  
2426 2443          ASSERT((padding & PAGEOFFSET) == 0);
2427 2444          ASSERT((flags & ~MMOBJ_ALL_FLAGS) == 0);
2428 2445          ASSERT(num_mapped != NULL);
2429 2446          ASSERT((flags & MMOBJ_PADDING) ? padding != 0 : padding == 0);
2430 2447  
2431 2448          if ((flags & MMOBJ_INTERPRET) == 0) {
2432 2449                  to_map = padding ? 3 : 1;
2433 2450                  if (*num_mapped < to_map) {
2434 2451                          *num_mapped = to_map;
2435 2452                          MOBJ_STAT_ADD(flat_e2big);
2436 2453                          return (E2BIG);
2437 2454                  }
2438 2455                  error = mmapobj_map_flat(vp, mrp, padding, fcred);
2439 2456  
2440 2457                  if (error) {
2441 2458                          return (error);
2442 2459                  }
2443 2460                  *num_mapped = to_map;
2444 2461                  return (0);
2445 2462          }
2446 2463  
2447 2464          error = mmapobj_map_interpret(vp, mrp, num_mapped, padding, fcred);
2448 2465          return (error);
2449 2466  }

↓ open down ↓

124 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX