sync further changes from uts/aslr
7029 want per-process exploit mitigation features (secflags)
7030 want basic address space layout randomization (aslr)
7031 noexec_user_stack should be a secflag
7032 want a means to forbid mappings around NULL.
--- old/usr/src/uts/common/os/grow.c
+++ new/usr/src/uts/common/os/grow.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
23 23
24 24 /*
25 25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
26 26 * Use is subject to license terms.
27 27 */
28 28
29 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 30 /* All Rights Reserved */
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/inttypes.h>
34 34 #include <sys/param.h>
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/systm.h>
37 37 #include <sys/signal.h>
38 38 #include <sys/user.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/var.h>
41 41 #include <sys/proc.h>
42 42 #include <sys/tuneable.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/cmn_err.h>
45 45 #include <sys/cred.h>
46 46 #include <sys/vnode.h>
47 47 #include <sys/vfs.h>
48 48 #include <sys/vm.h>
49 49 #include <sys/file.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/vmparam.h>
52 52 #include <sys/fcntl.h>
53 53 #include <sys/lwpchan_impl.h>
54 54 #include <sys/nbmlock.h>
55 55
56 56 #include <vm/hat.h>
57 57 #include <vm/as.h>
58 58 #include <vm/seg.h>
59 59 #include <vm/seg_dev.h>
60 60 #include <vm/seg_vn.h>
61 61
62 62 int use_brk_lpg = 1;
63 63 int use_stk_lpg = 1;
64 64
65 +/*
66 + * If set, we will not randomize mappings where the 'addr' argument is
67 + * non-NULL and not an alignment.
68 + */
69 +int aslr_respect_mmap_hint = 0;
70 +
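Editor's note: aslr_respect_mmap_hint is an ordinary global in genunix, so (assuming the usual mechanism for such tunables, which this webrev does not itself document) it could be forced on at boot from /etc/system. A hypothetical fragment:

* Hypothetical /etc/system entry: honour non-NULL mmap(2) address hints
* even for processes running with ASLR enabled.
set aslr_respect_mmap_hint = 1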
65 71 static int brk_lpg(caddr_t nva);
66 72 static int grow_lpg(caddr_t sp);
67 73
68 -int
74 +intptr_t
69 75 brk(caddr_t nva)
70 76 {
71 77 int error;
72 78 proc_t *p = curproc;
73 79
74 80 /*
75 81 * Serialize brk operations on an address space.
76 82 * This also serves as the lock protecting p_brksize
77 83 * and p_brkpageszc.
78 84 */
79 85 as_rangelock(p->p_as);
86 +
87 + /*
88 + * As a special case to aid the implementation of sbrk(3C), if given a
89 + * new brk of 0, return the current brk. We'll hide this in brk(3C).
90 + */
91 + if (nva == 0) {
92 + as_rangeunlock(p->p_as);
93 + return ((intptr_t)(p->p_brkbase + p->p_brksize));
94 + }
95 +
80 96 if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
81 97 error = brk_lpg(nva);
82 98 } else {
83 99 error = brk_internal(nva, p->p_brkpageszc);
84 100 }
85 101 as_rangeunlock(p->p_as);
86 102 return ((error != 0 ? set_errno(error) : 0));
87 103 }
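Editor's note: the brk(0) special case above exists so that sbrk(3C) can be layered on the system call without libc having to track the break itself. A minimal userland sketch of that layering, assuming a hypothetical raw wrapper __brk_raw() that simply returns the system call's intptr_t result; this is not the actual libc code and it ignores the locking the real brk(3C)/sbrk(3C) need:

#include <stdint.h>

extern intptr_t __brk_raw(void *);	/* hypothetical raw brk syscall wrapper */

void *
my_sbrk(intptr_t incr)
{
	/* With the change above, a new break of 0 returns the current break. */
	intptr_t oldbrk = __brk_raw((void *)0);

	if (incr == 0)
		return ((void *)oldbrk);

	/* For a non-zero request the syscall still returns 0 on success. */
	if (__brk_raw((void *)(oldbrk + incr)) != 0)
		return ((void *)-1);	/* errno is set by the failed brk */

	return ((void *)oldbrk);
}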
88 104
89 105 /*
90 106 * Algorithm: call arch-specific map_pgsz to get best page size to use,
91 107 * then call brk_internal().
92 108 * Returns 0 on success.
93 109 */
94 110 static int
95 111 brk_lpg(caddr_t nva)
96 112 {
97 113 struct proc *p = curproc;
98 114 size_t pgsz, len;
99 115 caddr_t addr, brkend;
100 116 caddr_t bssbase = p->p_bssbase;
101 117 caddr_t brkbase = p->p_brkbase;
102 118 int oszc, szc;
103 119 int err;
104 120
105 121 oszc = p->p_brkpageszc;
106 122
107 123 /*
108 124 * If p_brkbase has not yet been set, the first call
109 125 * to brk_internal() will initialize it.
110 126 */
111 127 if (brkbase == 0) {
112 128 return (brk_internal(nva, oszc));
113 129 }
114 130
115 131 len = nva - bssbase;
116 132
117 133 pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
118 134 szc = page_szc(pgsz);
119 135
120 136 /*
121 137 * Covers two cases:
122 138 * 1. page_szc() returns -1 for invalid page size, so we want to
123 139 * ignore it in that case.
124 140 * 2. By design we never decrease page size, as it is more stable.
125 141 */
126 142 if (szc <= oszc) {
127 143 err = brk_internal(nva, oszc);
128 144 /* If failed, back off to base page size. */
129 145 if (err != 0 && oszc != 0) {
130 146 err = brk_internal(nva, 0);
131 147 }
132 148 return (err);
133 149 }
134 150
135 151 err = brk_internal(nva, szc);
136 152 /* If using szc failed, map with base page size and return. */
137 153 if (err != 0) {
138 154 if (szc != 0) {
139 155 err = brk_internal(nva, 0);
140 156 }
141 157 return (err);
142 158 }
143 159
144 160 /*
145 161 * Round up brk base to a large page boundary and remap
146 162 * anything in the segment already faulted in beyond that
147 163 * point.
148 164 */
149 165 addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
150 166 brkend = brkbase + p->p_brksize;
151 167 len = brkend - addr;
152 168 /* Check that len is not negative. Update page size code for heap. */
153 169 if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
154 170 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
155 171 p->p_brkpageszc = szc;
156 172 }
157 173
158 174 ASSERT(err == 0);
159 175 return (err); /* should always be 0 */
160 176 }
161 177
162 178 /*
163 179 * Returns 0 on success.
164 180 */
165 181 int
166 182 brk_internal(caddr_t nva, uint_t brkszc)
167 183 {
168 184 caddr_t ova; /* current break address */
169 185 size_t size;
170 186 int error;
171 187 struct proc *p = curproc;
172 188 struct as *as = p->p_as;
173 189 size_t pgsz;
174 190 uint_t szc;
175 191 rctl_qty_t as_rctl;
176 192
177 193 /*
178 194 * extend heap to brkszc alignment but use current p->p_brkpageszc
179 195 * for the newly created segment. This allows the new extension
180 196 * segment to be concatenated successfully with the existing brk
181 197 * segment.
182 198 */
183 199 if ((szc = brkszc) != 0) {
184 200 pgsz = page_get_pagesize(szc);
185 201 ASSERT(pgsz > PAGESIZE);
186 202 } else {
187 203 pgsz = PAGESIZE;
188 204 }
189 205
190 206 mutex_enter(&p->p_lock);
191 207 as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
192 208 p->p_rctls, p);
193 209 mutex_exit(&p->p_lock);
194 210
195 211 /*
196 212 * If p_brkbase has not yet been set, the first call
197 213 * to brk() will initialize it.
198 214 */
199 215 if (p->p_brkbase == 0)
200 216 p->p_brkbase = nva;
201 217
202 218 /*
203 219 * Before multiple page size support existed p_brksize was the value
204 220 * not rounded to the pagesize (i.e. it stored the exact user request
205 221 * for heap size). If pgsz is greater than PAGESIZE calculate the
206 222 * heap size as the real new heap size by rounding it up to pgsz.
207 223 * This is useful since we may want to know where the heap ends
208 224 * without knowing heap pagesize (e.g. some old code) and also if
209 225 * heap pagesize changes we can update p_brkpageszc but delay adding
210 226 * new mapping yet still know from p_brksize where the heap really
211 227 	 * ends. The user-requested heap end is stored in a libc variable.
212 228 */
213 229 if (pgsz > PAGESIZE) {
214 230 caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
215 231 size = tnva - p->p_brkbase;
216 232 if (tnva < p->p_brkbase || (size > p->p_brksize &&
217 233 size > (size_t)as_rctl)) {
218 234 szc = 0;
219 235 pgsz = PAGESIZE;
220 236 size = nva - p->p_brkbase;
221 237 }
222 238 } else {
223 239 size = nva - p->p_brkbase;
224 240 }
225 241
226 242 /*
227 243 * use PAGESIZE to roundup ova because we want to know the real value
228 244 * of the current heap end in case p_brkpageszc changes since the last
229 245 * p_brksize was computed.
230 246 */
231 247 nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
232 248 ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
233 249 PAGESIZE);
234 250
235 251 if ((nva < p->p_brkbase) || (size > p->p_brksize &&
236 252 size > as_rctl)) {
237 253 mutex_enter(&p->p_lock);
238 254 (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
239 255 RCA_SAFE);
240 256 mutex_exit(&p->p_lock);
241 257 return (ENOMEM);
242 258 }
243 259
244 260 if (nva > ova) {
245 261 struct segvn_crargs crargs =
246 262 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
247 263
248 264 if (!(p->p_datprot & PROT_EXEC)) {
249 265 crargs.prot &= ~PROT_EXEC;
250 266 }
251 267
252 268 /*
253 269 * Add new zfod mapping to extend UNIX data segment
254 270 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
255 271 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
256 272 * page sizes if ova is not aligned to szc's pgsz.
257 273 */
258 274 if (szc > 0) {
259 275 caddr_t rbss;
260 276
261 277 rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
262 278 pgsz);
263 279 if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
264 280 crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
265 281 AS_MAP_NO_LPOOB;
266 282 } else if (ova == rbss) {
267 283 crargs.szc = szc;
268 284 } else {
269 285 crargs.szc = AS_MAP_HEAP;
270 286 }
271 287 } else {
272 288 crargs.szc = AS_MAP_NO_LPOOB;
273 289 }
274 290 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
275 291 error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
276 292 &crargs);
277 293 if (error) {
278 294 return (error);
279 295 }
280 296
281 297 } else if (nva < ova) {
282 298 /*
283 299 * Release mapping to shrink UNIX data segment.
284 300 */
285 301 (void) as_unmap(as, nva, (size_t)(ova - nva));
286 302 }
287 303 p->p_brksize = size;
288 304 return (0);
289 305 }
290 306
291 307 /*
292 308 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
293 309 * This routine assumes that the stack grows downward.
294 310 */
295 311 int
296 312 grow(caddr_t sp)
297 313 {
298 314 struct proc *p = curproc;
299 315 struct as *as = p->p_as;
300 316 size_t oldsize = p->p_stksize;
301 317 size_t newsize;
302 318 int err;
303 319
304 320 /*
305 321 * Serialize grow operations on an address space.
306 322 * This also serves as the lock protecting p_stksize
307 323 * and p_stkpageszc.
308 324 */
309 325 as_rangelock(as);
310 326 if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
311 327 err = grow_lpg(sp);
312 328 } else {
313 329 err = grow_internal(sp, p->p_stkpageszc);
314 330 }
315 331 as_rangeunlock(as);
316 332
317 333 if (err == 0 && (newsize = p->p_stksize) > oldsize) {
318 334 ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
319 335 ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
320 336 /*
321 337 * Set up translations so the process doesn't have to fault in
322 338 * the stack pages we just gave it.
323 339 */
324 340 (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
325 341 newsize - oldsize, F_INVAL, S_WRITE);
326 342 }
327 343 return ((err == 0 ? 1 : 0));
328 344 }
329 345
330 346 /*
331 347 * Algorithm: call arch-specific map_pgsz to get best page size to use,
332 348 * then call grow_internal().
333 349 * Returns 0 on success.
334 350 */
335 351 static int
336 352 grow_lpg(caddr_t sp)
337 353 {
338 354 struct proc *p = curproc;
339 355 size_t pgsz;
340 356 size_t len, newsize;
341 357 caddr_t addr, saddr;
342 358 caddr_t growend;
343 359 int oszc, szc;
344 360 int err;
345 361
346 362 newsize = p->p_usrstack - sp;
347 363
348 364 oszc = p->p_stkpageszc;
349 365 pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
350 366 szc = page_szc(pgsz);
351 367
352 368 /*
353 369 * Covers two cases:
354 370 * 1. page_szc() returns -1 for invalid page size, so we want to
355 371 * ignore it in that case.
356 372 * 2. By design we never decrease page size, as it is more stable.
357 373 * This shouldn't happen as the stack never shrinks.
358 374 */
359 375 if (szc <= oszc) {
360 376 err = grow_internal(sp, oszc);
361 377 /* failed, fall back to base page size */
362 378 if (err != 0 && oszc != 0) {
363 379 err = grow_internal(sp, 0);
364 380 }
365 381 return (err);
366 382 }
367 383
368 384 /*
369 385 * We've grown sufficiently to switch to a new page size.
370 386 * So we are going to remap the whole segment with the new page size.
371 387 */
372 388 err = grow_internal(sp, szc);
373 389 /* The grow with szc failed, so fall back to base page size. */
374 390 if (err != 0) {
375 391 if (szc != 0) {
376 392 err = grow_internal(sp, 0);
377 393 }
378 394 return (err);
379 395 }
380 396
381 397 /*
382 398 * Round up stack pointer to a large page boundary and remap
383 399 * any pgsz pages in the segment already faulted in beyond that
384 400 * point.
385 401 */
386 402 saddr = p->p_usrstack - p->p_stksize;
387 403 addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
388 404 growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
389 405 len = growend - addr;
390 406 /* Check that len is not negative. Update page size code for stack. */
391 407 if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
392 408 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
393 409 p->p_stkpageszc = szc;
394 410 }
395 411
396 412 ASSERT(err == 0);
397 413 return (err); /* should always be 0 */
398 414 }
399 415
400 416 /*
401 417 * This routine assumes that the stack grows downward.
402 418 * Returns 0 on success, errno on failure.
403 419 */
404 420 int
405 421 grow_internal(caddr_t sp, uint_t growszc)
406 422 {
407 423 struct proc *p = curproc;
408 424 size_t newsize;
409 425 size_t oldsize;
410 426 int error;
411 427 size_t pgsz;
412 428 uint_t szc;
413 429 struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
414 430
415 431 ASSERT(sp < p->p_usrstack);
416 432 sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
417 433
418 434 /*
419 435 * grow to growszc alignment but use current p->p_stkpageszc for
420 436 * the segvn_crargs szc passed to segvn_create. For memcntl to
421 437 * increase the szc, this allows the new extension segment to be
422 438 * concatenated successfully with the existing stack segment.
423 439 */
424 440 if ((szc = growszc) != 0) {
425 441 pgsz = page_get_pagesize(szc);
426 442 ASSERT(pgsz > PAGESIZE);
427 443 newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
428 444 if (newsize > (size_t)p->p_stk_ctl) {
429 445 szc = 0;
430 446 pgsz = PAGESIZE;
431 447 newsize = p->p_usrstack - sp;
432 448 }
433 449 } else {
434 450 pgsz = PAGESIZE;
435 451 newsize = p->p_usrstack - sp;
436 452 }
437 453
438 454 if (newsize > (size_t)p->p_stk_ctl) {
439 455 (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
440 456 RCA_UNSAFE_ALL);
441 457
442 458 return (ENOMEM);
443 459 }
444 460
445 461 oldsize = p->p_stksize;
446 462 ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
447 463
448 464 if (newsize <= oldsize) { /* prevent the stack from shrinking */
449 465 return (0);
450 466 }
451 467
452 468 if (!(p->p_stkprot & PROT_EXEC)) {
453 469 crargs.prot &= ~PROT_EXEC;
454 470 }
455 471 /*
456 472 * extend stack with the proposed new growszc, which is different
457 473 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
458 474 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
459 475 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
460 476 * if not aligned to szc's pgsz.
461 477 */
462 478 if (szc > 0) {
463 479 caddr_t oldsp = p->p_usrstack - oldsize;
464 480 caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
465 481 pgsz);
466 482
467 483 if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
468 484 crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
469 485 AS_MAP_NO_LPOOB;
470 486 } else if (oldsp == austk) {
471 487 crargs.szc = szc;
472 488 } else {
473 489 crargs.szc = AS_MAP_STACK;
474 490 }
475 491 } else {
476 492 crargs.szc = AS_MAP_NO_LPOOB;
477 493 }
478 494 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
479 495
480 496 if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
481 497 segvn_create, &crargs)) != 0) {
482 498 if (error == EAGAIN) {
483 499 cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
484 500 "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
485 501 }
486 502 return (error);
487 503 }
488 504 p->p_stksize = newsize;
489 505 return (0);
490 506 }
491 507
492 508 /*
493 - * Find address for user to map.
494 - * If MAP_FIXED is not specified, we can pick any address we want, but we will
495 - * first try the value in *addrp if it is non-NULL. Thus this is implementing
496 - * a way to try and get a preferred address.
509 + * Find address for user to map. If MAP_FIXED is not specified, we can pick
510 + * any address we want, but we will first try the value in *addrp if it is
511 + * non-NULL and _MAP_RANDOMIZE is not set. Thus this is implementing a way to
512 + * try and get a preferred address.
497 513 */
498 514 int
499 515 choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
500 516 int vacalign, uint_t flags)
501 517 {
502 518 caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
503 519 size_t lenp = len;
504 520
505 521 ASSERT(AS_ISCLAIMGAP(as)); /* searches should be serialized */
506 522 if (flags & MAP_FIXED) {
507 523 (void) as_unmap(as, *addrp, len);
508 524 return (0);
509 - } else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
525 + } else if (basep != NULL &&
526 + ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
510 527 !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
511 528 /* User supplied address was available */
512 529 *addrp = basep;
513 530 } else {
514 531 /*
515 532 * No user supplied address or the address supplied was not
516 533 * available.
517 534 */
518 535 map_addr(addrp, len, off, vacalign, flags);
519 536 }
520 537 if (*addrp == NULL)
521 538 return (ENOMEM);
522 539 return (0);
523 540 }
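Editor's note: from userland, the behaviour choose_addr() implements looks like this: a non-NULL address passed to mmap(2) without MAP_FIXED is only a preference, and once the process carries the ASLR secflag the kernel adds _MAP_RANDOMIZE and ignores the hint unless aslr_respect_mmap_hint is set. A small illustrative program (the hint value is arbitrary):

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	void *hint = (void *)0x40000000UL;	/* arbitrary example hint */
	void *p = mmap(hint, 8192, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return (1);
	}
	/* Under ASLR, p will usually differ from hint; without it, they often match. */
	(void) printf("asked for %p, got %p\n", hint, p);
	return (0);
}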
524 541
525 542
526 543 /*
527 544 * Used for MAP_ANON - fast way to get anonymous pages
528 545 */
529 546 static int
530 547 zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
531 548 offset_t pos)
532 549 {
533 550 struct segvn_crargs vn_a;
534 551 int error;
535 552
536 553 if (((PROT_ALL & uprot) != uprot))
537 554 return (EACCES);
538 555
539 556 if ((flags & MAP_FIXED) != 0) {
540 557 caddr_t userlimit;
541 558
542 559 /*
543 560 * Use the user address. First verify that
544 561 * the address to be used is page aligned.
545 562 * Then make some simple bounds checks.
546 563 */
547 564 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
548 565 return (EINVAL);
549 566
550 567 userlimit = flags & _MAP_LOW32 ?
551 568 (caddr_t)USERLIMIT32 : as->a_userlimit;
552 569 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
553 570 case RANGE_OKAY:
554 571 break;
555 572 case RANGE_BADPROT:
556 573 return (ENOTSUP);
557 574 case RANGE_BADADDR:
558 575 default:
559 576 return (ENOMEM);
560 577 }
561 578 }
562 579 /*
563 580 * No need to worry about vac alignment for anonymous
564 581 * pages since this is a "clone" object that doesn't
565 582 * yet exist.
566 583 */
567 584 error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
568 585 if (error != 0) {
569 586 return (error);
570 587 }
571 588
572 589 /*
573 590 * Use the seg_vn segment driver; passing in the NULL amp
574 591 * gives the desired "cloning" effect.
575 592 */
576 593 vn_a.vp = NULL;
577 594 vn_a.offset = 0;
578 595 vn_a.type = flags & MAP_TYPE;
579 596 vn_a.prot = uprot;
580 597 vn_a.maxprot = PROT_ALL;
581 598 vn_a.flags = flags & ~MAP_TYPE;
582 599 vn_a.cred = CRED();
583 600 vn_a.amp = NULL;
584 601 vn_a.szc = 0;
585 602 vn_a.lgrp_mem_policy_flags = 0;
586 603
587 604 return (as_map(as, *addrp, len, segvn_create, &vn_a));
588 605 }
589 606
607 +#define RANDOMIZABLE_MAPPING(addr, flags) (((flags & MAP_FIXED) == 0) && \
608 + !(((flags & MAP_ALIGN) == 0) && (addr != 0) && aslr_respect_mmap_hint))
609 +
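Editor's note: RANDOMIZABLE_MAPPING() is easy to misread because of the nested negation. An equivalent restatement with a few spot checks, written as a standalone user-level program purely for illustration (it is not part of the changeset):

#include <assert.h>
#include <stdint.h>
#include <sys/mman.h>

static int
randomizable(uintptr_t addr, int flags, int respect_hint)
{
	if (flags & MAP_FIXED)
		return (0);	/* fixed mappings are never randomized */
	/* A plain (non-MAP_ALIGN) hint suppresses randomization only if the tunable is set. */
	if (!(flags & MAP_ALIGN) && addr != 0 && respect_hint)
		return (0);
	return (1);
}

int
main(void)
{
	assert(randomizable(0, 0, 0) == 1);			/* no hint, no MAP_FIXED */
	assert(randomizable(0x10000, 0, 0) == 1);		/* hint given, tunable off */
	assert(randomizable(0x10000, 0, 1) == 0);		/* hint respected */
	assert(randomizable(0x10000, MAP_FIXED, 1) == 0);	/* MAP_FIXED never randomized */
	assert(randomizable(0x200000, MAP_ALIGN, 1) == 1);	/* alignment request, not a hint */
	return (0);
}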
590 610 static int
591 611 smmap_common(caddr_t *addrp, size_t len,
592 612 int prot, int flags, struct file *fp, offset_t pos)
593 613 {
594 614 struct vnode *vp;
595 615 struct as *as = curproc->p_as;
596 616 uint_t uprot, maxprot, type;
597 617 int error;
598 618 int in_crit = 0;
599 619
600 620 if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
601 621 _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
602 622 MAP_TEXT | MAP_INITDATA)) != 0) {
603 623 /* | MAP_RENAME */ /* not implemented, let user know */
604 624 return (EINVAL);
605 625 }
606 626
607 627 if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
608 628 return (EINVAL);
609 629 }
610 630
611 631 if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
612 632 return (EINVAL);
613 633 }
614 634
635 + if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
636 + (MAP_FIXED | _MAP_RANDOMIZE)) {
637 + return (EINVAL);
638 + }
639 +
640 + /*
641 + * If it's not a fixed allocation and mmap ASLR is enabled, randomize
642 + * it.
643 + */
644 + if (RANDOMIZABLE_MAPPING(*addrp, flags) &&
645 + secflag_enabled(curproc, PROC_SEC_ASLR))
646 + flags |= _MAP_RANDOMIZE;
647 +
615 648 #if defined(__sparc)
616 649 /*
617 650 * See if this is an "old mmap call". If so, remember this
618 651 * fact and convert the flags value given to mmap to indicate
619 652 * the specified address in the system call must be used.
620 653 	 * _MAP_NEW is set by all new uses of mmap.
621 654 */
622 655 if ((flags & _MAP_NEW) == 0)
623 656 flags |= MAP_FIXED;
624 657 #endif
625 658 flags &= ~_MAP_NEW;
626 659
627 660 type = flags & MAP_TYPE;
628 661 if (type != MAP_PRIVATE && type != MAP_SHARED)
629 662 return (EINVAL);
630 663
631 664
632 665 if (flags & MAP_ALIGN) {
633 -
634 666 if (flags & MAP_FIXED)
635 667 return (EINVAL);
636 668
637 669 /* alignment needs to be a power of 2 >= page size */
638 670 if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
639 671 !ISP2((uintptr_t)*addrp))
640 672 return (EINVAL);
641 673 }
642 674 /*
643 675 * Check for bad lengths and file position.
644 676 * We let the VOP_MAP routine check for negative lengths
645 677 * since on some vnode types this might be appropriate.
646 678 */
647 679 if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
648 680 return (EINVAL);
649 681
650 682 maxprot = PROT_ALL; /* start out allowing all accesses */
651 683 uprot = prot | PROT_USER;
652 684
653 685 if (fp == NULL) {
654 686 ASSERT(flags & MAP_ANON);
655 687 /* discard lwpchan mappings, like munmap() */
656 688 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
657 689 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
658 690 as_rangelock(as);
659 691 error = zmap(as, addrp, len, uprot, flags, pos);
660 692 as_rangeunlock(as);
661 693 /*
662 694 * Tell machine specific code that lwp has mapped shared memory
663 695 */
664 696 if (error == 0 && (flags & MAP_SHARED)) {
665 697 /* EMPTY */
666 698 LWP_MMODEL_SHARED_AS(*addrp, len);
667 699 }
668 700 return (error);
669 701 } else if ((flags & MAP_ANON) != 0)
670 702 return (EINVAL);
671 703
672 704 vp = fp->f_vnode;
673 705
674 706 /* Can't execute code from "noexec" mounted filesystem. */
675 707 if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
676 708 maxprot &= ~PROT_EXEC;
677 709
678 710 /*
679 711 * These checks were added as part of large files.
680 712 *
681 713 * Return ENXIO if the initial position is negative; return EOVERFLOW
682 714 * if (offset + len) would overflow the maximum allowed offset for the
683 715 * type of file descriptor being used.
684 716 */
685 717 if (vp->v_type == VREG) {
686 718 if (pos < 0)
687 719 return (ENXIO);
688 720 if ((offset_t)len > (OFFSET_MAX(fp) - pos))
689 721 return (EOVERFLOW);
690 722 }
691 723
692 724 if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
693 725 /* no write access allowed */
694 726 maxprot &= ~PROT_WRITE;
695 727 }
696 728
697 729 /*
698 730 * XXX - Do we also adjust maxprot based on protections
699 731 * of the vnode? E.g. if no execute permission is given
700 732 * on the vnode for the current user, maxprot probably
701 733 * should disallow PROT_EXEC also? This is different
702 734 * from the write access as this would be a per vnode
703 735 * test as opposed to a per fd test for writability.
704 736 */
705 737
706 738 /*
707 739 * Verify that the specified protections are not greater than
708 740 * the maximum allowable protections. Also test to make sure
709 741 * that the file descriptor does allows for read access since
710 742 * "write only" mappings are hard to do since normally we do
711 743 * the read from the file before the page can be written.
712 744 */
713 745 if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
714 746 return (EACCES);
715 747
716 748 /*
717 749 * If the user specified an address, do some simple checks here
718 750 */
719 751 if ((flags & MAP_FIXED) != 0) {
720 752 caddr_t userlimit;
721 753
722 754 /*
723 755 * Use the user address. First verify that
724 756 * the address to be used is page aligned.
725 757 * Then make some simple bounds checks.
726 758 */
727 759 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
728 760 return (EINVAL);
729 761
730 762 userlimit = flags & _MAP_LOW32 ?
731 763 (caddr_t)USERLIMIT32 : as->a_userlimit;
732 764 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
733 765 case RANGE_OKAY:
734 766 break;
735 767 case RANGE_BADPROT:
736 768 return (ENOTSUP);
737 769 case RANGE_BADADDR:
738 770 default:
739 771 return (ENOMEM);
740 772 }
741 773 }
742 774
743 775 if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
744 776 nbl_need_check(vp)) {
745 777 int svmand;
746 778 nbl_op_t nop;
747 779
748 780 nbl_start_crit(vp, RW_READER);
749 781 in_crit = 1;
750 782 error = nbl_svmand(vp, fp->f_cred, &svmand);
751 783 if (error != 0)
752 784 goto done;
753 785 if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
754 786 if (prot & (PROT_READ | PROT_EXEC)) {
755 787 nop = NBL_READWRITE;
756 788 } else {
757 789 nop = NBL_WRITE;
758 790 }
759 791 } else {
760 792 nop = NBL_READ;
761 793 }
762 794 if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
763 795 error = EACCES;
764 796 goto done;
765 797 }
766 798 }
767 799
768 800 /* discard lwpchan mappings, like munmap() */
769 801 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
770 802 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
771 803
772 804 /*
773 805 * Ok, now let the vnode map routine do its thing to set things up.
774 806 */
775 807 error = VOP_MAP(vp, pos, as,
776 808 addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
777 809
778 810 if (error == 0) {
779 811 /*
780 812 * Tell machine specific code that lwp has mapped shared memory
781 813 */
782 814 if (flags & MAP_SHARED) {
783 815 /* EMPTY */
784 816 LWP_MMODEL_SHARED_AS(*addrp, len);
785 817 }
786 818 if (vp->v_type == VREG &&
787 819 (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
788 820 /*
789 821 * Mark this as an executable vnode
790 822 */
791 823 mutex_enter(&vp->v_lock);
792 824 vp->v_flag |= VVMEXEC;
793 825 mutex_exit(&vp->v_lock);
794 826 }
795 827 }
796 828
797 829 done:
798 830 if (in_crit)
799 831 nbl_end_crit(vp);
800 832 return (error);
801 833 }
802 834
803 835 #ifdef _LP64
804 836 /*
805 837 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
806 838 *
807 839 * The "large file" mmap routine mmap64(2) is also mapped to this routine
808 840 * by the 64-bit version of libc.
809 841 *
810 842 * Eventually, this should be the only version, and have smmap_common()
811 843 * folded back into it again. Some day.
812 844 */
813 845 caddr_t
814 846 smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
815 847 {
816 848 struct file *fp;
817 849 int error;
818 850
819 851 if (fd == -1 && (flags & MAP_ANON) != 0)
820 852 error = smmap_common(&addr, len, prot, flags,
821 853 NULL, (offset_t)pos);
822 854 else if ((fp = getf(fd)) != NULL) {
823 855 error = smmap_common(&addr, len, prot, flags,
824 856 fp, (offset_t)pos);
825 857 releasef(fd);
826 858 } else
827 859 error = EBADF;
828 860
829 861 return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
830 862 }
831 863 #endif /* _LP64 */
832 864
833 865 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
834 866
835 867 /*
836 868 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
837 869 */
838 870 caddr_t
839 871 smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
840 872 {
841 873 struct file *fp;
842 874 int error;
843 875 caddr_t a = (caddr_t)(uintptr_t)addr;
844 876
845 877 if (flags & _MAP_LOW32)
846 878 error = EINVAL;
847 879 else if (fd == -1 && (flags & MAP_ANON) != 0)
848 880 error = smmap_common(&a, (size_t)len, prot,
849 881 flags | _MAP_LOW32, NULL, (offset_t)pos);
850 882 else if ((fp = getf(fd)) != NULL) {
851 883 error = smmap_common(&a, (size_t)len, prot,
852 884 flags | _MAP_LOW32, fp, (offset_t)pos);
853 885 releasef(fd);
854 886 } else
855 887 error = EBADF;
856 888
857 889 ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
858 890
859 891 return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
860 892 }
861 893
862 894 /*
863 895 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
864 896 *
865 897 * Now things really get ugly because we can't use the C-style
866 898 * calling convention for more than 6 args, and 64-bit parameter
867 899 * passing on 32-bit systems is less than clean.
868 900 */
869 901
870 902 struct mmaplf32a {
871 903 caddr_t addr;
872 904 size_t len;
873 905 #ifdef _LP64
874 906 /*
875 907 * 32-bit contents, 64-bit cells
876 908 */
877 909 uint64_t prot;
878 910 uint64_t flags;
879 911 uint64_t fd;
880 912 uint64_t offhi;
881 913 uint64_t offlo;
882 914 #else
883 915 /*
884 916 * 32-bit contents, 32-bit cells
885 917 */
886 918 uint32_t prot;
887 919 uint32_t flags;
888 920 uint32_t fd;
889 921 uint32_t offhi;
890 922 uint32_t offlo;
891 923 #endif
892 924 };
893 925
894 926 int
895 927 smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
896 928 {
897 929 struct file *fp;
898 930 int error;
899 931 caddr_t a = uap->addr;
900 932 int flags = (int)uap->flags;
901 933 int fd = (int)uap->fd;
902 934 #ifdef _BIG_ENDIAN
903 935 offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
904 936 #else
905 937 offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
906 938 #endif
907 939
908 940 if (flags & _MAP_LOW32)
909 941 error = EINVAL;
910 942 else if (fd == -1 && (flags & MAP_ANON) != 0)
911 943 error = smmap_common(&a, uap->len, (int)uap->prot,
912 944 flags | _MAP_LOW32, NULL, off);
913 945 else if ((fp = getf(fd)) != NULL) {
914 946 error = smmap_common(&a, uap->len, (int)uap->prot,
915 947 flags | _MAP_LOW32, fp, off);
916 948 releasef(fd);
917 949 } else
918 950 error = EBADF;
919 951
920 952 if (error == 0)
921 953 rvp->r_val1 = (uintptr_t)a;
922 954 return (error);
923 955 }
924 956
925 957 #endif /* _SYSCALL32_IMPL || _ILP32 */
926 958
927 959 int
928 960 munmap(caddr_t addr, size_t len)
929 961 {
930 962 struct proc *p = curproc;
931 963 struct as *as = p->p_as;
932 964
933 965 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
934 966 return (set_errno(EINVAL));
935 967
936 968 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
937 969 return (set_errno(EINVAL));
938 970
939 971 /*
940 972 * Discard lwpchan mappings.
941 973 */
942 974 if (p->p_lcp != NULL)
943 975 lwpchan_delete_mapping(p, addr, addr + len);
944 976 if (as_unmap(as, addr, len) != 0)
945 977 return (set_errno(EINVAL));
946 978
947 979 return (0);
948 980 }
949 981
950 982 int
951 983 mprotect(caddr_t addr, size_t len, int prot)
952 984 {
953 985 struct as *as = curproc->p_as;
954 986 uint_t uprot = prot | PROT_USER;
955 987 int error;
956 988
957 989 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
958 990 return (set_errno(EINVAL));
959 991
960 992 switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
961 993 case RANGE_OKAY:
962 994 break;
963 995 case RANGE_BADPROT:
964 996 return (set_errno(ENOTSUP));
965 997 case RANGE_BADADDR:
966 998 default:
967 999 return (set_errno(ENOMEM));
968 1000 }
969 1001
970 1002 error = as_setprot(as, addr, len, uprot);
971 1003 if (error)
972 1004 return (set_errno(error));
973 1005 return (0);
974 1006 }
975 1007
976 1008 #define MC_CACHE 128 /* internal result buffer */
977 1009 #define MC_QUANTUM (MC_CACHE * PAGESIZE) /* addresses covered in loop */
978 1010
979 1011 int
980 1012 mincore(caddr_t addr, size_t len, char *vecp)
981 1013 {
982 1014 struct as *as = curproc->p_as;
983 1015 caddr_t ea; /* end address of loop */
984 1016 size_t rl; /* inner result length */
985 1017 char vec[MC_CACHE]; /* local vector cache */
986 1018 int error;
987 1019 model_t model;
988 1020 long llen;
989 1021
990 1022 model = get_udatamodel();
991 1023 /*
992 1024 * Validate form of address parameters.
993 1025 */
994 1026 if (model == DATAMODEL_NATIVE) {
995 1027 llen = (long)len;
996 1028 } else {
997 1029 llen = (int32_t)(size32_t)len;
998 1030 }
999 1031 if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
1000 1032 return (set_errno(EINVAL));
1001 1033
1002 1034 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
1003 1035 return (set_errno(ENOMEM));
1004 1036
1005 1037 /*
1006 1038 * Loop over subranges of interval [addr : addr + len), recovering
1007 1039 * results internally and then copying them out to caller. Subrange
1008 1040 * is based on the size of MC_CACHE, defined above.
1009 1041 */
1010 1042 for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
1011 1043 error = as_incore(as, addr,
1012 1044 (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
1013 1045 if (rl != 0) {
1014 1046 rl = (rl + PAGESIZE - 1) / PAGESIZE;
1015 1047 if (copyout(vec, vecp, rl) != 0)
1016 1048 return (set_errno(EFAULT));
1017 1049 vecp += rl;
1018 1050 }
1019 1051 if (error != 0)
1020 1052 return (set_errno(ENOMEM));
1021 1053 }
1022 1054 return (0);
1023 1055 }