il_7029-3-4 Wdiff usr/src/uts/common/os/grow.c

Print this page

sync further changes from uts/aslr

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/os/grow.c
          +++ new/usr/src/uts/common/os/grow.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
  23   23  
  24   24  /*
  25   25   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  26   26   * Use is subject to license terms.
  27   27   */
  28   28  
  29   29  /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  30   30  /*        All Rights Reserved   */
  31   31  
  32   32  #include <sys/types.h>
  33   33  #include <sys/inttypes.h>
  34   34  #include <sys/param.h>
  35   35  #include <sys/sysmacros.h>
  36   36  #include <sys/systm.h>
  37   37  #include <sys/signal.h>
  38   38  #include <sys/user.h>
  39   39  #include <sys/errno.h>
  40   40  #include <sys/var.h>
  41   41  #include <sys/proc.h>
  42   42  #include <sys/tuneable.h>
  43   43  #include <sys/debug.h>
  44   44  #include <sys/cmn_err.h>
  45   45  #include <sys/cred.h>
  46   46  #include <sys/vnode.h>
  47   47  #include <sys/vfs.h>
  48   48  #include <sys/vm.h>
  49   49  #include <sys/file.h>
  50   50  #include <sys/mman.h>
  51   51  #include <sys/vmparam.h>
  52   52  #include <sys/fcntl.h>
  53   53  #include <sys/lwpchan_impl.h>
  54   54  #include <sys/nbmlock.h>

↓ open down ↓

54 lines elided

↑ open up ↑

  55   55  
  56   56  #include <vm/hat.h>
  57   57  #include <vm/as.h>
  58   58  #include <vm/seg.h>
  59   59  #include <vm/seg_dev.h>
  60   60  #include <vm/seg_vn.h>
  61   61  
  62   62  int use_brk_lpg = 1;
  63   63  int use_stk_lpg = 1;
  64   64  
       65 +/*
       66 + * If set, we will not randomize mappings where the 'addr' argument is
       67 + * non-NULL and is a placement hint rather than a MAP_ALIGN alignment.
       68 + */
       69 +int aslr_respect_mmap_hint = 0;
       70 +
  65   71  static int brk_lpg(caddr_t nva);
  66   72  static int grow_lpg(caddr_t sp);
  67   73  
  68   74  intptr_t
  69   75  brk(caddr_t nva)
  70   76  {
  71   77          int error;
  72   78          proc_t *p = curproc;
  73   79  
  74   80          /*
  75      -         * As a special case to aid the implementation of sbrk(3C), if given a
  76      -         * new brk of 0, return the current brk.  We'll hide this in brk(3C).
  77      -         */
  78      -        if (nva == 0)
  79      -                return ((intptr_t)(p->p_brkbase + p->p_brksize));
  80      -
  81      -        /*
  82   81           * Serialize brk operations on an address space.
  83   82           * This also serves as the lock protecting p_brksize
  84   83           * and p_brkpageszc.
  85   84           */
  86   85          as_rangelock(p->p_as);
       86 +
       87 +        /*
       88 +         * As a special case to aid the implementation of sbrk(3C), if given a
       89 +         * new brk of 0, return the current brk.  We'll hide this in brk(3C).
       90 +         */
       91 +        if (nva == 0) {
       92 +                as_rangeunlock(p->p_as);
       93 +                return ((intptr_t)(p->p_brkbase + p->p_brksize));
       94 +        }
       95 +
  87   96          if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
  88   97                  error = brk_lpg(nva);
  89   98          } else {
  90   99                  error = brk_internal(nva, p->p_brkpageszc);
  91  100          }
  92  101          as_rangeunlock(p->p_as);
  93  102          return ((error != 0 ? set_errno(error) : 0));
  94  103  }
  95  104  
  96  105  /*

  97  106   * Algorithm: call arch-specific map_pgsz to get best page size to use,
  98  107   * then call brk_internal().
  99  108   * Returns 0 on success.
 100  109   */
 101  110  static int
 102  111  brk_lpg(caddr_t nva)
 103  112  {
 104  113          struct proc *p = curproc;
 105  114          size_t pgsz, len;
 106  115          caddr_t addr, brkend;
 107  116          caddr_t bssbase = p->p_bssbase;
 108  117          caddr_t brkbase = p->p_brkbase;
 109  118          int oszc, szc;
 110  119          int err;
 111  120  
 112  121          oszc = p->p_brkpageszc;
 113  122  
 114  123          /*
 115  124           * If p_brkbase has not yet been set, the first call
 116  125           * to brk_internal() will initialize it.
 117  126           */
 118  127          if (brkbase == 0) {
 119  128                  return (brk_internal(nva, oszc));
 120  129          }
 121  130  
 122  131          len = nva - bssbase;
 123  132  
 124  133          pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
 125  134          szc = page_szc(pgsz);
 126  135  
 127  136          /*
 128  137           * Covers two cases:
 129  138           * 1. page_szc() returns -1 for invalid page size, so we want to
 130  139           * ignore it in that case.
 131  140           * 2. By design we never decrease page size, as it is more stable.
 132  141           */
 133  142          if (szc <= oszc) {
 134  143                  err = brk_internal(nva, oszc);
 135  144                  /* If failed, back off to base page size. */
 136  145                  if (err != 0 && oszc != 0) {
 137  146                          err = brk_internal(nva, 0);
 138  147                  }
 139  148                  return (err);
 140  149          }
 141  150  
 142  151          err = brk_internal(nva, szc);
 143  152          /* If using szc failed, map with base page size and return. */
 144  153          if (err != 0) {
 145  154                  if (szc != 0) {
 146  155                          err = brk_internal(nva, 0);
 147  156                  }
 148  157                  return (err);
 149  158          }
 150  159  
 151  160          /*
 152  161           * Round up brk base to a large page boundary and remap
 153  162           * anything in the segment already faulted in beyond that
 154  163           * point.
 155  164           */
 156  165          addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
 157  166          brkend = brkbase + p->p_brksize;
 158  167          len = brkend - addr;
 159  168          /* Check that len is not negative. Update page size code for heap. */
 160  169          if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
 161  170                  (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
 162  171                  p->p_brkpageszc = szc;
 163  172          }
 164  173  
 165  174          ASSERT(err == 0);
 166  175          return (err);           /* should always be 0 */
 167  176  }
 168  177  
 169  178  /*
 170  179   * Returns 0 on success.
 171  180   */
 172  181  int
 173  182  brk_internal(caddr_t nva, uint_t brkszc)
 174  183  {
 175  184          caddr_t ova;                    /* current break address */
 176  185          size_t size;
 177  186          int     error;
 178  187          struct proc *p = curproc;
 179  188          struct as *as = p->p_as;
 180  189          size_t pgsz;
 181  190          uint_t szc;
 182  191          rctl_qty_t as_rctl;
 183  192  
 184  193          /*
 185  194           * extend heap to brkszc alignment but use current p->p_brkpageszc
 186  195           * for the newly created segment. This allows the new extension
 187  196           * segment to be concatenated successfully with the existing brk
 188  197           * segment.
 189  198           */
 190  199          if ((szc = brkszc) != 0) {
 191  200                  pgsz = page_get_pagesize(szc);
 192  201                  ASSERT(pgsz > PAGESIZE);
 193  202          } else {
 194  203                  pgsz = PAGESIZE;
 195  204          }
 196  205  
 197  206          mutex_enter(&p->p_lock);
 198  207          as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
 199  208              p->p_rctls, p);
 200  209          mutex_exit(&p->p_lock);
 201  210  
 202  211          /*
 203  212           * If p_brkbase has not yet been set, the first call
 204  213           * to brk() will initialize it.
 205  214           */
 206  215          if (p->p_brkbase == 0)
 207  216                  p->p_brkbase = nva;
 208  217  
 209  218          /*
 210  219           * Before multiple page size support existed p_brksize was the value
 211  220           * not rounded to the pagesize (i.e. it stored the exact user request
 212  221           * for heap size). If pgsz is greater than PAGESIZE calculate the
 213  222           * heap size as the real new heap size by rounding it up to pgsz.
 214  223           * This is useful since we may want to know where the heap ends
 215  224           * without knowing heap pagesize (e.g. some old code) and also if
 216  225           * heap pagesize changes we can update p_brkpageszc but delay adding
 217  226           * new mapping yet still know from p_brksize where the heap really
 218  227           * ends. The user requested heap end is stored in libc variable.
 219  228           */
 220  229          if (pgsz > PAGESIZE) {
 221  230                  caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
 222  231                  size = tnva - p->p_brkbase;
 223  232                  if (tnva < p->p_brkbase || (size > p->p_brksize &&
 224  233                      size > (size_t)as_rctl)) {
 225  234                          szc = 0;
 226  235                          pgsz = PAGESIZE;
 227  236                          size = nva - p->p_brkbase;
 228  237                  }
 229  238          } else {
 230  239                  size = nva - p->p_brkbase;
 231  240          }
 232  241  
 233  242          /*
 234  243           * use PAGESIZE to roundup ova because we want to know the real value
 235  244           * of the current heap end in case p_brkpageszc changes since the last
 236  245           * p_brksize was computed.
 237  246           */
 238  247          nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
 239  248          ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
 240  249              PAGESIZE);
 241  250  
 242  251          if ((nva < p->p_brkbase) || (size > p->p_brksize &&
 243  252              size > as_rctl)) {
 244  253                  mutex_enter(&p->p_lock);
 245  254                  (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
 246  255                      RCA_SAFE);
 247  256                  mutex_exit(&p->p_lock);
 248  257                  return (ENOMEM);
 249  258          }
 250  259  
 251  260          if (nva > ova) {
 252  261                  struct segvn_crargs crargs =
 253  262                      SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
 254  263  
 255  264                  if (!(p->p_datprot & PROT_EXEC)) {
 256  265                          crargs.prot &= ~PROT_EXEC;
 257  266                  }
 258  267  
 259  268                  /*
 260  269                   * Add new zfod mapping to extend UNIX data segment
 261  270                   * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
 262  271                   * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
 263  272                   * page sizes if ova is not aligned to szc's pgsz.
 264  273                   */
 265  274                  if (szc > 0) {
 266  275                          caddr_t rbss;
 267  276  
 268  277                          rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
 269  278                              pgsz);
 270  279                          if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
 271  280                                  crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
 272  281                                      AS_MAP_NO_LPOOB;
 273  282                          } else if (ova == rbss) {
 274  283                                  crargs.szc = szc;
 275  284                          } else {
 276  285                                  crargs.szc = AS_MAP_HEAP;
 277  286                          }
 278  287                  } else {
 279  288                          crargs.szc = AS_MAP_NO_LPOOB;
 280  289                  }
 281  290                  crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
 282  291                  error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
 283  292                      &crargs);
 284  293                  if (error) {
 285  294                          return (error);
 286  295                  }
 287  296  
 288  297          } else if (nva < ova) {
 289  298                  /*
 290  299                   * Release mapping to shrink UNIX data segment.
 291  300                   */
 292  301                  (void) as_unmap(as, nva, (size_t)(ova - nva));
 293  302          }
 294  303          p->p_brksize = size;
 295  304          return (0);
 296  305  }
 297  306  
 298  307  /*
 299  308   * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 300  309   * This routine assumes that the stack grows downward.
 301  310   */
 302  311  int
 303  312  grow(caddr_t sp)
 304  313  {
 305  314          struct proc *p = curproc;
 306  315          struct as *as = p->p_as;
 307  316          size_t oldsize = p->p_stksize;
 308  317          size_t newsize;
 309  318          int err;
 310  319  
 311  320          /*
 312  321           * Serialize grow operations on an address space.
 313  322           * This also serves as the lock protecting p_stksize
 314  323           * and p_stkpageszc.
 315  324           */
 316  325          as_rangelock(as);
 317  326          if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
 318  327                  err = grow_lpg(sp);
 319  328          } else {
 320  329                  err = grow_internal(sp, p->p_stkpageszc);
 321  330          }
 322  331          as_rangeunlock(as);
 323  332  
 324  333          if (err == 0 && (newsize = p->p_stksize) > oldsize) {
 325  334                  ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
 326  335                  ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
 327  336                  /*
 328  337                   * Set up translations so the process doesn't have to fault in
 329  338                   * the stack pages we just gave it.
 330  339                   */
 331  340                  (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
 332  341                      newsize - oldsize, F_INVAL, S_WRITE);
 333  342          }
 334  343          return ((err == 0 ? 1 : 0));
 335  344  }
 336  345  
 337  346  /*
 338  347   * Algorithm: call arch-specific map_pgsz to get best page size to use,
 339  348   * then call grow_internal().
 340  349   * Returns 0 on success.
 341  350   */
 342  351  static int
 343  352  grow_lpg(caddr_t sp)
 344  353  {
 345  354          struct proc *p = curproc;
 346  355          size_t pgsz;
 347  356          size_t len, newsize;
 348  357          caddr_t addr, saddr;
 349  358          caddr_t growend;
 350  359          int oszc, szc;
 351  360          int err;
 352  361  
 353  362          newsize = p->p_usrstack - sp;
 354  363  
 355  364          oszc = p->p_stkpageszc;
 356  365          pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
 357  366          szc = page_szc(pgsz);
 358  367  
 359  368          /*
 360  369           * Covers two cases:
 361  370           * 1. page_szc() returns -1 for invalid page size, so we want to
 362  371           * ignore it in that case.
 363  372           * 2. By design we never decrease page size, as it is more stable.
 364  373           * This shouldn't happen as the stack never shrinks.
 365  374           */
 366  375          if (szc <= oszc) {
 367  376                  err = grow_internal(sp, oszc);
 368  377                  /* failed, fall back to base page size */
 369  378                  if (err != 0 && oszc != 0) {
 370  379                          err = grow_internal(sp, 0);
 371  380                  }
 372  381                  return (err);
 373  382          }
 374  383  
 375  384          /*
 376  385           * We've grown sufficiently to switch to a new page size.
 377  386           * So we are going to remap the whole segment with the new page size.
 378  387           */
 379  388          err = grow_internal(sp, szc);
 380  389          /* The grow with szc failed, so fall back to base page size. */
 381  390          if (err != 0) {
 382  391                  if (szc != 0) {
 383  392                          err = grow_internal(sp, 0);
 384  393                  }
 385  394                  return (err);
 386  395          }
 387  396  
 388  397          /*
 389  398           * Round up stack pointer to a large page boundary and remap
 390  399           * any pgsz pages in the segment already faulted in beyond that
 391  400           * point.
 392  401           */
 393  402          saddr = p->p_usrstack - p->p_stksize;
 394  403          addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
 395  404          growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
 396  405          len = growend - addr;
 397  406          /* Check that len is not negative. Update page size code for stack. */
 398  407          if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
 399  408                  (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
 400  409                  p->p_stkpageszc = szc;
 401  410          }
 402  411  
 403  412          ASSERT(err == 0);
 404  413          return (err);           /* should always be 0 */
 405  414  }
 406  415  
 407  416  /*
 408  417   * This routine assumes that the stack grows downward.
 409  418   * Returns 0 on success, errno on failure.
 410  419   */
 411  420  int
 412  421  grow_internal(caddr_t sp, uint_t growszc)
 413  422  {
 414  423          struct proc *p = curproc;
 415  424          size_t newsize;
 416  425          size_t oldsize;
 417  426          int    error;
 418  427          size_t pgsz;
 419  428          uint_t szc;
 420  429          struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
 421  430  
 422  431          ASSERT(sp < p->p_usrstack);
 423  432          sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
 424  433  
 425  434          /*
 426  435           * grow to growszc alignment but use current p->p_stkpageszc for
 427  436           * the segvn_crargs szc passed to segvn_create. For memcntl to
 428  437           * increase the szc, this allows the new extension segment to be
 429  438           * concatenated successfully with the existing stack segment.
 430  439           */
 431  440          if ((szc = growszc) != 0) {
 432  441                  pgsz = page_get_pagesize(szc);
 433  442                  ASSERT(pgsz > PAGESIZE);
 434  443                  newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
 435  444                  if (newsize > (size_t)p->p_stk_ctl) {
 436  445                          szc = 0;
 437  446                          pgsz = PAGESIZE;
 438  447                          newsize = p->p_usrstack - sp;
 439  448                  }
 440  449          } else {
 441  450                  pgsz = PAGESIZE;
 442  451                  newsize = p->p_usrstack - sp;
 443  452          }
 444  453  
 445  454          if (newsize > (size_t)p->p_stk_ctl) {
 446  455                  (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
 447  456                      RCA_UNSAFE_ALL);
 448  457  
 449  458                  return (ENOMEM);
 450  459          }
 451  460  
 452  461          oldsize = p->p_stksize;
 453  462          ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
 454  463  
 455  464          if (newsize <= oldsize) {       /* prevent the stack from shrinking */
 456  465                  return (0);
 457  466          }
 458  467  
 459  468          if (!(p->p_stkprot & PROT_EXEC)) {
 460  469                  crargs.prot &= ~PROT_EXEC;
 461  470          }
 462  471          /*
 463  472           * extend stack with the proposed new growszc, which is different
 464  473           * than p_stkpageszc only on a memcntl to increase the stack pagesize.
 465  474           * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
 466  475           * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
 467  476           * if not aligned to szc's pgsz.
 468  477           */
 469  478          if (szc > 0) {
 470  479                  caddr_t oldsp = p->p_usrstack - oldsize;
 471  480                  caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
 472  481                      pgsz);
 473  482  
 474  483                  if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
 475  484                          crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
 476  485                              AS_MAP_NO_LPOOB;
 477  486                  } else if (oldsp == austk) {
 478  487                          crargs.szc = szc;
 479  488                  } else {
 480  489                          crargs.szc = AS_MAP_STACK;
 481  490                  }
 482  491          } else {
 483  492                  crargs.szc = AS_MAP_NO_LPOOB;
 484  493          }
 485  494          crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
 486  495  
 487  496          if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
 488  497              segvn_create, &crargs)) != 0) {
 489  498                  if (error == EAGAIN) {
 490  499                          cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
 491  500                              "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
 492  501                  }
 493  502                  return (error);
 494  503          }
 495  504          p->p_stksize = newsize;
 496  505          return (0);
 497  506  }
 498  507  
 499  508  /*
 500  509   * Find address for user to map.  If MAP_FIXED is not specified, we can pick
 501  510   * any address we want, but we will first try the value in *addrp if it is
 502  511   * non-NULL and _MAP_RANDOMIZE is not set.  Thus this is implementing a way to
 503  512   * try and get a preferred address.
 504  513   */
 505  514  int
 506  515  choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
 507  516      int vacalign, uint_t flags)
 508  517  {
 509  518          caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
 510  519          size_t lenp = len;
 511  520  
 512  521          ASSERT(AS_ISCLAIMGAP(as));      /* searches should be serialized */
 513  522          if (flags & MAP_FIXED) {
 514  523                  (void) as_unmap(as, *addrp, len);
 515  524                  return (0);
 516  525          } else if (basep != NULL &&
 517  526              ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
 518  527              !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
 519  528                  /* User supplied address was available */
 520  529                  *addrp = basep;
 521  530          } else {
 522  531                  /*
 523  532                   * No user supplied address or the address supplied was not
 524  533                   * available.
 525  534                   */
 526  535                  map_addr(addrp, len, off, vacalign, flags);
 527  536          }
 528  537          if (*addrp == NULL)
 529  538                  return (ENOMEM);
 530  539          return (0);
 531  540  }
 532  541  
 533  542  
 534  543  /*
 535  544   * Used for MAP_ANON - fast way to get anonymous pages
 536  545   */
 537  546  static int
 538  547  zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
 539  548      offset_t pos)
 540  549  {
 541  550          struct segvn_crargs vn_a;
 542  551          int error;
 543  552  
 544  553          if (((PROT_ALL & uprot) != uprot))
 545  554                  return (EACCES);
 546  555  
 547  556          if ((flags & MAP_FIXED) != 0) {
 548  557                  caddr_t userlimit;
 549  558  
 550  559                  /*
 551  560                   * Use the user address.  First verify that
 552  561                   * the address to be used is page aligned.
 553  562                   * Then make some simple bounds checks.
 554  563                   */
 555  564                  if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
 556  565                          return (EINVAL);
 557  566  
 558  567                  userlimit = flags & _MAP_LOW32 ?
 559  568                      (caddr_t)USERLIMIT32 : as->a_userlimit;
 560  569                  switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
 561  570                  case RANGE_OKAY:
 562  571                          break;
 563  572                  case RANGE_BADPROT:
 564  573                          return (ENOTSUP);
 565  574                  case RANGE_BADADDR:
 566  575                  default:
 567  576                          return (ENOMEM);
 568  577                  }
 569  578          }
 570  579          /*
 571  580           * No need to worry about vac alignment for anonymous
 572  581           * pages since this is a "clone" object that doesn't
 573  582           * yet exist.
 574  583           */
 575  584          error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
 576  585          if (error != 0) {
 577  586                  return (error);
 578  587          }
 579  588  
 580  589          /*
 581  590           * Use the seg_vn segment driver; passing in the NULL amp
 582  591           * gives the desired "cloning" effect.
 583  592           */
 584  593          vn_a.vp = NULL;
 585  594          vn_a.offset = 0;
 586  595          vn_a.type = flags & MAP_TYPE;
 587  596          vn_a.prot = uprot;

↓ open down ↓

491 lines elided

↑ open up ↑

 588  597          vn_a.maxprot = PROT_ALL;
 589  598          vn_a.flags = flags & ~MAP_TYPE;
 590  599          vn_a.cred = CRED();
 591  600          vn_a.amp = NULL;
 592  601          vn_a.szc = 0;
 593  602          vn_a.lgrp_mem_policy_flags = 0;
 594  603  
 595  604          return (as_map(as, *addrp, len, segvn_create, &vn_a));
 596  605  }
 597  606  
      607 +#define RANDOMIZABLE_MAPPING(addr, flags) (((flags & MAP_FIXED) == 0) && \
      608 +        !(((flags & MAP_ALIGN) == 0) && (addr != 0) && aslr_respect_mmap_hint))
      609 +
 598  610  static int
 599  611  smmap_common(caddr_t *addrp, size_t len,
 600  612      int prot, int flags, struct file *fp, offset_t pos)
 601  613  {
 602  614          struct vnode *vp;
 603  615          struct as *as = curproc->p_as;
 604  616          uint_t uprot, maxprot, type;
 605  617          int error;
 606  618          int in_crit = 0;
 607  619

 608  620          if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
 609  621              _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
 610  622              MAP_TEXT | MAP_INITDATA)) != 0) {
 611  623                  /* | MAP_RENAME */      /* not implemented, let user know */
 612  624                  return (EINVAL);
 613  625          }
 614  626  
 615  627          if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
 616  628                  return (EINVAL);
 617  629          }
 618  630  
 619  631          if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
 620  632                  return (EINVAL);
 621  633          }

↓ open down ↓

14 lines elided

↑ open up ↑

 622  634  
 623  635          if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
 624  636              (MAP_FIXED | _MAP_RANDOMIZE)) {
 625  637                  return (EINVAL);
 626  638          }
 627  639  
 628  640          /*
 629  641           * If ASLR is enabled, randomize the mapping unless it is fixed or it
 630  642           * carries an address hint that aslr_respect_mmap_hint says to honor.
 631  643           */
 632      -        if (((flags & MAP_FIXED) == 0) &&
      644 +        if (RANDOMIZABLE_MAPPING(*addrp, flags) &&
 633  645              secflag_enabled(curproc, PROC_SEC_ASLR))
 634  646                  flags |= _MAP_RANDOMIZE;
 635  647  
 636  648  #if defined(__sparc)
 637  649          /*
 638  650           * See if this is an "old mmap call".  If so, remember this
 639  651           * fact and convert the flags value given to mmap to indicate
 640  652           * the specified address in the system call must be used.
 641  653           * _MAP_NEW is turned on by all new uses of mmap.
 642  654           */

 643  655          if ((flags & _MAP_NEW) == 0)
 644  656                  flags |= MAP_FIXED;
 645  657  #endif
 646  658          flags &= ~_MAP_NEW;
 647  659  
 648  660          type = flags & MAP_TYPE;
 649  661          if (type != MAP_PRIVATE && type != MAP_SHARED)
 650  662                  return (EINVAL);
 651  663  
 652  664  
 653  665          if (flags & MAP_ALIGN) {
 654  666                  if (flags & MAP_FIXED)
 655  667                          return (EINVAL);
 656  668  
 657  669                  /* alignment needs to be a power of 2 >= page size */
 658  670                  if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
 659  671                      !ISP2((uintptr_t)*addrp))
 660  672                          return (EINVAL);
 661  673          }
 662  674          /*
 663  675           * Check for bad lengths and file position.
 664  676           * We let the VOP_MAP routine check for negative lengths
 665  677           * since on some vnode types this might be appropriate.
 666  678           */
 667  679          if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
 668  680                  return (EINVAL);
 669  681  
 670  682          maxprot = PROT_ALL;             /* start out allowing all accesses */
 671  683          uprot = prot | PROT_USER;
 672  684  
 673  685          if (fp == NULL) {
 674  686                  ASSERT(flags & MAP_ANON);
 675  687                  /* discard lwpchan mappings, like munmap() */
 676  688                  if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
 677  689                          lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
 678  690                  as_rangelock(as);
 679  691                  error = zmap(as, addrp, len, uprot, flags, pos);
 680  692                  as_rangeunlock(as);
 681  693                  /*
 682  694                   * Tell machine specific code that lwp has mapped shared memory
 683  695                   */
 684  696                  if (error == 0 && (flags & MAP_SHARED)) {
 685  697                          /* EMPTY */
 686  698                          LWP_MMODEL_SHARED_AS(*addrp, len);
 687  699                  }
 688  700                  return (error);
 689  701          } else if ((flags & MAP_ANON) != 0)
 690  702                  return (EINVAL);
 691  703  
 692  704          vp = fp->f_vnode;
 693  705  
 694  706          /* Can't execute code from "noexec" mounted filesystem. */
 695  707          if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
 696  708                  maxprot &= ~PROT_EXEC;
 697  709  
 698  710          /*
 699  711           * These checks were added as part of large files.
 700  712           *
 701  713           * Return ENXIO if the initial position is negative; return EOVERFLOW
 702  714           * if (offset + len) would overflow the maximum allowed offset for the
 703  715           * type of file descriptor being used.
 704  716           */
 705  717          if (vp->v_type == VREG) {
 706  718                  if (pos < 0)
 707  719                          return (ENXIO);
 708  720                  if ((offset_t)len > (OFFSET_MAX(fp) - pos))
 709  721                          return (EOVERFLOW);
 710  722          }
 711  723  
 712  724          if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
 713  725                  /* no write access allowed */
 714  726                  maxprot &= ~PROT_WRITE;
 715  727          }
 716  728  
 717  729          /*
 718  730           * XXX - Do we also adjust maxprot based on protections
 719  731           * of the vnode?  E.g. if no execute permission is given
 720  732           * on the vnode for the current user, maxprot probably
 721  733           * should disallow PROT_EXEC also?  This is different
 722  734           * from the write access as this would be a per vnode
 723  735           * test as opposed to a per fd test for writability.
 724  736           */
 725  737  
 726  738          /*
 727  739           * Verify that the specified protections are not greater than
 728  740           * the maximum allowable protections.  Also test to make sure
 729  741           * that the file descriptor does allow for read access since
 730  742           * "write only" mappings are hard to do since normally we do
 731  743           * the read from the file before the page can be written.
 732  744           */
 733  745          if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
 734  746                  return (EACCES);
 735  747  
 736  748          /*
 737  749           * If the user specified an address, do some simple checks here
 738  750           */
 739  751          if ((flags & MAP_FIXED) != 0) {
 740  752                  caddr_t userlimit;
 741  753  
 742  754                  /*
 743  755                   * Use the user address.  First verify that
 744  756                   * the address to be used is page aligned.
 745  757                   * Then make some simple bounds checks.
 746  758                   */
 747  759                  if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
 748  760                          return (EINVAL);
 749  761  
 750  762                  userlimit = flags & _MAP_LOW32 ?
 751  763                      (caddr_t)USERLIMIT32 : as->a_userlimit;
 752  764                  switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
 753  765                  case RANGE_OKAY:
 754  766                          break;
 755  767                  case RANGE_BADPROT:
 756  768                          return (ENOTSUP);
 757  769                  case RANGE_BADADDR:
 758  770                  default:
 759  771                          return (ENOMEM);
 760  772                  }
 761  773          }
 762  774  
 763  775          if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
 764  776              nbl_need_check(vp)) {
 765  777                  int svmand;
 766  778                  nbl_op_t nop;
 767  779  
 768  780                  nbl_start_crit(vp, RW_READER);
 769  781                  in_crit = 1;
 770  782                  error = nbl_svmand(vp, fp->f_cred, &svmand);
 771  783                  if (error != 0)
 772  784                          goto done;
 773  785                  if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
 774  786                          if (prot & (PROT_READ | PROT_EXEC)) {
 775  787                                  nop = NBL_READWRITE;
 776  788                          } else {
 777  789                                  nop = NBL_WRITE;
 778  790                          }
 779  791                  } else {
 780  792                          nop = NBL_READ;
 781  793                  }
 782  794                  if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
 783  795                          error = EACCES;
 784  796                          goto done;
 785  797                  }
 786  798          }
 787  799  
 788  800          /* discard lwpchan mappings, like munmap() */
 789  801          if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
 790  802                  lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
 791  803  
 792  804          /*
 793  805           * Ok, now let the vnode map routine do its thing to set things up.
 794  806           */
 795  807          error = VOP_MAP(vp, pos, as,
 796  808              addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
 797  809  
 798  810          if (error == 0) {
 799  811                  /*
 800  812                   * Tell machine specific code that lwp has mapped shared memory
 801  813                   */
 802  814                  if (flags & MAP_SHARED) {
 803  815                          /* EMPTY */
 804  816                          LWP_MMODEL_SHARED_AS(*addrp, len);
 805  817                  }
 806  818                  if (vp->v_type == VREG &&
 807  819                      (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
 808  820                          /*
 809  821                           * Mark this as an executable vnode
 810  822                           */
 811  823                          mutex_enter(&vp->v_lock);
 812  824                          vp->v_flag |= VVMEXEC;
 813  825                          mutex_exit(&vp->v_lock);
 814  826                  }
 815  827          }
 816  828  
 817  829  done:
 818  830          if (in_crit)
 819  831                  nbl_end_crit(vp);
 820  832          return (error);
 821  833  }
 822  834  
 823  835  #ifdef _LP64
 824  836  /*
 825  837   * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 826  838   *
 827  839   * The "large file" mmap routine mmap64(2) is also mapped to this routine
 828  840   * by the 64-bit version of libc.
 829  841   *
 830  842   * Eventually, this should be the only version, and have smmap_common()
 831  843   * folded back into it again.  Some day.
 832  844   */
 833  845  caddr_t
 834  846  smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
 835  847  {
 836  848          struct file *fp;
 837  849          int error;
 838  850  
 839  851          if (fd == -1 && (flags & MAP_ANON) != 0)
 840  852                  error = smmap_common(&addr, len, prot, flags,
 841  853                      NULL, (offset_t)pos);
 842  854          else if ((fp = getf(fd)) != NULL) {
 843  855                  error = smmap_common(&addr, len, prot, flags,
 844  856                      fp, (offset_t)pos);
 845  857                  releasef(fd);
 846  858          } else
 847  859                  error = EBADF;
 848  860  
 849  861          return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
 850  862  }
 851  863  #endif  /* _LP64 */
 852  864  
 853  865  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
 854  866  
 855  867  /*
 856  868   * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 857  869   */
 858  870  caddr_t
 859  871  smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
 860  872  {
 861  873          struct file *fp;
 862  874          int error;
 863  875          caddr_t a = (caddr_t)(uintptr_t)addr;
 864  876  
 865  877          if (flags & _MAP_LOW32)
 866  878                  error = EINVAL;
 867  879          else if (fd == -1 && (flags & MAP_ANON) != 0)
 868  880                  error = smmap_common(&a, (size_t)len, prot,
 869  881                      flags | _MAP_LOW32, NULL, (offset_t)pos);
 870  882          else if ((fp = getf(fd)) != NULL) {
 871  883                  error = smmap_common(&a, (size_t)len, prot,
 872  884                      flags | _MAP_LOW32, fp, (offset_t)pos);
 873  885                  releasef(fd);
 874  886          } else
 875  887                  error = EBADF;
 876  888  
 877  889          ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
 878  890  
 879  891          return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
 880  892  }
 881  893  
 882  894  /*
 883  895   * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 884  896   *
 885  897   * Now things really get ugly because we can't use the C-style
 886  898   * calling convention for more than 6 args, and 64-bit parameter
 887  899   * passing on 32-bit systems is less than clean.
 888  900   */
 889  901  
 890  902  struct mmaplf32a {
 891  903          caddr_t addr;
 892  904          size_t len;
 893  905  #ifdef _LP64
 894  906          /*
 895  907           * 32-bit contents, 64-bit cells
 896  908           */
 897  909          uint64_t prot;
 898  910          uint64_t flags;
 899  911          uint64_t fd;
 900  912          uint64_t offhi;
 901  913          uint64_t offlo;
 902  914  #else
 903  915          /*
 904  916           * 32-bit contents, 32-bit cells
 905  917           */
 906  918          uint32_t prot;
 907  919          uint32_t flags;
 908  920          uint32_t fd;
 909  921          uint32_t offhi;
 910  922          uint32_t offlo;
 911  923  #endif
 912  924  };
 913  925  
 914  926  int
 915  927  smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
 916  928  {
 917  929          struct file *fp;
 918  930          int error;
 919  931          caddr_t a = uap->addr;
 920  932          int flags = (int)uap->flags;
 921  933          int fd = (int)uap->fd;
 922  934  #ifdef _BIG_ENDIAN
 923  935          offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
 924  936  #else
 925  937          offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
 926  938  #endif
 927  939  
 928  940          if (flags & _MAP_LOW32)
 929  941                  error = EINVAL;
 930  942          else if (fd == -1 && (flags & MAP_ANON) != 0)
 931  943                  error = smmap_common(&a, uap->len, (int)uap->prot,
 932  944                      flags | _MAP_LOW32, NULL, off);
 933  945          else if ((fp = getf(fd)) != NULL) {
 934  946                  error = smmap_common(&a, uap->len, (int)uap->prot,
 935  947                      flags | _MAP_LOW32, fp, off);
 936  948                  releasef(fd);
 937  949          } else
 938  950                  error = EBADF;
 939  951  
 940  952          if (error == 0)
 941  953                  rvp->r_val1 = (uintptr_t)a;
 942  954          return (error);
 943  955  }
 944  956  
 945  957  #endif  /* _SYSCALL32_IMPL || _ILP32 */
 946  958  
 947  959  int
 948  960  munmap(caddr_t addr, size_t len)
 949  961  {
 950  962          struct proc *p = curproc;
 951  963          struct as *as = p->p_as;
 952  964  
 953  965          if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
 954  966                  return (set_errno(EINVAL));
 955  967  
 956  968          if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
 957  969                  return (set_errno(EINVAL));
 958  970  
 959  971          /*
 960  972           * Discard lwpchan mappings.
 961  973           */
 962  974          if (p->p_lcp != NULL)
 963  975                  lwpchan_delete_mapping(p, addr, addr + len);
 964  976          if (as_unmap(as, addr, len) != 0)
 965  977                  return (set_errno(EINVAL));
 966  978  
 967  979          return (0);
 968  980  }
 969  981  
 970  982  int
 971  983  mprotect(caddr_t addr, size_t len, int prot)
 972  984  {
 973  985          struct as *as = curproc->p_as;
 974  986          uint_t uprot = prot | PROT_USER;
 975  987          int error;
 976  988  
 977  989          if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
 978  990                  return (set_errno(EINVAL));
 979  991  
 980  992          switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
 981  993          case RANGE_OKAY:
 982  994                  break;
 983  995          case RANGE_BADPROT:
 984  996                  return (set_errno(ENOTSUP));
 985  997          case RANGE_BADADDR:
 986  998          default:
 987  999                  return (set_errno(ENOMEM));
 988 1000          }
 989 1001  
 990 1002          error = as_setprot(as, addr, len, uprot);
 991 1003          if (error)
 992 1004                  return (set_errno(error));
 993 1005          return (0);
 994 1006  }
 995 1007  
 996 1008  #define MC_CACHE        128                     /* internal result buffer */
 997 1009  #define MC_QUANTUM      (MC_CACHE * PAGESIZE)   /* addresses covered in loop */
 998 1010  
 999 1011  int
1000 1012  mincore(caddr_t addr, size_t len, char *vecp)
1001 1013  {
1002 1014          struct as *as = curproc->p_as;
1003 1015          caddr_t ea;                     /* end address of loop */
1004 1016          size_t rl;                      /* inner result length */
1005 1017          char vec[MC_CACHE];             /* local vector cache */
1006 1018          int error;
1007 1019          model_t model;
1008 1020          long    llen;
1009 1021  
1010 1022          model = get_udatamodel();
1011 1023          /*
1012 1024           * Validate form of address parameters.
1013 1025           */
1014 1026          if (model == DATAMODEL_NATIVE) {
1015 1027                  llen = (long)len;
1016 1028          } else {
1017 1029                  llen = (int32_t)(size32_t)len;
1018 1030          }
1019 1031          if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
1020 1032                  return (set_errno(EINVAL));
1021 1033  
1022 1034          if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
1023 1035                  return (set_errno(ENOMEM));
1024 1036  
1025 1037          /*
1026 1038           * Loop over subranges of interval [addr : addr + len), recovering
1027 1039           * results internally and then copying them out to caller.  Subrange
1028 1040           * is based on the size of MC_CACHE, defined above.
1029 1041           */
1030 1042          for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
1031 1043                  error = as_incore(as, addr,
1032 1044                      (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
1033 1045                  if (rl != 0) {
1034 1046                          rl = (rl + PAGESIZE - 1) / PAGESIZE;
1035 1047                          if (copyout(vec, vecp, rl) != 0)
1036 1048                                  return (set_errno(EFAULT));
1037 1049                          vecp += rl;
1038 1050                  }
1039 1051                  if (error != 0)
1040 1052                          return (set_errno(ENOMEM));
1041 1053          }
1042 1054          return (0);
1043 1055  }

↓ open down ↓

401 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX