1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  27 /*      All Rights Reserved   */
  28 
  29 /*
  30  * Portions of this source code were derived from Berkeley 4.3 BSD
  31  * under license from the Regents of the University of California.
  32  */
  33 
  34 /*
  35  * UNIX machine dependent virtual memory support.
  36  */
  37 
  38 #include <sys/vm.h>
  39 #include <sys/exec.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/cpu_module.h>
  42 #include <sys/cpu.h>
  43 #include <sys/elf_SPARC.h>
  44 #include <sys/archsystm.h>
  45 #include <vm/hat_sfmmu.h>
  46 #include <sys/memnode.h>
  47 #include <sys/mem_cage.h>
  48 #include <vm/vm_dep.h>
  49 #include <sys/random.h>
  50 
  51 #if defined(__sparcv9) && defined(SF_ERRATA_57)
  52 caddr_t errata57_limit;
  53 #endif
  54 
  55 uint_t page_colors = 0;
  56 uint_t page_colors_mask = 0;
  57 uint_t page_coloring_shift = 0;
  58 int consistent_coloring;
  59 int update_proc_pgcolorbase_after_fork = 0;
  60 
  61 uint_t mmu_page_sizes = DEFAULT_MMU_PAGE_SIZES;
  62 uint_t max_mmu_page_sizes = MMU_PAGE_SIZES;
  63 uint_t mmu_hashcnt = DEFAULT_MAX_HASHCNT;
  64 uint_t max_mmu_hashcnt = MAX_HASHCNT;
  65 size_t mmu_ism_pagesize = DEFAULT_ISM_PAGESIZE;
  66 
  67 /*
  68  * The sun4u hardware mapping sizes which will always be supported are
  69  * 8K, 64K, 512K and 4M.  If sun4u based machines need to support other
  70  * page sizes, platform or cpu specific routines need to modify the value.
  71  * The base pagesize (p_szc == 0) must always be supported by the hardware.
  72  */
  73 int mmu_exported_pagesize_mask = (1 << TTE8K) | (1 << TTE64K) |
  74         (1 << TTE512K) | (1 << TTE4M);
  75 uint_t mmu_exported_page_sizes;
  76 
  77 uint_t szc_2_userszc[MMU_PAGE_SIZES];
  78 uint_t userszc_2_szc[MMU_PAGE_SIZES];
  79 
  80 extern uint_t vac_colors_mask;
  81 extern int vac_shift;
  82 
/*
 * Table of hardware page sizes, one row per size from MMU_PAGESIZE (8K)
 * through MMU_PAGESIZE256M.  Each row is initialised, in order, with:
 *   - the page size in bytes,
 *   - the corresponding page shift,
 *   - a zero, and
 *   - the size expressed in base pages (size >> MMU_PAGESHIFT).
 * The final row is all zeros; presumably an end-of-table sentinel --
 * TODO confirm against the consumers of this array.
 */
hw_pagesize_t hw_page_array[] = {
        {MMU_PAGESIZE, MMU_PAGESHIFT, 0, MMU_PAGESIZE >> MMU_PAGESHIFT},
        {MMU_PAGESIZE64K, MMU_PAGESHIFT64K, 0,
            MMU_PAGESIZE64K >> MMU_PAGESHIFT},
        {MMU_PAGESIZE512K, MMU_PAGESHIFT512K, 0,
            MMU_PAGESIZE512K >> MMU_PAGESHIFT},
        {MMU_PAGESIZE4M, MMU_PAGESHIFT4M, 0, MMU_PAGESIZE4M >> MMU_PAGESHIFT},
        {MMU_PAGESIZE32M, MMU_PAGESHIFT32M, 0,
            MMU_PAGESIZE32M >> MMU_PAGESHIFT},
        {MMU_PAGESIZE256M, MMU_PAGESHIFT256M, 0,
            MMU_PAGESIZE256M >> MMU_PAGESHIFT},
        {0, 0, 0, 0}
};
  96 
  97 /*
  98  * Maximum page size used to map 64-bit memory segment kmem64_base..kmem64_end
  99  */
 100 int     max_bootlp_tteszc = TTE4M;
 101 
 102 /*
 * use_text_pgsz64K and use_text_pgsz512K allow the user to turn on these
 104  * additional text page sizes for USIII-IV+ and OPL by changing the default
 105  * values via /etc/system.
 106  */
 107 int     use_text_pgsz64K = 0;
 108 int     use_text_pgsz512K = 0;
 109 
 110 /*
 111  * Maximum and default segment size tunables for user heap, stack, private
 112  * and shared anonymous memory, and user text and initialized data.
 113  */
 114 size_t max_uheap_lpsize = MMU_PAGESIZE4M;
 115 size_t default_uheap_lpsize = MMU_PAGESIZE;
 116 size_t max_ustack_lpsize = MMU_PAGESIZE4M;
 117 size_t default_ustack_lpsize = MMU_PAGESIZE;
 118 size_t max_privmap_lpsize = MMU_PAGESIZE4M;
 119 size_t max_uidata_lpsize = MMU_PAGESIZE;
 120 size_t max_utext_lpsize = MMU_PAGESIZE4M;
 121 size_t max_shm_lpsize = MMU_PAGESIZE4M;
 122 
 123 void
 124 adjust_data_maxlpsize(size_t ismpagesize)
 125 {
 126         if (max_uheap_lpsize == MMU_PAGESIZE4M) {
 127                 max_uheap_lpsize = ismpagesize;
 128         }
 129         if (max_ustack_lpsize == MMU_PAGESIZE4M) {
 130                 max_ustack_lpsize = ismpagesize;
 131         }
 132         if (max_privmap_lpsize == MMU_PAGESIZE4M) {
 133                 max_privmap_lpsize = ismpagesize;
 134         }
 135         if (max_shm_lpsize == MMU_PAGESIZE4M) {
 136                 max_shm_lpsize = ismpagesize;
 137         }
 138 }
 139 
 140 /*
 141  * The maximum amount a randomized mapping will be slewed.  We should perhaps
 142  * arrange things so these tunables can be separate for mmap, mmapobj, and
 143  * ld.so
 144  */
 145 size_t aslr_max_map_skew = 256 * 1024 * 1024; /* 256MB */
 146 
 147 /*
 148  * map_addr_proc() is the routine called when the system is to
 149  * choose an address for the user.  We will pick an address
 150  * range which is just below the current stack limit.  The
 151  * algorithm used for cache consistency on machines with virtual
 152  * address caches is such that offset 0 in the vnode is always
 153  * on a shm_alignment'ed aligned address.  Unfortunately, this
 154  * means that vnodes which are demand paged will not be mapped
 155  * cache consistently with the executable images.  When the
 156  * cache alignment for a given object is inconsistent, the
 157  * lower level code must manage the translations so that this
 158  * is not seen here (at the cost of efficiency, of course).
 159  *
 160  * Every mapping will have a redzone of a single page on either side of
 161  * the request. This is done to leave one page unmapped between segments.
 162  * This is not required, but it's useful for the user because if their
 163  * program strays across a segment boundary, it will catch a fault
 164  * immediately making debugging a little easier.  Currently the redzone
 165  * is mandatory.
 166  *
 167  *
 168  * addrp is a value/result parameter.
 169  *      On input it is a hint from the user to be used in a completely
 170  *      machine dependent fashion.  For MAP_ALIGN, addrp contains the
 171  *      minimal alignment, which must be some "power of two" multiple of
 172  *      pagesize.
 173  *
 174  *      On output it is NULL if no address can be found in the current
 *      process's address space or else an address that is currently
 176  *      not mapped for len bytes with a page of red zone on either side.
 177  *      If vacalign is true, then the selected address will obey the alignment
 178  *      constraints of a vac machine based on the given off value.
 179  */
 180 /*ARGSUSED4*/
 181 void
 182 map_addr_proc(caddr_t *addrp, size_t len, offset_t off, int vacalign,
 183     caddr_t userlimit, struct proc *p, uint_t flags)
 184 {
 185         struct as *as = p->p_as;
 186         caddr_t addr;
 187         caddr_t base;
 188         size_t slen;
 189         uintptr_t align_amount;
 190         int allow_largepage_alignment = 1;
 191 
 192         base = p->p_brkbase;
 193         if (userlimit < as->a_userlimit) {
 194                 /*
 195                  * This happens when a program wants to map something in
 196                  * a range that's accessible to a program in a smaller
 197                  * address space.  For example, a 64-bit program might
 198                  * be calling mmap32(2) to guarantee that the returned
 199                  * address is below 4Gbytes.
 200                  */
 201                 ASSERT(userlimit > base);
 202                 slen = userlimit - base;
 203         } else {
 204                 slen = p->p_usrstack - base -
 205                     ((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK);
 206         }
 207 
 208         /* Make len be a multiple of PAGESIZE */
 209         len = (len + PAGEOFFSET) & PAGEMASK;
 210 
 211         /*
 212          *  If the request is larger than the size of a particular
 213          *  mmu level, then we use that level to map the request.
 214          *  But this requires that both the virtual and the physical
 215          *  addresses be aligned with respect to that level, so we
 216          *  do the virtual bit of nastiness here.
 217          *
 218          *  For 32-bit processes, only those which have specified
 219          *  MAP_ALIGN or an addr will be aligned on a page size > 4MB. Otherwise
 220          *  we can potentially waste up to 256MB of the 4G process address
 221          *  space just for alignment.
 222          */
 223         if (p->p_model == DATAMODEL_ILP32 && ((flags & MAP_ALIGN) == 0 ||
 224             ((uintptr_t)*addrp) != 0)) {
 225                 allow_largepage_alignment = 0;
 226         }
 227         if ((mmu_page_sizes == max_mmu_page_sizes) &&
 228             allow_largepage_alignment &&
 229             (len >= MMU_PAGESIZE256M)) {     /* 256MB mappings */
 230                 align_amount = MMU_PAGESIZE256M;
 231         } else if ((mmu_page_sizes == max_mmu_page_sizes) &&
 232             allow_largepage_alignment &&
 233             (len >= MMU_PAGESIZE32M)) {      /* 32MB mappings */
 234                 align_amount = MMU_PAGESIZE32M;
 235         } else if (len >= MMU_PAGESIZE4M) {  /* 4MB mappings */
 236                 align_amount = MMU_PAGESIZE4M;
 237         } else if (len >= MMU_PAGESIZE512K) { /* 512KB mappings */
 238                 align_amount = MMU_PAGESIZE512K;
 239         } else if (len >= MMU_PAGESIZE64K) { /* 64KB mappings */
 240                 align_amount = MMU_PAGESIZE64K;
 241         } else  {
 242                 /*
 243                  * Align virtual addresses on a 64K boundary to ensure
 244                  * that ELF shared libraries are mapped with the appropriate
 245                  * alignment constraints by the run-time linker.
 246                  */
 247                 align_amount = ELF_SPARC_MAXPGSZ;
 248                 if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp != 0) &&
 249                     ((uintptr_t)*addrp < align_amount))
 250                         align_amount = (uintptr_t)*addrp;
 251         }
 252 
 253         /*
 254          * 64-bit processes require 1024K alignment of ELF shared libraries.
 255          */
 256         if (p->p_model == DATAMODEL_LP64)
 257                 align_amount = MAX(align_amount, ELF_SPARCV9_MAXPGSZ);
 258 #ifdef VAC
 259         if (vac && vacalign && (align_amount < shm_alignment))
 260                 align_amount = shm_alignment;
 261 #endif
 262 
 263         if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount)) {
 264                 align_amount = (uintptr_t)*addrp;
 265         }
 266 
 267         ASSERT(ISP2(align_amount));
 268         ASSERT(align_amount == 0 || align_amount >= PAGESIZE);
 269 
 270         /*
 271          * Look for a large enough hole starting below the stack limit.
 272          * After finding it, use the upper part.
 273          */
 274         as_purge(as);
 275         off = off & (align_amount - 1);
 276 
 277         if (as_gap_aligned(as, len, &base, &slen, AH_HI, NULL, align_amount,
 278             PAGESIZE, off) == 0) {
 279                 caddr_t as_addr;
 280 
 281                 /*
 282                  * addr is the highest possible address to use since we have
 283                  * a PAGESIZE redzone at the beginning and end.
 284                  */
 285                 addr = base + slen - (PAGESIZE + len);
 286                 as_addr = addr;
 287                 /*
 288                  * Round address DOWN to the alignment amount and
 289                  * add the offset in.
 290                  * If addr is greater than as_addr, len would not be large
 291                  * enough to include the redzone, so we must adjust down
 292                  * by the alignment amount.
 293                  */
 294                 addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1l)));
 295                 addr += (long)off;
 296                 if (addr > as_addr) {
 297                         addr -= align_amount;
 298                 }
 299 
 300                 /*
 301                  * If randomization is requested, slew the allocation
 302                  * backwards, within the same gap, by a random amount.
 303                  */
 304                 if (flags & _MAP_RANDOMIZE) {
 305                         uint32_t slew;
 306                         uint32_t maxslew;
 307 
 308                         (void) random_get_pseudo_bytes((uint8_t *)&slew,
 309                             sizeof (slew));
 310 
 311                         maxslew = MIN(aslr_max_map_skew, (addr - base));
 312                         /*
 313                          * Don't allow ASLR to cause mappings to fail below
 314                          * because of SF erratum #57
 315                          */
 316                         maxslew = MIN(maxslew, (addr - errata57_limit));
 317 
 318                         slew = slew % maxslew;
 319                         addr -= P2ALIGN(slew, align_amount);
 320                 }
 321 
 322                 ASSERT(addr > base);
 323                 ASSERT(addr + len < base + slen);
 324                 ASSERT(((uintptr_t)addr & (align_amount - 1l)) ==
 325                     ((uintptr_t)(off)));
 326                 *addrp = addr;
 327 
 328 #if defined(SF_ERRATA_57)
 329                 if (AS_TYPE_64BIT(as) && addr < errata57_limit) {
 330                         *addrp = NULL;
 331                 }
 332 #endif
 333         } else {
 334                 *addrp = NULL;  /* no more virtual space */
 335         }
 336 }
 337 
 338 /*
 339  * Platform-dependent page scrub call.
 340  */
 341 void
 342 pagescrub(page_t *pp, uint_t off, uint_t len)
 343 {
 344         /*
 345          * For now, we rely on the fact that pagezero() will
 346          * always clear UEs.
 347          */
 348         pagezero(pp, off, len);
 349 }
 350 
 351 /*ARGSUSED*/
 352 void
 353 sync_data_memory(caddr_t va, size_t len)
 354 {
 355         cpu_flush_ecache();
 356 }
 357 
 358 /*
 359  * platform specific large pages for kernel heap support
 360  */
 361 void
 362 mmu_init_kcontext()
 363 {
 364         extern void set_kcontextreg();
 365 
 366         if (kcontextreg)
 367                 set_kcontextreg();
 368 }
 369 
 370 void
 371 contig_mem_init(void)
 372 {
 373         /* not applicable to sun4u */
 374 }
 375 
 376 /*ARGSUSED*/
 377 caddr_t
 378 contig_mem_prealloc(caddr_t alloc_base, pgcnt_t npages)
 379 {
 380         /* not applicable to sun4u */
 381         return (alloc_base);
 382 }