/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

int use_brk_lpg = 1;
int use_stk_lpg = 1;

/*
 * If set, we will not randomize mappings where the 'addr' argument is
 * non-NULL and not an alignment.
 */
int aslr_respect_mmap_hint = 0;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

intptr_t
brk(caddr_t nva)
{
	int error;
	proc_t *p = curproc;

	/*
	 * Serialize brk operations on an address space.
	 * This also serves as the lock protecting p_brksize
	 * and p_brkpageszc.
	 */
	as_rangelock(p->p_as);

	/*
	 * As a special case to aid the implementation of sbrk(3C), if given a
	 * new brk of 0, return the current brk. We'll hide this in brk(3C).
	 */
	if (nva == 0) {
		as_rangeunlock(p->p_as);
		return ((intptr_t)(p->p_brkbase + p->p_brksize));
	}

	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		error = brk_lpg(nva);
	} else {
		error = brk_internal(nva, p->p_brkpageszc);
	}
	as_rangeunlock(p->p_as);
	return ((error != 0 ? set_errno(error) : 0));
}
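
/*
 * Illustrative sketch (not part of this file): a userland sbrk(3C)-style
 * wrapper layered on the brk(0) special case handled above.  The names
 * below are hypothetical; the real libc caches the break in a private
 * variable rather than re-querying the kernel on every call.
 *
 *	// _brk_raw() stands in for the raw system call trap and is assumed
 *	// to return the kernel's intptr_t result unmodified.
 *	void *
 *	my_sbrk(intptr_t incr)
 *	{
 *		caddr_t old = (caddr_t)_brk_raw(0);	// current break
 *		if (incr != 0 && _brk_raw(old + incr) != 0)
 *			return ((void *)-1);		// errno already set
 *		return (old);
 *	}
 */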

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
	struct proc *p = curproc;
	size_t pgsz, len;
	caddr_t addr, brkend;
	caddr_t bssbase = p->p_bssbase;
	caddr_t brkbase = p->p_brkbase;
	int oszc, szc;
	int err;

	oszc = p->p_brkpageszc;

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk_internal() will initialize it.
	 */
	if (brkbase == 0) {
		return (brk_internal(nva, oszc));
	}

	len = nva - bssbase;

	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 */
	if (szc <= oszc) {
		err = brk_internal(nva, oszc);
		/* If failed, back off to base page size. */
		if (err != 0 && oszc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	err = brk_internal(nva, szc);
	/* If using szc failed, map with base page size and return. */
	if (err != 0) {
		if (szc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	/*
	 * Round up brk base to a large page boundary and remap
	 * anything in the segment already faulted in beyond that
	 * point.
	 */
	addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
	brkend = brkbase + p->p_brksize;
	len = brkend - addr;
	/* Check that len is not negative. Update page size code for heap. */
	if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_brkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE calculate the
	 * heap size as the real new heap size by rounding it up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing heap pagesize (e.g. some old code) and also if
	 * heap pagesize changes we can update p_brkpageszc but delay adding
	 * new mapping yet still know from p_brksize where the heap really
	 * ends. The user requested heap end is stored in libc variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * use PAGESIZE to roundup ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc changes since the last
	 * p_brksize was computed.
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
		 * page sizes if ova is not aligned to szc's pgsz.
		 */
		if (szc > 0) {
			caddr_t rbss;

			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
				    AS_MAP_NO_LPOOB;
			} else if (ova == rbss) {
				crargs.szc = szc;
			} else {
				crargs.szc = AS_MAP_HEAP;
			}
		} else {
			crargs.szc = AS_MAP_NO_LPOOB;
		}
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	p->p_brksize = size;
	return (0);
}

/*
 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t oldsize = p->p_stksize;
	size_t newsize;
	int err;

	/*
	 * Serialize grow operations on an address space.
	 * This also serves as the lock protecting p_stksize
	 * and p_stkpageszc.
	 */
	as_rangelock(as);
	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		err = grow_lpg(sp);
	} else {
		err = grow_internal(sp, p->p_stkpageszc);
	}
	as_rangeunlock(as);

	if (err == 0 && (newsize = p->p_stksize) > oldsize) {
		ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
		ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
		/*
		 * Set up translations so the process doesn't have to fault in
		 * the stack pages we just gave it.
		 */
		(void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
		    newsize - oldsize, F_INVAL, S_WRITE);
	}
	return ((err == 0 ? 1 : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
	struct proc *p = curproc;
	size_t pgsz;
	size_t len, newsize;
	caddr_t addr, saddr;
	caddr_t growend;
	int oszc, szc;
	int err;

	newsize = p->p_usrstack - sp;

	oszc = p->p_stkpageszc;
	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 * This shouldn't happen as the stack never shrinks.
	 */
	if (szc <= oszc) {
		err = grow_internal(sp, oszc);
		/* failed, fall back to base page size */
		if (err != 0 && oszc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * We've grown sufficiently to switch to a new page size.
	 * So we are going to remap the whole segment with the new page size.
	 */
	err = grow_internal(sp, szc);
	/* The grow with szc failed, so fall back to base page size. */
	if (err != 0) {
		if (szc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * Round up stack pointer to a large page boundary and remap
	 * any pgsz pages in the segment already faulted in beyond that
	 * point.
	 */
	saddr = p->p_usrstack - p->p_stksize;
	addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
	growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
	len = growend - addr;
	/* Check that len is not negative. Update page size code for stack. */
	if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_stkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}
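
/*
 * Worked example (illustrative only, with made-up values) of the remap
 * window computed by grow_lpg() above; brk_lpg() does the mirror-image
 * arithmetic for the heap:
 *
 *	pgsz       = 0x400000                  (4M large page)
 *	p_usrstack = 0x800000000000            (already pgsz-aligned)
 *	p_stksize  = 0x500000
 *	saddr      = p_usrstack - p_stksize    = 0x7fffffb00000
 *	addr       = P2ROUNDUP(saddr, pgsz)    = 0x7fffffc00000
 *	growend    = P2ALIGN(p_usrstack, pgsz) = 0x800000000000
 *	len        = growend - addr            = 0x400000
 *
 * addr >= saddr, growend > addr and len is pgsz-aligned, so the single
 * fully covered 4M page is handed to as_setpagesize() and p_stkpageszc
 * is updated; the partial page below addr stays at the old page size.
 */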

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	size_t newsize;
	size_t oldsize;
	int error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);
	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

	/*
	 * grow to growszc alignment but use current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
		newsize = p->p_usrstack - sp;
	}

	if (newsize > (size_t)p->p_stk_ctl) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * extend stack with the proposed new growszc, which is different
	 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
	 * if not aligned to szc's pgsz.
	 */
	if (szc > 0) {
		caddr_t oldsp = p->p_usrstack - oldsize;
		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
		    pgsz);

		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
			    AS_MAP_NO_LPOOB;
		} else if (oldsp == austk) {
			crargs.szc = szc;
		} else {
			crargs.szc = AS_MAP_STACK;
		}
	} else {
		crargs.szc = AS_MAP_NO_LPOOB;
	}
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
	    segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;
	return (0);
}

/*
 * Find address for user to map. If MAP_FIXED is not specified, we can pick
 * any address we want, but we will first try the value in *addrp if it is
 * non-NULL and _MAP_RANDOMIZE is not set. Thus this is implementing a way to
 * try and get a preferred address.
 */
int
choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags)
{
	caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
	size_t lenp = len;

	ASSERT(AS_ISCLAIMGAP(as));	/* searches should be serialized */
	if (flags & MAP_FIXED) {
		(void) as_unmap(as, *addrp, len);
		return (0);
	} else if (basep != NULL &&
	    ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
	    !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
		/* User supplied address was available */
		*addrp = basep;
	} else {
		/*
		 * No user supplied address or the address supplied was not
		 * available.
		 */
		map_addr(addrp, len, off, vacalign, flags);
	}
	if (*addrp == NULL)
		return (ENOMEM);
	return (0);
}


/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
	struct segvn_crargs vn_a;
	int error;

	if (((PROT_ALL & uprot) != uprot))
		return (EACCES);

	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address. First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}
	/*
	 * No need to worry about vac alignment for anonymous
	 * pages since this is a "clone" object that doesn't
	 * yet exist.
	 */
	error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
	if (error != 0) {
		return (error);
	}

	/*
	 * Use the seg_vn segment driver; passing in the NULL amp
	 * gives the desired "cloning" effect.
	 */
	vn_a.vp = NULL;
	vn_a.offset = 0;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = uprot;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = CRED();
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	return (as_map(as, *addrp, len, segvn_create, &vn_a));
}

#define	RANDOMIZABLE_MAPPING(addr, flags) (((flags & MAP_FIXED) == 0) && \
	!(((flags & MAP_ALIGN) == 0) && (addr != 0) && aslr_respect_mmap_hint))

static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
	struct vnode *vp;
	struct as *as = curproc->p_as;
	uint_t uprot, maxprot, type;
	int error;
	int in_crit = 0;

	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
	    MAP_TEXT | MAP_INITDATA)) != 0) {
		/* | MAP_RENAME */	/* not implemented, let user know */
		return (EINVAL);
	}

	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
		return (EINVAL);
	}

	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
		return (EINVAL);
	}

	if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
	    (MAP_FIXED | _MAP_RANDOMIZE)) {
		return (EINVAL);
	}

	/*
	 * If it's not a fixed allocation and mmap ASLR is enabled, randomize
	 * it.
	 */
	if (RANDOMIZABLE_MAPPING(*addrp, flags) &&
	    secflag_enabled(curproc, PROC_SEC_ASLR))
		flags |= _MAP_RANDOMIZE;

#if defined(__sparc)
	/*
	 * See if this is an "old mmap call". If so, remember this
	 * fact and convert the flags value given to mmap to indicate
	 * the specified address in the system call must be used.
	 * _MAP_NEW is set by all new uses of mmap.
	 */
	if ((flags & _MAP_NEW) == 0)
		flags |= MAP_FIXED;
#endif
	flags &= ~_MAP_NEW;

	type = flags & MAP_TYPE;
	if (type != MAP_PRIVATE && type != MAP_SHARED)
		return (EINVAL);


	if (flags & MAP_ALIGN) {
		if (flags & MAP_FIXED)
			return (EINVAL);

		/* alignment needs to be a power of 2 >= page size */
		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
		    !ISP2((uintptr_t)*addrp))
			return (EINVAL);
	}
	/*
	 * Check for bad lengths and file position.
	 * We let the VOP_MAP routine check for negative lengths
	 * since on some vnode types this might be appropriate.
	 */
	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
		return (EINVAL);

	maxprot = PROT_ALL;		/* start out allowing all accesses */
	uprot = prot | PROT_USER;

	if (fp == NULL) {
		ASSERT(flags & MAP_ANON);
		/* discard lwpchan mappings, like munmap() */
		if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
			lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
		as_rangelock(as);
		error = zmap(as, addrp, len, uprot, flags, pos);
		as_rangeunlock(as);
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (error == 0 && (flags & MAP_SHARED)) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		return (error);
	} else if ((flags & MAP_ANON) != 0)
		return (EINVAL);

	vp = fp->f_vnode;

	/* Can't execute code from "noexec" mounted filesystem. */
	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
		maxprot &= ~PROT_EXEC;

	/*
	 * These checks were added as part of large files.
	 *
	 * Return ENXIO if the initial position is negative; return EOVERFLOW
	 * if (offset + len) would overflow the maximum allowed offset for the
	 * type of file descriptor being used.
	 */
	if (vp->v_type == VREG) {
		if (pos < 0)
			return (ENXIO);
		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
			return (EOVERFLOW);
	}

	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
		/* no write access allowed */
		maxprot &= ~PROT_WRITE;
	}

	/*
	 * XXX - Do we also adjust maxprot based on protections
	 * of the vnode? E.g. if no execute permission is given
	 * on the vnode for the current user, maxprot probably
	 * should disallow PROT_EXEC also? This is different
	 * from the write access as this would be a per vnode
	 * test as opposed to a per fd test for writability.
	 */

	/*
	 * Verify that the specified protections are not greater than
	 * the maximum allowable protections. Also test to make sure
	 * that the file descriptor does allow for read access since
	 * "write only" mappings are hard to do since normally we do
	 * the read from the file before the page can be written.
	 */
	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
		return (EACCES);

	/*
	 * If the user specified an address, do some simple checks here
	 */
	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address. First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}

	if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
	    nbl_need_check(vp)) {
		int svmand;
		nbl_op_t nop;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto done;
		if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
			if (prot & (PROT_READ | PROT_EXEC)) {
				nop = NBL_READWRITE;
			} else {
				nop = NBL_WRITE;
			}
		} else {
			nop = NBL_READ;
		}
		if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	/* discard lwpchan mappings, like munmap() */
	if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
		lwpchan_delete_mapping(curproc, *addrp, *addrp + len);

	/*
	 * Ok, now let the vnode map routine do its thing to set things up.
	 */
	error = VOP_MAP(vp, pos, as,
	    addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);

	if (error == 0) {
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (flags & MAP_SHARED) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		if (vp->v_type == VREG &&
		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
			/*
			 * Mark this as an executable vnode
			 */
			mutex_enter(&vp->v_lock);
			vp->v_flag |= VVMEXEC;
			mutex_exit(&vp->v_lock);
		}
	}

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}
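
/*
 * Illustrative call path (not part of this file): a typical anonymous
 * private mapping made from userland,
 *
 *	void *p = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
 *	    MAP_PRIVATE | MAP_ANON, -1, 0);
 *
 * arrives here through smmap64()/smmap32() below with fp == NULL, takes
 * the fp == NULL branch in smmap_common() and is set up by zmap(); a
 * file-backed mapping instead reaches the filesystem via VOP_MAP().
 */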

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again. Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
	struct file *fp;
	int error;

	if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&addr, len, prot, flags,
		    NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&addr, len, prot, flags,
		    fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif	/* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
	struct file *fp;
	int error;
	caddr_t a = (caddr_t)(uintptr_t)addr;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
	caddr_t addr;
	size_t len;
#ifdef _LP64
	/*
	 * 32-bit contents, 64-bit cells
	 */
	uint64_t prot;
	uint64_t flags;
	uint64_t fd;
	uint64_t offhi;
	uint64_t offlo;
#else
	/*
	 * 32-bit contents, 32-bit cells
	 */
	uint32_t prot;
	uint32_t flags;
	uint32_t fd;
	uint32_t offhi;
	uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
	struct file *fp;
	int error;
	caddr_t a = uap->addr;
	int flags = (int)uap->flags;
	int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, NULL, off);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, fp, off);
		releasef(fd);
	} else
		error = EBADF;

	if (error == 0)
		rvp->r_val1 = (uintptr_t)a;
	return (error);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

int
munmap(caddr_t addr, size_t len)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(EINVAL));

	/*
	 * Discard lwpchan mappings.
	 */
	if (p->p_lcp != NULL)
		lwpchan_delete_mapping(p, addr, addr + len);
	if (as_unmap(as, addr, len) != 0)
		return (set_errno(EINVAL));

	return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
	struct as *as = curproc->p_as;
	uint_t uprot = prot | PROT_USER;
	int error;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
	case RANGE_OKAY:
		break;
	case RANGE_BADPROT:
		return (set_errno(ENOTSUP));
	case RANGE_BADADDR:
	default:
		return (set_errno(ENOMEM));
	}

	error = as_setprot(as, addr, len, uprot);
	if (error)
		return (set_errno(error));
	return (0);
}

#define	MC_CACHE	128			/* internal result buffer */
#define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */

int
mincore(caddr_t addr, size_t len, char *vecp)
{
	struct as *as = curproc->p_as;
	caddr_t ea;			/* end address of loop */
	size_t rl;			/* inner result length */
	char vec[MC_CACHE];		/* local vector cache */
	int error;
	model_t model;
	long llen;

	model = get_udatamodel();
	/*
	 * Validate form of address parameters.
	 */
	if (model == DATAMODEL_NATIVE) {
		llen = (long)len;
	} else {
		llen = (int32_t)(size32_t)len;
	}
	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(ENOMEM));

	/*
	 * Loop over subranges of interval [addr : addr + len), recovering
	 * results internally and then copying them out to caller. Subrange
	 * is based on the size of MC_CACHE, defined above.
	 */
	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
		error = as_incore(as, addr,
		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
		if (rl != 0) {
			rl = (rl + PAGESIZE - 1) / PAGESIZE;
			if (copyout(vec, vecp, rl) != 0)
				return (set_errno(EFAULT));
			vecp += rl;
		}
		if (error != 0)
			return (set_errno(ENOMEM));
	}
	return (0);
}
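
/*
 * Illustrative userland usage sketch (not part of this file) matching the
 * copyout sizing in mincore() above: the result vector carries one byte
 * per page of the probed range, with bit 0 of a byte set when the
 * corresponding page is resident. Variable names are hypothetical and
 * addr is assumed to be a page-aligned address in the caller's space.
 *
 *	long pg = sysconf(_SC_PAGESIZE);
 *	size_t len = 16 * pg;			// probe 16 pages
 *	char *vec = malloc(len / pg);		// one byte per page
 *
 *	if (vec != NULL && mincore(addr, len, vec) == 0) {
 *		for (size_t i = 0; i < len / pg; i++)
 *			if (vec[i] & 1)
 *				;		// page i is in core
 *	}
 */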