/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

int use_brk_lpg = 1;
int use_stk_lpg = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

intptr_t
brk(caddr_t nva)
{
	int error;
	proc_t *p = curproc;

	/*
	 * As a special case to aid the implementation of sbrk(3C), if given a
	 * new brk of 0, return the current brk.  We'll hide this in brk(3C).
	 */
	if (nva == 0)
		return ((intptr_t)(p->p_brkbase + p->p_brksize));

	/*
	 * Serialize brk operations on an address space.
	 * This also serves as the lock protecting p_brksize
	 * and p_brkpageszc.
	 */
	as_rangelock(p->p_as);
	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		error = brk_lpg(nva);
	} else {
		error = brk_internal(nva, p->p_brkpageszc);
	}
	as_rangeunlock(p->p_as);
	return ((error != 0 ? set_errno(error) : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
	struct proc *p = curproc;
	size_t pgsz, len;
	caddr_t addr, brkend;
	caddr_t bssbase = p->p_bssbase;
	caddr_t brkbase = p->p_brkbase;
	int oszc, szc;
	int err;

	oszc = p->p_brkpageszc;

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk_internal() will initialize it.
	 */
	if (brkbase == 0) {
		return (brk_internal(nva, oszc));
	}

	len = nva - bssbase;

	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 */
	if (szc <= oszc) {
		err = brk_internal(nva, oszc);
		/* If that failed, back off to base page size. */
		if (err != 0 && oszc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	err = brk_internal(nva, szc);
	/* If using szc failed, map with base page size and return. */
	if (err != 0) {
		if (szc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	/*
	 * Round up brk base to a large page boundary and remap
	 * anything in the segment already faulted in beyond that
	 * point.
	 */
	addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
	brkend = brkbase + p->p_brksize;
	len = brkend - addr;
	/* Check that len is not negative. Update page size code for heap. */
	if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_brkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);		/* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed, p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE calculate the
	 * heap size as the real new heap size by rounding it up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing heap pagesize (e.g. some old code) and also if
	 * heap pagesize changes we can update p_brkpageszc but delay adding
	 * new mapping yet still know from p_brksize where the heap really
	 * ends. The user requested heap end is stored in a libc variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * use PAGESIZE to roundup ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc changes since the last
	 * p_brksize was computed.
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
		 * page sizes if ova is not aligned to szc's pgsz.
		 */
		if (szc > 0) {
			caddr_t rbss;

			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
				    AS_MAP_NO_LPOOB;
			} else if (ova == rbss) {
				crargs.szc = szc;
			} else {
				crargs.szc = AS_MAP_HEAP;
			}
		} else {
			crargs.szc = AS_MAP_NO_LPOOB;
		}
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	p->p_brksize = size;
	return (0);
}

/*
 * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t oldsize = p->p_stksize;
	size_t newsize;
	int err;

	/*
	 * Serialize grow operations on an address space.
	 * This also serves as the lock protecting p_stksize
	 * and p_stkpageszc.
	 */
	as_rangelock(as);
	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		err = grow_lpg(sp);
	} else {
		err = grow_internal(sp, p->p_stkpageszc);
	}
	as_rangeunlock(as);

	if (err == 0 && (newsize = p->p_stksize) > oldsize) {
		ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
		ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
		/*
		 * Set up translations so the process doesn't have to fault in
		 * the stack pages we just gave it.
		 */
		(void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
		    newsize - oldsize, F_INVAL, S_WRITE);
	}
	return ((err == 0 ? 1 : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
	struct proc *p = curproc;
	size_t pgsz;
	size_t len, newsize;
	caddr_t addr, saddr;
	caddr_t growend;
	int oszc, szc;
	int err;

	newsize = p->p_usrstack - sp;

	oszc = p->p_stkpageszc;
	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 * This shouldn't happen as the stack never shrinks.
	 */
	if (szc <= oszc) {
		err = grow_internal(sp, oszc);
		/* failed, fall back to base page size */
		if (err != 0 && oszc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * We've grown sufficiently to switch to a new page size.
	 * So we are going to remap the whole segment with the new page size.
	 */
	err = grow_internal(sp, szc);
	/* The grow with szc failed, so fall back to base page size. */
	if (err != 0) {
		if (szc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * Round up stack pointer to a large page boundary and remap
	 * any pgsz pages in the segment already faulted in beyond that
	 * point.
	 */
	saddr = p->p_usrstack - p->p_stksize;
	addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
	growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
	len = growend - addr;
	/* Check that len is not negative. Update page size code for stack. */
	if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_stkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);		/* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	size_t newsize;
	size_t oldsize;
	int error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);
	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

	/*
	 * grow to growszc alignment but use current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
		newsize = p->p_usrstack - sp;
	}

	if (newsize > (size_t)p->p_stk_ctl) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * extend stack with the proposed new growszc, which is different
	 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
	 * if not aligned to szc's pgsz.
	 */
	if (szc > 0) {
		caddr_t oldsp = p->p_usrstack - oldsize;
		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
		    pgsz);

		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
			    AS_MAP_NO_LPOOB;
		} else if (oldsp == austk) {
			crargs.szc = szc;
		} else {
			crargs.szc = AS_MAP_STACK;
		}
	} else {
		crargs.szc = AS_MAP_NO_LPOOB;
	}
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
	    segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;
	return (0);
}

/*
 * Find address for user to map.  If MAP_FIXED is not specified, we can pick
 * any address we want, but we will first try the value in *addrp if it is
 * non-NULL and _MAP_RANDOMIZE is not set.  Thus this is implementing a way to
 * try and get a preferred address.
 */
int
choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags)
{
	caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
	size_t lenp = len;

	ASSERT(AS_ISCLAIMGAP(as));	/* searches should be serialized */
	if (flags & MAP_FIXED) {
		(void) as_unmap(as, *addrp, len);
		return (0);
	} else if (basep != NULL &&
	    ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
	    !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
		/* User supplied address was available */
		*addrp = basep;
	} else {
		/*
		 * No user supplied address or the address supplied was not
		 * available.
		 */
		map_addr(addrp, len, off, vacalign, flags);
	}
	if (*addrp == NULL)
		return (ENOMEM);
	return (0);
}


/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
	struct segvn_crargs vn_a;
	int error;

	if (((PROT_ALL & uprot) != uprot))
		return (EACCES);

	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}
	/*
	 * No need to worry about vac alignment for anonymous
	 * pages since this is a "clone" object that doesn't
	 * yet exist.
	 */
	error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
	if (error != 0) {
		return (error);
	}

	/*
	 * Use the seg_vn segment driver; passing in the NULL amp
	 * gives the desired "cloning" effect.
	 */
	vn_a.vp = NULL;
	vn_a.offset = 0;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = uprot;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = CRED();
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	return (as_map(as, *addrp, len, segvn_create, &vn_a));
}

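/*
 * Common mmap(2) processing shared by the LP64 and ILP32 entry points below:
 * validate the request, then establish the mapping either anonymously via
 * zmap() or through the vnode's VOP_MAP() routine.
 */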
static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
	struct vnode *vp;
	struct as *as = curproc->p_as;
	uint_t uprot, maxprot, type;
	int error;
	int in_crit = 0;

	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
	    MAP_TEXT | MAP_INITDATA)) != 0) {
		/* | MAP_RENAME */	/* not implemented, let user know */
		return (EINVAL);
	}

	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
		return (EINVAL);
	}

	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
		return (EINVAL);
	}

	if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
	    (MAP_FIXED | _MAP_RANDOMIZE)) {
		return (EINVAL);
	}

	/*
	 * If it's not a fixed allocation and mmap ASLR is enabled, randomize
	 * it.
	 */
	if (((flags & MAP_FIXED) == 0) &&
	    secflag_enabled(curproc, PROC_SEC_ASLR))
		flags |= _MAP_RANDOMIZE;

#if defined(__sparc)
	/*
	 * See if this is an "old mmap call".  If so, remember this
	 * fact and convert the flags value given to mmap to indicate
	 * the specified address in the system call must be used.
	 * _MAP_NEW is set by all new uses of mmap.
	 */
	if ((flags & _MAP_NEW) == 0)
		flags |= MAP_FIXED;
#endif
	flags &= ~_MAP_NEW;

	type = flags & MAP_TYPE;
	if (type != MAP_PRIVATE && type != MAP_SHARED)
		return (EINVAL);


	if (flags & MAP_ALIGN) {
		if (flags & MAP_FIXED)
			return (EINVAL);

		/* alignment needs to be a power of 2 >= page size */
		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
		    !ISP2((uintptr_t)*addrp))
			return (EINVAL);
	}
	/*
	 * Check for bad lengths and file position.
	 * We let the VOP_MAP routine check for negative lengths
	 * since on some vnode types this might be appropriate.
	 */
	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
		return (EINVAL);

	maxprot = PROT_ALL;		/* start out allowing all accesses */
	uprot = prot | PROT_USER;

	if (fp == NULL) {
		ASSERT(flags & MAP_ANON);
		/* discard lwpchan mappings, like munmap() */
		if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
			lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
		as_rangelock(as);
		error = zmap(as, addrp, len, uprot, flags, pos);
		as_rangeunlock(as);
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (error == 0 && (flags & MAP_SHARED)) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		return (error);
	} else if ((flags & MAP_ANON) != 0)
		return (EINVAL);

	vp = fp->f_vnode;

	/* Can't execute code from "noexec" mounted filesystem. */
	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
		maxprot &= ~PROT_EXEC;

	/*
	 * These checks were added as part of large files.
	 *
	 * Return ENXIO if the initial position is negative; return EOVERFLOW
	 * if (offset + len) would overflow the maximum allowed offset for the
	 * type of file descriptor being used.
	 */
	if (vp->v_type == VREG) {
		if (pos < 0)
			return (ENXIO);
		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
			return (EOVERFLOW);
	}

	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
		/* no write access allowed */
		maxprot &= ~PROT_WRITE;
	}

	/*
	 * XXX - Do we also adjust maxprot based on protections
	 * of the vnode?  E.g. if no execute permission is given
	 * on the vnode for the current user, maxprot probably
	 * should disallow PROT_EXEC also?  This is different
	 * from the write access as this would be a per vnode
	 * test as opposed to a per fd test for writability.
	 */

	/*
	 * Verify that the specified protections are not greater than
	 * the maximum allowable protections.  Also test to make sure
	 * that the file descriptor allows for read access since
	 * "write only" mappings are hard to do since normally we do
	 * the read from the file before the page can be written.
	 */
	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
		return (EACCES);

	/*
	 * If the user specified an address, do some simple checks here
	 */
	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}

	if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
	    nbl_need_check(vp)) {
		int svmand;
		nbl_op_t nop;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto done;
		if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
			if (prot & (PROT_READ | PROT_EXEC)) {
				nop = NBL_READWRITE;
			} else {
				nop = NBL_WRITE;
			}
		} else {
			nop = NBL_READ;
		}
		if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	/* discard lwpchan mappings, like munmap() */
	if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
		lwpchan_delete_mapping(curproc, *addrp, *addrp + len);

	/*
	 * Ok, now let the vnode map routine do its thing to set things up.
	 */
	error = VOP_MAP(vp, pos, as,
	    addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);

	if (error == 0) {
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (flags & MAP_SHARED) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		if (vp->v_type == VREG &&
		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
			/*
			 * Mark this as an executable vnode
			 */
			mutex_enter(&vp->v_lock);
			vp->v_flag |= VVMEXEC;
			mutex_exit(&vp->v_lock);
		}
	}

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again.  Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
	struct file *fp;
	int error;

	if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&addr, len, prot, flags,
		    NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&addr, len, prot, flags,
		    fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif	/* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
	struct file *fp;
	int error;
	caddr_t a = (caddr_t)(uintptr_t)addr;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
	caddr_t addr;
	size_t len;
#ifdef _LP64
	/*
	 * 32-bit contents, 64-bit cells
	 */
	uint64_t prot;
	uint64_t flags;
	uint64_t fd;
	uint64_t offhi;
	uint64_t offlo;
#else
	/*
	 * 32-bit contents, 32-bit cells
	 */
	uint32_t prot;
	uint32_t flags;
	uint32_t fd;
	uint32_t offhi;
	uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
	struct file *fp;
	int error;
	caddr_t a = uap->addr;
	int flags = (int)uap->flags;
	int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, NULL, off);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, fp, off);
		releasef(fd);
	} else
		error = EBADF;

	if (error == 0)
		rvp->r_val1 = (uintptr_t)a;
	return (error);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

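/*
 * munmap(2): remove any mappings within [addr, addr + len) from the
 * calling process's address space.
 */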
int
munmap(caddr_t addr, size_t len)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(EINVAL));

	/*
	 * Discard lwpchan mappings.
	 */
	if (p->p_lcp != NULL)
		lwpchan_delete_mapping(p, addr, addr + len);
	if (as_unmap(as, addr, len) != 0)
		return (set_errno(EINVAL));

	return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
	struct as *as = curproc->p_as;
	uint_t uprot = prot | PROT_USER;
	int error;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
	case RANGE_OKAY:
		break;
	case RANGE_BADPROT:
		return (set_errno(ENOTSUP));
	case RANGE_BADADDR:
	default:
		return (set_errno(ENOMEM));
	}

	error = as_setprot(as, addr, len, uprot);
	if (error)
		return (set_errno(error));
	return (0);
}

#define	MC_CACHE	128			/* internal result buffer */
#define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */

int
mincore(caddr_t addr, size_t len, char *vecp)
{
	struct as *as = curproc->p_as;
	caddr_t ea;			/* end address of loop */
	size_t rl;			/* inner result length */
	char vec[MC_CACHE];		/* local vector cache */
	int error;
	model_t model;
	long	llen;

	model = get_udatamodel();
	/*
	 * Validate form of address parameters.
	 */
	if (model == DATAMODEL_NATIVE) {
		llen = (long)len;
	} else {
		llen = (int32_t)(size32_t)len;
	}
	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(ENOMEM));

	/*
	 * Loop over subranges of interval [addr : addr + len), recovering
	 * results internally and then copying them out to caller.  Subrange
	 * is based on the size of MC_CACHE, defined above.
	 */
	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
		error = as_incore(as, addr,
		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
		if (rl != 0) {
			rl = (rl + PAGESIZE - 1) / PAGESIZE;
			if (copyout(vec, vecp, rl) != 0)
				return (set_errno(EFAULT));
			vecp += rl;
		}
		if (error != 0)
			return (set_errno(ENOMEM));
	}
	return (0);
}