/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

int use_brk_lpg = 1;
int use_stk_lpg = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

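/*
 * brk(2) system call: set the break (the end of the process data segment)
 * to nva.
 */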
int
brk(caddr_t nva)
{
	int error;
	proc_t *p = curproc;

	/*
	 * Serialize brk operations on an address space.
	 * This also serves as the lock protecting p_brksize
	 * and p_brkpageszc.
	 */
	as_rangelock(p->p_as);
	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		error = brk_lpg(nva);
	} else {
		error = brk_internal(nva, p->p_brkpageszc);
	}
	as_rangeunlock(p->p_as);
	return ((error != 0 ? set_errno(error) : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
	struct proc *p = curproc;
	size_t pgsz, len;
	caddr_t addr, brkend;
	caddr_t bssbase = p->p_bssbase;
	caddr_t brkbase = p->p_brkbase;
	int oszc, szc;
	int err;

	oszc = p->p_brkpageszc;

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk_internal() will initialize it.
	 */
	if (brkbase == 0) {
		return (brk_internal(nva, oszc));
	}

	len = nva - bssbase;

	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 */
	if (szc <= oszc) {
		err = brk_internal(nva, oszc);
		/* If failed, back off to base page size. */
		if (err != 0 && oszc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	err = brk_internal(nva, szc);
	/* If using szc failed, map with base page size and return. */
	if (err != 0) {
		if (szc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	/*
	 * Round up brk base to a large page boundary and remap
	 * anything in the segment already faulted in beyond that
	 * point.
	 */
	addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
	brkend = brkbase + p->p_brksize;
	len = brkend - addr;
	/* Check that len is not negative. Update page size code for heap. */
	if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_brkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE calculate the
	 * heap size as the real new heap size by rounding it up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing heap pagesize (e.g. some old code) and also if
	 * heap pagesize changes we can update p_brkpageszc but delay adding
	 * new mapping yet still know from p_brksize where the heap really
	 * ends. The user requested heap end is stored in libc variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * use PAGESIZE to roundup ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc changes since the last
	 * p_brksize was computed.
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

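	/*
	 * Reject a new break address below the heap base, or growth that
	 * would exceed the RLIMIT_DATA resource control.
	 */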
	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
		 * page sizes if ova is not aligned to szc's pgsz.
		 */
		if (szc > 0) {
			caddr_t rbss;

			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
				    AS_MAP_NO_LPOOB;
			} else if (ova == rbss) {
				crargs.szc = szc;
			} else {
				crargs.szc = AS_MAP_HEAP;
			}
		} else {
			crargs.szc = AS_MAP_NO_LPOOB;
		}
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	p->p_brksize = size;
	return (0);
}

/*
 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t oldsize = p->p_stksize;
	size_t newsize;
	int err;

	/*
	 * Serialize grow operations on an address space.
	 * This also serves as the lock protecting p_stksize
	 * and p_stkpageszc.
	 */
	as_rangelock(as);
	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		err = grow_lpg(sp);
	} else {
		err = grow_internal(sp, p->p_stkpageszc);
	}
	as_rangeunlock(as);

	if (err == 0 && (newsize = p->p_stksize) > oldsize) {
		ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
		ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
		/*
		 * Set up translations so the process doesn't have to fault in
		 * the stack pages we just gave it.
		 */
		(void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
		    newsize - oldsize, F_INVAL, S_WRITE);
	}
	return ((err == 0 ? 1 : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
	struct proc *p = curproc;
	size_t pgsz;
	size_t len, newsize;
	caddr_t addr, saddr;
	caddr_t growend;
	int oszc, szc;
	int err;

	newsize = p->p_usrstack - sp;

	oszc = p->p_stkpageszc;
	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 * This shouldn't happen as the stack never shrinks.
	 */
	if (szc <= oszc) {
		err = grow_internal(sp, oszc);
		/* failed, fall back to base page size */
		if (err != 0 && oszc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * We've grown sufficiently to switch to a new page size.
	 * So we are going to remap the whole segment with the new page size.
	 */
	err = grow_internal(sp, szc);
	/* The grow with szc failed, so fall back to base page size. */
	if (err != 0) {
		if (szc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * Round up stack pointer to a large page boundary and remap
	 * any pgsz pages in the segment already faulted in beyond that
	 * point.
	 */
	saddr = p->p_usrstack - p->p_stksize;
	addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
	growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
	len = growend - addr;
	/* Check that len is not negative. Update page size code for stack. */
	if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_stkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	size_t newsize;
	size_t oldsize;
	int error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);
	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

	/*
	 * grow to growszc alignment but use current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
		newsize = p->p_usrstack - sp;
	}

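	/*
	 * Enforce the stack size limit (the RLIMIT_STACK resource control).
	 */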
	if (newsize > (size_t)p->p_stk_ctl) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * extend stack with the proposed new growszc, which is different
	 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
	 * if not aligned to szc's pgsz.
	 */
	if (szc > 0) {
		caddr_t oldsp = p->p_usrstack - oldsize;
		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
		    pgsz);

		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
			    AS_MAP_NO_LPOOB;
		} else if (oldsp == austk) {
			crargs.szc = szc;
		} else {
			crargs.szc = AS_MAP_STACK;
		}
	} else {
		crargs.szc = AS_MAP_NO_LPOOB;
	}
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
	    segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;
	return (0);
}

/*
 * Find address for user to map.
 * If MAP_FIXED is not specified, we can pick any address we want, but we will
 * first try the value in *addrp if it is non-NULL. Thus this is implementing
 * a way to try and get a preferred address.
 */
int
choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags)
{
	proc_t *p = curproc;
	caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
	size_t lenp;

	ASSERT(AS_ISCLAIMGAP(as));	/* searches should be serialized */

	/*
	 * If we have been provided a hint, we should still expand the lenp
	 * to be the rest of the address space. This will allow us to
	 * treat the hint as a strong desire to be "nearby" the provided
	 * address. If we can't satisfy the hint, as_gap() will walk forward.
	 */
	if (flags & _MAP_LOW32)
		lenp = (caddr_t)USERLIMIT32 - basep;
#if defined(__amd64)
	else if (p->p_model == DATAMODEL_NATIVE)
		lenp = p->p_usrstack - basep -
		    ((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK);
#endif
	else
		lenp = as->a_userlimit - basep;

	if (flags & MAP_FIXED) {
		(void) as_unmap(as, *addrp, len);
		return (0);
	} else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
	    !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
		/* User supplied address was available */
		*addrp = basep;
	} else {
		/*
		 * No user supplied address or the address supplied was not
		 * available.
		 */
		map_addr(addrp, len, off, vacalign, flags);
	}
	if (*addrp == NULL)
		return (ENOMEM);
	return (0);
}


/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
	struct segvn_crargs vn_a;
	int error;

	if (((PROT_ALL & uprot) != uprot))
		return (EACCES);

	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address. First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}
	/*
	 * No need to worry about vac alignment for anonymous
	 * pages since this is a "clone" object that doesn't
	 * yet exist.
	 */
	error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
	if (error != 0) {
		return (error);
	}

	/*
	 * Use the seg_vn segment driver; passing in the NULL amp
	 * gives the desired "cloning" effect.
	 */
	vn_a.vp = NULL;
	vn_a.offset = 0;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = uprot;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = CRED();
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	return (as_map(as, *addrp, len, segvn_create, &vn_a));
}

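/*
 * Common code for the mmap family of system calls: validate the flags,
 * protections and requested range, then establish the mapping, either
 * through zmap() for anonymous memory or through the vnode's VOP_MAP()
 * routine for file-backed mappings.
 */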
static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
	struct vnode *vp;
	struct as *as = curproc->p_as;
	uint_t uprot, maxprot, type;
	int error;
	int in_crit = 0;

	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
	    MAP_TEXT | MAP_INITDATA)) != 0) {
		/* | MAP_RENAME */	/* not implemented, let user know */
		return (EINVAL);
	}

	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
		return (EINVAL);
	}

	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
		return (EINVAL);
	}

#if defined(__sparc)
	/*
	 * See if this is an "old mmap call". If so, remember this
	 * fact and convert the flags value given to mmap to indicate
	 * the specified address in the system call must be used.
	 * _MAP_NEW is set by all new uses of mmap.
	 */
	if ((flags & _MAP_NEW) == 0)
		flags |= MAP_FIXED;
#endif
	flags &= ~_MAP_NEW;

	type = flags & MAP_TYPE;
	if (type != MAP_PRIVATE && type != MAP_SHARED)
		return (EINVAL);


	if (flags & MAP_ALIGN) {

		if (flags & MAP_FIXED)
			return (EINVAL);

		/* alignment needs to be a power of 2 >= page size */
		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
		    !ISP2((uintptr_t)*addrp))
			return (EINVAL);
	}
	/*
	 * Check for bad lengths and file position.
	 * We let the VOP_MAP routine check for negative lengths
	 * since on some vnode types this might be appropriate.
	 */
	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
		return (EINVAL);

	maxprot = PROT_ALL;	/* start out allowing all accesses */
	uprot = prot | PROT_USER;

	if (fp == NULL) {
		ASSERT(flags & MAP_ANON);
		/* discard lwpchan mappings, like munmap() */
		if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
			lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
		as_rangelock(as);
		error = zmap(as, addrp, len, uprot, flags, pos);
		as_rangeunlock(as);
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (error == 0 && (flags & MAP_SHARED)) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		return (error);
	} else if ((flags & MAP_ANON) != 0)
		return (EINVAL);

	vp = fp->f_vnode;

	/* Can't execute code from "noexec" mounted filesystem. */
	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
		maxprot &= ~PROT_EXEC;

	/*
	 * These checks were added as part of large files.
	 *
	 * Return ENXIO if the initial position is negative; return EOVERFLOW
	 * if (offset + len) would overflow the maximum allowed offset for the
	 * type of file descriptor being used.
	 */
	if (vp->v_type == VREG) {
		if (pos < 0)
			return (ENXIO);
		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
			return (EOVERFLOW);
	}

	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
		/* no write access allowed */
		maxprot &= ~PROT_WRITE;
	}

	/*
	 * XXX - Do we also adjust maxprot based on protections
	 * of the vnode? E.g. if no execute permission is given
	 * on the vnode for the current user, maxprot probably
	 * should disallow PROT_EXEC also? This is different
	 * from the write access as this would be a per vnode
	 * test as opposed to a per fd test for writability.
	 */

	/*
	 * Verify that the specified protections are not greater than
	 * the maximum allowable protections. Also test to make sure
	 * that the file descriptor allows for read access since
	 * "write only" mappings are hard to do since normally we do
	 * the read from the file before the page can be written.
	 */
	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
		return (EACCES);

	/*
	 * If the user specified an address, do some simple checks here
	 */
	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address. First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}

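	/*
	 * If the vnode is subject to mandatory file/record locking, make
	 * sure the requested mapping does not conflict with existing locks.
	 */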
	if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
	    nbl_need_check(vp)) {
		int svmand;
		nbl_op_t nop;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto done;
		if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
			if (prot & (PROT_READ | PROT_EXEC)) {
				nop = NBL_READWRITE;
			} else {
				nop = NBL_WRITE;
			}
		} else {
			nop = NBL_READ;
		}
		if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	/* discard lwpchan mappings, like munmap() */
	if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
		lwpchan_delete_mapping(curproc, *addrp, *addrp + len);

	/*
	 * Ok, now let the vnode map routine do its thing to set things up.
	 */
	error = VOP_MAP(vp, pos, as,
	    addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);

	if (error == 0) {
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (flags & MAP_SHARED) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		if (vp->v_type == VREG &&
		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
			/*
			 * Mark this as an executable vnode
			 */
			mutex_enter(&vp->v_lock);
			vp->v_flag |= VVMEXEC;
			mutex_exit(&vp->v_lock);
		}
	}

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again. Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
	struct file *fp;
	int error;

	if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&addr, len, prot, flags,
		    NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&addr, len, prot, flags,
		    fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif	/* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
	struct file *fp;
	int error;
	caddr_t a = (caddr_t)(uintptr_t)addr;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
	caddr_t addr;
	size_t len;
#ifdef _LP64
	/*
	 * 32-bit contents, 64-bit cells
	 */
	uint64_t prot;
	uint64_t flags;
	uint64_t fd;
	uint64_t offhi;
	uint64_t offlo;
#else
	/*
	 * 32-bit contents, 32-bit cells
	 */
	uint32_t prot;
	uint32_t flags;
	uint32_t fd;
	uint32_t offhi;
	uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
	struct file *fp;
	int error;
	caddr_t a = uap->addr;
	int flags = (int)uap->flags;
	int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, NULL, off);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, fp, off);
		releasef(fd);
	} else
		error = EBADF;

	if (error == 0)
		rvp->r_val1 = (uintptr_t)a;
	return (error);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

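/*
 * munmap(2) system call: remove any mappings in the range [addr, addr + len).
 */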
int
munmap(caddr_t addr, size_t len)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(EINVAL));

	/*
	 * Discard lwpchan mappings.
	 */
	if (p->p_lcp != NULL)
		lwpchan_delete_mapping(p, addr, addr + len);
	if (as_unmap(as, addr, len) != 0)
		return (set_errno(EINVAL));

	return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
	struct as *as = curproc->p_as;
	uint_t uprot = prot | PROT_USER;
	int error;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
	case RANGE_OKAY:
		break;
	case RANGE_BADPROT:
		return (set_errno(ENOTSUP));
	case RANGE_BADADDR:
	default:
		return (set_errno(ENOMEM));
	}

	error = as_setprot(as, addr, len, uprot);
	if (error)
		return (set_errno(error));
	return (0);
}

#define	MC_CACHE	128			/* internal result buffer */
#define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */

int
mincore(caddr_t addr, size_t len, char *vecp)
{
	struct as *as = curproc->p_as;
	caddr_t ea;			/* end address of loop */
	size_t rl;			/* inner result length */
	char vec[MC_CACHE];		/* local vector cache */
	int error;
	model_t model;
	long llen;

	model = get_udatamodel();
	/*
	 * Validate form of address parameters.
	 */
	if (model == DATAMODEL_NATIVE) {
		llen = (long)len;
	} else {
		llen = (int32_t)(size32_t)len;
	}
	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(ENOMEM));

	/*
	 * Loop over subranges of interval [addr : addr + len), recovering
	 * results internally and then copying them out to caller. Subrange
	 * is based on the size of MC_CACHE, defined above.
	 */
	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
		error = as_incore(as, addr,
		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
		if (rl != 0) {
			rl = (rl + PAGESIZE - 1) / PAGESIZE;
			if (copyout(vec, vecp, rl) != 0)
				return (set_errno(EFAULT));
			vecp += rl;
		}
		if (error != 0)
			return (set_errno(ENOMEM));
	}
	return (0);
}