1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
23
24 /*
25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31
32 #include <sys/types.h>
33 #include <sys/inttypes.h>
34 #include <sys/param.h>
35 #include <sys/sysmacros.h>
36 #include <sys/systm.h>
37 #include <sys/signal.h>
38 #include <sys/user.h>
39 #include <sys/errno.h>
40 #include <sys/var.h>
41 #include <sys/proc.h>
42 #include <sys/tuneable.h>
43 #include <sys/debug.h>
44 #include <sys/cmn_err.h>
45 #include <sys/cred.h>
46 #include <sys/vnode.h>
47 #include <sys/vfs.h>
48 #include <sys/vm.h>
49 #include <sys/file.h>
50 #include <sys/mman.h>
51 #include <sys/vmparam.h>
52 #include <sys/fcntl.h>
53 #include <sys/lwpchan_impl.h>
54 #include <sys/nbmlock.h>
55
56 #include <vm/hat.h>
57 #include <vm/as.h>
58 #include <vm/seg.h>
59 #include <vm/seg_dev.h>
60 #include <vm/seg_vn.h>
61
62 int use_brk_lpg = 1;
63 int use_stk_lpg = 1;
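/*
 * Illustrative sketch (an assumption about the tuning interface, not part
 * of this file): on systems that read kernel tunables from /etc/system,
 * automatic large-page selection for the heap and stack could typically
 * be disabled with entries such as:
 *
 *      set use_brk_lpg = 0
 *      set use_stk_lpg = 0
 *
 * Verify against your platform's /etc/system documentation before relying
 * on this.
 */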
64
65 static int brk_lpg(caddr_t nva);
66 static int grow_lpg(caddr_t sp);
67
68 intptr_t
69 brk(caddr_t nva)
70 {
71 int error;
72 proc_t *p = curproc;
73
74 /*
75 * As a special case to aid the implementation of sbrk(3C), if given a
76 * new brk of 0, return the current brk. We'll hide this in brk(3C).
77 */
78 if (nva == 0)
79 return ((intptr_t)(p->p_brkbase + p->p_brksize));
80
81 /*
82 * Serialize brk operations on an address space.
83 * This also serves as the lock protecting p_brksize
84 * and p_brkpageszc.
85 */
86 as_rangelock(p->p_as);
87 if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
88 error = brk_lpg(nva);
89 } else {
90 error = brk_internal(nva, p->p_brkpageszc);
91 }
92 as_rangeunlock(p->p_as);
93 return ((error != 0 ? set_errno(error) : 0));
94 }
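/*
 * Illustrative userland sketch (an assumption, not a description of how
 * libc actually implements sbrk(3C)): given a hypothetical _brk_raw()
 * stub for this system call, the brk(0) query above lets an sbrk-style
 * interface be layered on top of it roughly as follows.
 *
 *      #include <stddef.h>
 *      #include <stdint.h>
 *
 *      extern intptr_t _brk_raw(void *nva);    // hypothetical syscall stub
 *
 *      void *
 *      my_sbrk(intptr_t incr)
 *      {
 *              char *oldbrk = (char *)_brk_raw(NULL);  // brk(0): current brk
 *
 *              if (incr != 0 && _brk_raw(oldbrk + incr) != 0)
 *                      return ((void *)-1);    // errno set by the failed brk
 *              return (oldbrk);
 *      }
 */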
95
96 /*
97 * Algorithm: call arch-specific map_pgsz to get best page size to use,
98 * then call brk_internal().
99 * Returns 0 on success.
100 */
101 static int
102 brk_lpg(caddr_t nva)
103 {
104 struct proc *p = curproc;
105 size_t pgsz, len;
106 caddr_t addr, brkend;
107 caddr_t bssbase = p->p_bssbase;
108 caddr_t brkbase = p->p_brkbase;
109 int oszc, szc;
110 int err;
111
112 oszc = p->p_brkpageszc;
113
114 /*
115 * If p_brkbase has not yet been set, the first call
116 * to brk_internal() will initialize it.
117 */
118 if (brkbase == 0) {
119 return (brk_internal(nva, oszc));
120 }
121
122 len = nva - bssbase;
123
124 pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
125 szc = page_szc(pgsz);
126
127 /*
128 * Covers two cases:
129 * 1. page_szc() returns -1 for invalid page size, so we want to
130 * ignore it in that case.
131 * 2. By design we never decrease the page size once chosen; it stays stable.
132 */
133 if (szc <= oszc) {
134 err = brk_internal(nva, oszc);
135 /* If that failed, fall back to the base page size. */
136 if (err != 0 && oszc != 0) {
137 err = brk_internal(nva, 0);
138 }
139 return (err);
140 }
141
142 err = brk_internal(nva, szc);
143 /* If using szc failed, map with base page size and return. */
144 if (err != 0) {
145 if (szc != 0) {
146 err = brk_internal(nva, 0);
147 }
148 return (err);
149 }
150
151 /*
152 * Round up brk base to a large page boundary and remap
153 * anything in the segment already faulted in beyond that
154 * point.
155 */
156 addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
157 brkend = brkbase + p->p_brksize;
158 len = brkend - addr;
159 /* Only remap a non-empty, pgsz-aligned window; then update the heap szc. */
160 if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
161 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
162 p->p_brkpageszc = szc;
163 }
164
165 ASSERT(err == 0);
166 return (err); /* should always be 0 */
167 }
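/*
 * Worked example of the remap window computed above (numbers are
 * illustrative only): with p_bssbase = 0x12345000, a break end of
 * brkbase + p_brksize = 0x12800000 and pgsz = 4MB (0x400000):
 *
 *      addr = P2ROUNDUP(0x12345000, 0x400000) = 0x12400000
 *      len  = 0x12800000 - 0x12400000         = 0x400000
 *
 * len is a multiple of pgsz, so [addr, addr + len) is promoted to the
 * large page size and p_brkpageszc is updated.
 */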
168
169 /*
170 * Returns 0 on success.
171 */
172 int
173 brk_internal(caddr_t nva, uint_t brkszc)
174 {
175 caddr_t ova; /* current break address */
176 size_t size;
177 int error;
178 struct proc *p = curproc;
179 struct as *as = p->p_as;
180 size_t pgsz;
181 uint_t szc;
182 rctl_qty_t as_rctl;
183
184 /*
185 * extend heap to brkszc alignment but use current p->p_brkpageszc
186 * for the newly created segment. This allows the new extension
187 * segment to be concatenated successfully with the existing brk
188 * segment.
189 */
190 if ((szc = brkszc) != 0) {
191 pgsz = page_get_pagesize(szc);
192 ASSERT(pgsz > PAGESIZE);
193 } else {
194 pgsz = PAGESIZE;
195 }
196
197 mutex_enter(&p->p_lock);
198 as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
199 p->p_rctls, p);
200 mutex_exit(&p->p_lock);
201
202 /*
203 * If p_brkbase has not yet been set, the first call
204 * to brk() will initialize it.
205 */
206 if (p->p_brkbase == 0)
207 p->p_brkbase = nva;
208
209 /*
210 * Before multiple page size support existed, p_brksize was the exact
211 * user-requested heap size, not rounded to the pagesize. If pgsz is
212 * greater than PAGESIZE, calculate the heap size as the real new heap
213 * size by rounding it up to pgsz. This is useful since we may want to
214 * know where the heap ends without knowing the heap pagesize (e.g.
215 * some old code), and also because if the heap pagesize changes we can
216 * update p_brkpageszc but delay adding the new mapping, yet still know
217 * from p_brksize where the heap really ends. The exact user-requested
218 * heap end is tracked by a libc variable.
219 */
220 if (pgsz > PAGESIZE) {
221 caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
222 size = tnva - p->p_brkbase;
223 if (tnva < p->p_brkbase || (size > p->p_brksize &&
224 size > (size_t)as_rctl)) {
225 szc = 0;
226 pgsz = PAGESIZE;
227 size = nva - p->p_brkbase;
228 }
229 } else {
230 size = nva - p->p_brkbase;
231 }
232
233 /*
234 * use PAGESIZE to round up ova because we want to know the real value
235 * of the current heap end in case p_brkpageszc has changed since the
236 * last time p_brksize was computed.
237 */
238 nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
239 ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
240 PAGESIZE);
241
242 if ((nva < p->p_brkbase) || (size > p->p_brksize &&
243 size > as_rctl)) {
244 mutex_enter(&p->p_lock);
245 (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
246 RCA_SAFE);
247 mutex_exit(&p->p_lock);
248 return (ENOMEM);
249 }
250
251 if (nva > ova) {
252 struct segvn_crargs crargs =
253 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
254
255 if (!(p->p_datprot & PROT_EXEC)) {
256 crargs.prot &= ~PROT_EXEC;
257 }
258
259 /*
260 * Add a new zfod mapping to extend the UNIX data segment.
261 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
262 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
263 * page sizes if ova is not aligned to szc's pgsz.
264 */
265 if (szc > 0) {
266 caddr_t rbss;
267
268 rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
269 pgsz);
270 if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
271 crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
272 AS_MAP_NO_LPOOB;
273 } else if (ova == rbss) {
274 crargs.szc = szc;
275 } else {
276 crargs.szc = AS_MAP_HEAP;
277 }
278 } else {
279 crargs.szc = AS_MAP_NO_LPOOB;
280 }
281 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
282 error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
283 &crargs);
284 if (error) {
285 return (error);
286 }
287
288 } else if (nva < ova) {
289 /*
290 * Release mapping to shrink UNIX data segment.
291 */
292 (void) as_unmap(as, nva, (size_t)(ova - nva));
293 }
294 p->p_brksize = size;
295 return (0);
296 }
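/*
 * Illustrative userland sketch (an assumption, not part of this file): the
 * RLIMIT_DATA/rctl comparison above is what makes a heap extension beyond
 * the data limit fail with ENOMEM, e.g.:
 *
 *      #include <sys/resource.h>
 *      #include <unistd.h>
 *      #include <errno.h>
 *
 *      int
 *      heap_limit_demo(void)
 *      {
 *              struct rlimit rl = { 1024 * 1024, 1024 * 1024 };
 *
 *              (void) setrlimit(RLIMIT_DATA, &rl);     // cap data at ~1MB
 *              if (sbrk(8 * 1024 * 1024) == (void *)-1 && errno == ENOMEM)
 *                      return (1);     // growth refused by the check above
 *              return (0);
 *      }
 */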
297
298 /*
299 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
300 * This routine assumes that the stack grows downward.
301 */
302 int
303 grow(caddr_t sp)
304 {
305 struct proc *p = curproc;
306 struct as *as = p->p_as;
307 size_t oldsize = p->p_stksize;
308 size_t newsize;
309 int err;
310
311 /*
312 * Serialize grow operations on an address space.
313 * This also serves as the lock protecting p_stksize
314 * and p_stkpageszc.
315 */
316 as_rangelock(as);
317 if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
318 err = grow_lpg(sp);
319 } else {
320 err = grow_internal(sp, p->p_stkpageszc);
321 }
322 as_rangeunlock(as);
323
324 if (err == 0 && (newsize = p->p_stksize) > oldsize) {
325 ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
326 ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
327 /*
328 * Set up translations so the process doesn't have to fault in
329 * the stack pages we just gave it.
330 */
331 (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
332 newsize - oldsize, F_INVAL, S_WRITE);
333 }
334 return ((err == 0 ? 1 : 0));
335 }
336
337 /*
338 * Algorithm: call arch-specific map_pgsz to get best page size to use,
339 * then call grow_internal().
340 * Returns 0 on success.
341 */
342 static int
343 grow_lpg(caddr_t sp)
344 {
345 struct proc *p = curproc;
346 size_t pgsz;
347 size_t len, newsize;
348 caddr_t addr, saddr;
349 caddr_t growend;
350 int oszc, szc;
351 int err;
352
353 newsize = p->p_usrstack - sp;
354
355 oszc = p->p_stkpageszc;
356 pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
357 szc = page_szc(pgsz);
358
359 /*
360 * Covers two cases:
361 * 1. page_szc() returns -1 for invalid page size, so we want to
362 * ignore it in that case.
363 * 2. By design we never decrease the page size once chosen; it stays stable.
364 * This case shouldn't arise anyway, since the stack never shrinks.
365 */
366 if (szc <= oszc) {
367 err = grow_internal(sp, oszc);
368 /* failed, fall back to base page size */
369 if (err != 0 && oszc != 0) {
370 err = grow_internal(sp, 0);
371 }
372 return (err);
373 }
374
375 /*
376 * We've grown sufficiently to switch to a new page size.
377 * So we are going to remap the whole segment with the new page size.
378 */
379 err = grow_internal(sp, szc);
380 /* The grow with szc failed, so fall back to base page size. */
381 if (err != 0) {
382 if (szc != 0) {
383 err = grow_internal(sp, 0);
384 }
385 return (err);
386 }
387
388 /*
389 * Round up stack pointer to a large page boundary and remap
390 * any pgsz pages in the segment already faulted in beyond that
391 * point.
392 */
393 saddr = p->p_usrstack - p->p_stksize;
394 addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
395 growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
396 len = growend - addr;
397 /* Only remap a non-empty, pgsz-aligned window; then update the stack szc. */
398 if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
399 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
400 p->p_stkpageszc = szc;
401 }
402
403 ASSERT(err == 0);
404 return (err); /* should always be 0 */
405 }
406
407 /*
408 * This routine assumes that the stack grows downward.
409 * Returns 0 on success, errno on failure.
410 */
411 int
412 grow_internal(caddr_t sp, uint_t growszc)
413 {
414 struct proc *p = curproc;
415 size_t newsize;
416 size_t oldsize;
417 int error;
418 size_t pgsz;
419 uint_t szc;
420 struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
421
422 ASSERT(sp < p->p_usrstack);
423 sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
424
425 /*
426 * grow to growszc alignment but use current p->p_stkpageszc for
427 * the segvn_crargs szc passed to segvn_create. When memcntl is
428 * increasing the szc, this allows the new extension segment to be
429 * concatenated successfully with the existing stack segment.
430 */
431 if ((szc = growszc) != 0) {
432 pgsz = page_get_pagesize(szc);
433 ASSERT(pgsz > PAGESIZE);
434 newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
435 if (newsize > (size_t)p->p_stk_ctl) {
436 szc = 0;
437 pgsz = PAGESIZE;
438 newsize = p->p_usrstack - sp;
439 }
440 } else {
441 pgsz = PAGESIZE;
442 newsize = p->p_usrstack - sp;
443 }
444
445 if (newsize > (size_t)p->p_stk_ctl) {
446 (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
447 RCA_UNSAFE_ALL);
448
449 return (ENOMEM);
450 }
451
452 oldsize = p->p_stksize;
453 ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
454
455 if (newsize <= oldsize) { /* prevent the stack from shrinking */
456 return (0);
457 }
458
459 if (!(p->p_stkprot & PROT_EXEC)) {
460 crargs.prot &= ~PROT_EXEC;
461 }
462 /*
463 * extend stack with the proposed new growszc, which differs from
464 * p_stkpageszc only when memcntl is increasing the stack pagesize.
465 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
466 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
467 * if not aligned to szc's pgsz.
468 */
469 if (szc > 0) {
470 caddr_t oldsp = p->p_usrstack - oldsize;
471 caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
472 pgsz);
473
474 if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
475 crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
476 AS_MAP_NO_LPOOB;
477 } else if (oldsp == austk) {
478 crargs.szc = szc;
479 } else {
480 crargs.szc = AS_MAP_STACK;
481 }
482 } else {
483 crargs.szc = AS_MAP_NO_LPOOB;
484 }
485 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
486
487 if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
488 segvn_create, &crargs)) != 0) {
489 if (error == EAGAIN) {
490 cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
491 "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
492 }
493 return (error);
494 }
495 p->p_stksize = newsize;
496 return (0);
497 }
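/*
 * Illustrative userland sketch (an assumption, not part of this file):
 * p_stk_ctl above is the kernel-side view of the stack size limit; a
 * process can inspect the limit that bounds automatic stack growth with:
 *
 *      #include <sys/resource.h>
 *      #include <stdio.h>
 *
 *      void
 *      show_stack_limit(void)
 *      {
 *              struct rlimit rl;
 *
 *              if (getrlimit(RLIMIT_STACK, &rl) == 0)
 *                      (void) printf("stack limit: %llu bytes\n",
 *                          (unsigned long long)rl.rlim_cur);
 *      }
 *
 * Growth beyond the limit fails here with ENOMEM, and the RCA_UNSAFE_ALL
 * rctl action above typically results in a signal for the process.
 */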
498
499 /*
500 * Find an address for the user to map. If MAP_FIXED is not specified, we
501 * can pick any address we want, but we will first try the value in *addrp
502 * if it is non-NULL and _MAP_RANDOMIZE is not set. This provides a way to
503 * request a preferred address without requiring it.
504 */
505 int
506 choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
507 int vacalign, uint_t flags)
508 {
509 caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
510 size_t lenp = len;
511
512 ASSERT(AS_ISCLAIMGAP(as)); /* searches should be serialized */
513 if (flags & MAP_FIXED) {
514 (void) as_unmap(as, *addrp, len);
515 return (0);
516 } else if (basep != NULL &&
517 ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
518 !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
519 /* User supplied address was available */
520 *addrp = basep;
521 } else {
522 /*
523 * No user supplied address or the address supplied was not
524 * available.
525 */
526 map_addr(addrp, len, off, vacalign, flags);
527 }
528 if (*addrp == NULL)
529 return (ENOMEM);
530 return (0);
531 }
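/*
 * Illustrative userland sketch (not part of this file): the non-MAP_FIXED
 * path above lets a caller suggest, rather than demand, an address:
 *
 *      #include <sys/mman.h>
 *
 *      void *
 *      map_with_hint(void)
 *      {
 *              void *hint = (void *)0x200000000UL;     // arbitrary example
 *
 *              return (mmap(hint, 8192, PROT_READ | PROT_WRITE,
 *                  MAP_PRIVATE | MAP_ANON, -1, 0));
 *      }
 *
 * If the hinted range is free it is used as-is; otherwise (or when ASLR
 * forces _MAP_RANDOMIZE) map_addr() picks another address and the mapping
 * still succeeds.
 */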
532
533
534 /*
535 * Used for MAP_ANON - fast way to get anonymous pages
536 */
537 static int
538 zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
539 offset_t pos)
540 {
541 struct segvn_crargs vn_a;
542 int error;
543
544 if (((PROT_ALL & uprot) != uprot))
545 return (EACCES);
546
547 if ((flags & MAP_FIXED) != 0) {
548 caddr_t userlimit;
549
550 /*
551 * Use the user address. First verify that
552 * the address to be used is page aligned.
553 * Then make some simple bounds checks.
554 */
555 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
556 return (EINVAL);
557
558 userlimit = flags & _MAP_LOW32 ?
559 (caddr_t)USERLIMIT32 : as->a_userlimit;
560 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
561 case RANGE_OKAY:
562 break;
563 case RANGE_BADPROT:
564 return (ENOTSUP);
565 case RANGE_BADADDR:
566 default:
567 return (ENOMEM);
568 }
569 }
570 /*
571 * No need to worry about vac alignment for anonymous
572 * pages since this is a "clone" object that doesn't
573 * yet exist.
574 */
575 error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
576 if (error != 0) {
577 return (error);
578 }
579
580 /*
581 * Use the seg_vn segment driver; passing in the NULL amp
582 * gives the desired "cloning" effect.
583 */
584 vn_a.vp = NULL;
585 vn_a.offset = 0;
586 vn_a.type = flags & MAP_TYPE;
587 vn_a.prot = uprot;
588 vn_a.maxprot = PROT_ALL;
589 vn_a.flags = flags & ~MAP_TYPE;
590 vn_a.cred = CRED();
591 vn_a.amp = NULL;
592 vn_a.szc = 0;
593 vn_a.lgrp_mem_policy_flags = 0;
594
595 return (as_map(as, *addrp, len, segvn_create, &vn_a));
596 }
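/*
 * Illustrative userland sketch (not part of this file): an anonymous
 * mapping like the one below (fd == -1 with MAP_ANON) is the request that
 * reaches zmap() through smmap_common():
 *
 *      #include <sys/types.h>
 *      #include <sys/mman.h>
 *
 *      void *
 *      alloc_shared_anon(size_t len)
 *      {
 *              // MAP_SHARED | MAP_ANON memory is inherited across fork()
 *              // and is a simple way to share state with child processes.
 *              return (mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                  MAP_SHARED | MAP_ANON, -1, 0));
 *      }
 */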
597
598 static int
599 smmap_common(caddr_t *addrp, size_t len,
600 int prot, int flags, struct file *fp, offset_t pos)
601 {
602 struct vnode *vp;
603 struct as *as = curproc->p_as;
604 uint_t uprot, maxprot, type;
605 int error;
606 int in_crit = 0;
607
608 if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
609 _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
610 MAP_TEXT | MAP_INITDATA)) != 0) {
611 /* | MAP_RENAME */ /* not implemented, let user know */
612 return (EINVAL);
613 }
614
615 if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
616 return (EINVAL);
617 }
618
619 if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
620 return (EINVAL);
621 }
622
623 if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
624 (MAP_FIXED | _MAP_RANDOMIZE)) {
625 return (EINVAL);
626 }
627
628 /*
629 * If it's not a fixed allocation and mmap ASLR is enabled, randomize
630 * it.
631 */
632 if (((flags & MAP_FIXED) == 0) &&
633 secflag_enabled(curproc, PROC_SEC_ASLR))
634 flags |= _MAP_RANDOMIZE;
635
636 #if defined(__sparc)
637 /*
638 * See if this is an "old mmap call". If so, remember this
639 * fact and convert the flags value given to mmap to indicate that
640 * the address specified in the system call must be used.
641 * _MAP_NEW is set by all new uses of mmap.
642 */
643 if ((flags & _MAP_NEW) == 0)
644 flags |= MAP_FIXED;
645 #endif
646 flags &= ~_MAP_NEW;
647
648 type = flags & MAP_TYPE;
649 if (type != MAP_PRIVATE && type != MAP_SHARED)
650 return (EINVAL);
651
652
653 if (flags & MAP_ALIGN) {
654 if (flags & MAP_FIXED)
655 return (EINVAL);
656
657 /* alignment must be 0 (meaning any alignment) or a power of 2 >= PAGESIZE */
658 if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
659 !ISP2((uintptr_t)*addrp))
660 return (EINVAL);
661 }
662 /*
663 * Check for bad lengths and file position.
664 * We let the VOP_MAP routine check for negative lengths
665 * since on some vnode types this might be appropriate.
666 */
667 if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
668 return (EINVAL);
669
670 maxprot = PROT_ALL; /* start out allowing all accesses */
671 uprot = prot | PROT_USER;
672
673 if (fp == NULL) {
674 ASSERT(flags & MAP_ANON);
675 /* discard lwpchan mappings, like munmap() */
676 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
677 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
678 as_rangelock(as);
679 error = zmap(as, addrp, len, uprot, flags, pos);
680 as_rangeunlock(as);
681 /*
682 * Tell machine specific code that lwp has mapped shared memory
683 */
684 if (error == 0 && (flags & MAP_SHARED)) {
685 /* EMPTY */
686 LWP_MMODEL_SHARED_AS(*addrp, len);
687 }
688 return (error);
689 } else if ((flags & MAP_ANON) != 0)
690 return (EINVAL);
691
692 vp = fp->f_vnode;
693
694 /* Can't execute code from "noexec" mounted filesystem. */
695 if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
696 maxprot &= ~PROT_EXEC;
697
698 /*
699 * These checks were added as part of large file support.
700 *
701 * Return ENXIO if the initial position is negative; return EOVERFLOW
702 * if (offset + len) would overflow the maximum allowed offset for the
703 * type of file descriptor being used.
704 */
705 if (vp->v_type == VREG) {
706 if (pos < 0)
707 return (ENXIO);
708 if ((offset_t)len > (OFFSET_MAX(fp) - pos))
709 return (EOVERFLOW);
710 }
711
712 if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
713 /* no write access allowed */
714 maxprot &= ~PROT_WRITE;
715 }
716
717 /*
718 * XXX - Do we also adjust maxprot based on protections
719 * of the vnode? E.g. if no execute permission is given
720 * on the vnode for the current user, maxprot probably
721 * should disallow PROT_EXEC as well? This differs from the
722 * write check above in that it would be a per-vnode test,
723 * as opposed to a per-fd test for writability.
724 */
725
726 /*
727 * Verify that the specified protections are not greater than
728 * the maximum allowable protections. Also test to make sure
729 * that the file descriptor allows read access, since "write
730 * only" mappings are hard to do: normally we must read from
731 * the file before the page can be written.
732 */
733 if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
734 return (EACCES);
735
736 /*
737 * If the user specified an address, do some simple checks here
738 */
739 if ((flags & MAP_FIXED) != 0) {
740 caddr_t userlimit;
741
742 /*
743 * Use the user address. First verify that
744 * the address to be used is page aligned.
745 * Then make some simple bounds checks.
746 */
747 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
748 return (EINVAL);
749
750 userlimit = flags & _MAP_LOW32 ?
751 (caddr_t)USERLIMIT32 : as->a_userlimit;
752 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
753 case RANGE_OKAY:
754 break;
755 case RANGE_BADPROT:
756 return (ENOTSUP);
757 case RANGE_BADADDR:
758 default:
759 return (ENOMEM);
760 }
761 }
762
763 if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
764 nbl_need_check(vp)) {
765 int svmand;
766 nbl_op_t nop;
767
768 nbl_start_crit(vp, RW_READER);
769 in_crit = 1;
770 error = nbl_svmand(vp, fp->f_cred, &svmand);
771 if (error != 0)
772 goto done;
773 if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
774 if (prot & (PROT_READ | PROT_EXEC)) {
775 nop = NBL_READWRITE;
776 } else {
777 nop = NBL_WRITE;
778 }
779 } else {
780 nop = NBL_READ;
781 }
782 if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
783 error = EACCES;
784 goto done;
785 }
786 }
787
788 /* discard lwpchan mappings, like munmap() */
789 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
790 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
791
792 /*
793 * Ok, now let the vnode map routine do its thing to set things up.
794 */
795 error = VOP_MAP(vp, pos, as,
796 addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
797
798 if (error == 0) {
799 /*
800 * Tell machine specific code that lwp has mapped shared memory
801 */
802 if (flags & MAP_SHARED) {
803 /* EMPTY */
804 LWP_MMODEL_SHARED_AS(*addrp, len);
805 }
806 if (vp->v_type == VREG &&
807 (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
808 /*
809 * Mark this as an executable vnode
810 */
811 mutex_enter(&vp->v_lock);
812 vp->v_flag |= VVMEXEC;
813 mutex_exit(&vp->v_lock);
814 }
815 }
816
817 done:
818 if (in_crit)
819 nbl_end_crit(vp);
820 return (error);
821 }
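/*
 * Illustrative userland sketch (not part of this file): with MAP_ALIGN,
 * the checks above interpret the addr argument as a requested alignment
 * rather than an address, e.g. for a 4MB-aligned anonymous mapping:
 *
 *      #include <sys/mman.h>
 *
 *      void *
 *      alloc_4m_aligned(void)
 *      {
 *              // addr carries the alignment when MAP_ALIGN is set
 *              return (mmap((void *)(4UL * 1024 * 1024), 4UL * 1024 * 1024,
 *                  PROT_READ | PROT_WRITE,
 *                  MAP_PRIVATE | MAP_ANON | MAP_ALIGN, -1, 0));
 *      }
 *
 * Per the validation above, the alignment must be 0 (any alignment) or a
 * power of two no smaller than PAGESIZE, and MAP_ALIGN cannot be combined
 * with MAP_FIXED.
 */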
822
823 #ifdef _LP64
824 /*
825 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
826 *
827 * The "large file" mmap routine mmap64(2) is also mapped to this routine
828 * by the 64-bit version of libc.
829 *
830 * Eventually, this should be the only version, and have smmap_common()
831 * folded back into it again. Some day.
832 */
833 caddr_t
834 smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
835 {
836 struct file *fp;
837 int error;
838
839 if (fd == -1 && (flags & MAP_ANON) != 0)
840 error = smmap_common(&addr, len, prot, flags,
841 NULL, (offset_t)pos);
842 else if ((fp = getf(fd)) != NULL) {
843 error = smmap_common(&addr, len, prot, flags,
844 fp, (offset_t)pos);
845 releasef(fd);
846 } else
847 error = EBADF;
848
849 return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
850 }
851 #endif /* _LP64 */
852
853 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
854
855 /*
856 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
857 */
858 caddr_t
859 smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
860 {
861 struct file *fp;
862 int error;
863 caddr_t a = (caddr_t)(uintptr_t)addr;
864
865 if (flags & _MAP_LOW32)
866 error = EINVAL;
867 else if (fd == -1 && (flags & MAP_ANON) != 0)
868 error = smmap_common(&a, (size_t)len, prot,
869 flags | _MAP_LOW32, NULL, (offset_t)pos);
870 else if ((fp = getf(fd)) != NULL) {
871 error = smmap_common(&a, (size_t)len, prot,
872 flags | _MAP_LOW32, fp, (offset_t)pos);
873 releasef(fd);
874 } else
875 error = EBADF;
876
877 ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
878
879 return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
880 }
881
882 /*
883 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
884 *
885 * Now things really get ugly because we can't use the C-style
886 * calling convention for more than 6 args, and 64-bit parameter
887 * passing on 32-bit systems is less than clean.
888 */
889
890 struct mmaplf32a {
891 caddr_t addr;
892 size_t len;
893 #ifdef _LP64
894 /*
895 * 32-bit contents, 64-bit cells
896 */
897 uint64_t prot;
898 uint64_t flags;
899 uint64_t fd;
900 uint64_t offhi;
901 uint64_t offlo;
902 #else
903 /*
904 * 32-bit contents, 32-bit cells
905 */
906 uint32_t prot;
907 uint32_t flags;
908 uint32_t fd;
909 uint32_t offhi;
910 uint32_t offlo;
911 #endif
912 };
913
914 int
915 smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
916 {
917 struct file *fp;
918 int error;
919 caddr_t a = uap->addr;
920 int flags = (int)uap->flags;
921 int fd = (int)uap->fd;
922 #ifdef _BIG_ENDIAN
923 offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
924 #else
925 offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
926 #endif
927
928 if (flags & _MAP_LOW32)
929 error = EINVAL;
930 else if (fd == -1 && (flags & MAP_ANON) != 0)
931 error = smmap_common(&a, uap->len, (int)uap->prot,
932 flags | _MAP_LOW32, NULL, off);
933 else if ((fp = getf(fd)) != NULL) {
934 error = smmap_common(&a, uap->len, (int)uap->prot,
935 flags | _MAP_LOW32, fp, off);
936 releasef(fd);
937 } else
938 error = EBADF;
939
940 if (error == 0)
941 rvp->r_val1 = (uintptr_t)a;
942 return (error);
943 }
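/*
 * Illustrative sketch (an assumption about the 32-bit caller, not code
 * taken from libc): the 64-bit file offset arrives here as two 32-bit
 * cells. Splitting it on the caller side looks roughly like:
 *
 *      #include <stdint.h>
 *
 *      uint64_t off64 = 0x123456789ULL;
 *      uint32_t hi32  = (uint32_t)(off64 >> 32);               // 0x00000001
 *      uint32_t lo32  = (uint32_t)(off64 & 0xffffffffULL);     // 0x23456789
 *
 * The struct above names its cells in argument-slot order, and on a
 * little-endian caller the low word occupies the earlier slot; that is why
 * the #else case above swaps offhi and offlo when rebuilding the offset.
 */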
944
945 #endif /* _SYSCALL32_IMPL || _ILP32 */
946
947 int
948 munmap(caddr_t addr, size_t len)
949 {
950 struct proc *p = curproc;
951 struct as *as = p->p_as;
952
953 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
954 return (set_errno(EINVAL));
955
956 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
957 return (set_errno(EINVAL));
958
959 /*
960 * Discard lwpchan mappings.
961 */
962 if (p->p_lcp != NULL)
963 lwpchan_delete_mapping(p, addr, addr + len);
964 if (as_unmap(as, addr, len) != 0)
965 return (set_errno(EINVAL));
966
967 return (0);
968 }
969
970 int
971 mprotect(caddr_t addr, size_t len, int prot)
972 {
973 struct as *as = curproc->p_as;
974 uint_t uprot = prot | PROT_USER;
975 int error;
976
977 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
978 return (set_errno(EINVAL));
979
980 switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
981 case RANGE_OKAY:
982 break;
983 case RANGE_BADPROT:
984 return (set_errno(ENOTSUP));
985 case RANGE_BADADDR:
986 default:
987 return (set_errno(ENOMEM));
988 }
989
990 error = as_setprot(as, addr, len, uprot);
991 if (error)
992 return (set_errno(error));
993 return (0);
994 }
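/*
 * Illustrative userland sketch (not part of this file): a common use of
 * mprotect() is to revoke write access to a page-aligned region once it
 * has been initialized:
 *
 *      #include <sys/mman.h>
 *      #include <unistd.h>
 *
 *      void *
 *      make_readonly_page(void)
 *      {
 *              long pgsz = sysconf(_SC_PAGESIZE);
 *              void *buf = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
 *                  MAP_PRIVATE | MAP_ANON, -1, 0);
 *
 *              if (buf != MAP_FAILED) {
 *                      // ... fill in the page, then drop write access
 *                      (void) mprotect(buf, pgsz, PROT_READ);
 *              }
 *              return (buf);
 *      }
 */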
995
996 #define MC_CACHE 128 /* internal result buffer */
997 #define MC_QUANTUM (MC_CACHE * PAGESIZE) /* addresses covered in loop */
998
999 int
1000 mincore(caddr_t addr, size_t len, char *vecp)
1001 {
1002 struct as *as = curproc->p_as;
1003 caddr_t ea; /* end address of loop */
1004 size_t rl; /* inner result length */
1005 char vec[MC_CACHE]; /* local vector cache */
1006 int error;
1007 model_t model;
1008 long llen;
1009
1010 model = get_udatamodel();
1011 /*
1012 * Validate form of address parameters.
1013 */
1014 if (model == DATAMODEL_NATIVE) {
1015 llen = (long)len;
1016 } else {
1017 llen = (int32_t)(size32_t)len;
1018 }
1019 if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
1020 return (set_errno(EINVAL));
1021
1022 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
1023 return (set_errno(ENOMEM));
1024
1025 /*
1026 * Loop over subranges of interval [addr : addr + len), recovering
1027 * results internally and then copying them out to the caller. The
1028 * subrange size is based on MC_CACHE, defined above.
1029 */
1030 for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
1031 error = as_incore(as, addr,
1032 (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
1033 if (rl != 0) {
1034 rl = (rl + PAGESIZE - 1) / PAGESIZE;
1035 if (copyout(vec, vecp, rl) != 0)
1036 return (set_errno(EFAULT));
1037 vecp += rl;
1038 }
1039 if (error != 0)
1040 return (set_errno(ENOMEM));
1041 }
1042 return (0);
1043 }
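/*
 * Illustrative userland sketch (not part of this file): mincore() expects
 * one result byte per page of the queried region, which is why the loop
 * above scales rl from bytes to pages before the copyout:
 *
 *      #include <sys/mman.h>
 *      #include <unistd.h>
 *
 *      int
 *      count_resident_pages(void)
 *      {
 *              long pgsz = sysconf(_SC_PAGESIZE);
 *              size_t len = 16 * pgsz;
 *              char vec[16];                   // one result byte per page
 *              char *base = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                  MAP_PRIVATE | MAP_ANON, -1, 0);
 *              int i, resident = 0;
 *
 *              if (base == MAP_FAILED || mincore(base, len, vec) != 0)
 *                      return (-1);
 *              for (i = 0; i < 16; i++)
 *                      if (vec[i] & 1)
 *                              resident++;
 *              return (resident);
 *      }
 */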