1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
23
24 /*
25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 /* All Rights Reserved */
31
32 #include <sys/types.h>
33 #include <sys/inttypes.h>
34 #include <sys/param.h>
35 #include <sys/sysmacros.h>
36 #include <sys/systm.h>
37 #include <sys/signal.h>
38 #include <sys/user.h>
39 #include <sys/errno.h>
40 #include <sys/var.h>
41 #include <sys/proc.h>
42 #include <sys/tuneable.h>
43 #include <sys/debug.h>
44 #include <sys/cmn_err.h>
45 #include <sys/cred.h>
46 #include <sys/vnode.h>
47 #include <sys/vfs.h>
48 #include <sys/vm.h>
49 #include <sys/file.h>
50 #include <sys/mman.h>
51 #include <sys/vmparam.h>
52 #include <sys/fcntl.h>
53 #include <sys/lwpchan_impl.h>
54 #include <sys/nbmlock.h>
55
56 #include <vm/hat.h>
57 #include <vm/as.h>
58 #include <vm/seg.h>
59 #include <vm/seg_dev.h>
60 #include <vm/seg_vn.h>
61
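/*
 * Tunables: when set, and the process has SAUTOLPG enabled, heap and
 * stack growth may be mapped with large pages chosen via map_pgsz().
 */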
62 int use_brk_lpg = 1;
63 int use_stk_lpg = 1;
64
65 static int brk_lpg(caddr_t nva);
66 static int grow_lpg(caddr_t sp);
67
68 int
69 brk(caddr_t nva)
70 {
71 int error;
72 proc_t *p = curproc;
73
74 /*
75 * Serialize brk operations on an address space.
76 * This also serves as the lock protecting p_brksize
77 * and p_brkpageszc.
78 */
79 as_rangelock(p->p_as);
80 if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
81 error = brk_lpg(nva);
82 } else {
83 error = brk_internal(nva, p->p_brkpageszc);
84 }
85 as_rangeunlock(p->p_as);
86 return ((error != 0 ? set_errno(error) : 0));
87 }
88
89 /*
90 * Algorithm: call arch-specific map_pgsz to get best page size to use,
91 * then call brk_internal().
92 * Returns 0 on success.
93 */
94 static int
95 brk_lpg(caddr_t nva)
96 {
97 struct proc *p = curproc;
98 size_t pgsz, len;
99 caddr_t addr, brkend;
100 caddr_t bssbase = p->p_bssbase;
101 caddr_t brkbase = p->p_brkbase;
102 int oszc, szc;
103 int err;
104
105 oszc = p->p_brkpageszc;
106
107 /*
108 * If p_brkbase has not yet been set, the first call
109 * to brk_internal() will initialize it.
110 */
111 if (brkbase == 0) {
112 return (brk_internal(nva, oszc));
113 }
114
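/*
 * Ask the platform for the preferred heap page size for a heap spanning
 * [bssbase, nva) and convert it to a page size code.
 */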
115 len = nva - bssbase;
116
117 pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
118 szc = page_szc(pgsz);
119
120 /*
121 * Covers two cases:
122 * 1. page_szc() returns -1 for invalid page size, so we want to
123 * ignore it in that case.
124 * 2. By design we never decrease page size, as it is more stable.
125 */
126 if (szc <= oszc) {
127 err = brk_internal(nva, oszc);
128 /* If failed, back off to base page size. */
129 if (err != 0 && oszc != 0) {
130 err = brk_internal(nva, 0);
131 }
132 return (err);
133 }
134
135 err = brk_internal(nva, szc);
136 /* If using szc failed, map with base page size and return. */
137 if (err != 0) {
138 if (szc != 0) {
139 err = brk_internal(nva, 0);
140 }
141 return (err);
142 }
143
144 /*
145 * Round up brk base to a large page boundary and remap
146 * anything in the segment already faulted in beyond that
147 * point.
148 */
149 addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
150 brkend = brkbase + p->p_brksize;
151 len = brkend - addr;
152 /* Make sure len did not wrap (brkend > addr). Update page size code for heap. */
153 if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
154 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
155 p->p_brkpageszc = szc;
156 }
157
158 ASSERT(err == 0);
159 return (err); /* should always be 0 */
160 }
161
162 /*
163 * Returns 0 on success.
164 */
165 int
166 brk_internal(caddr_t nva, uint_t brkszc)
167 {
168 caddr_t ova; /* current break address */
169 size_t size;
170 int error;
171 struct proc *p = curproc;
172 struct as *as = p->p_as;
173 size_t pgsz;
174 uint_t szc;
175 rctl_qty_t as_rctl;
176
177 /*
178 * extend heap to brkszc alignment but use current p->p_brkpageszc
179 * for the newly created segment. This allows the new extension
180 * segment to be concatenated successfully with the existing brk
181 * segment.
182 */
183 if ((szc = brkszc) != 0) {
184 pgsz = page_get_pagesize(szc);
185 ASSERT(pgsz > PAGESIZE);
186 } else {
187 pgsz = PAGESIZE;
188 }
189
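/* Look up the enforced RLIMIT_DATA value for this process. */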
190 mutex_enter(&p->p_lock);
191 as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
192 p->p_rctls, p);
193 mutex_exit(&p->p_lock);
194
195 /*
196 * If p_brkbase has not yet been set, the first call
197 * to brk() will initialize it.
198 */
199 if (p->p_brkbase == 0)
200 p->p_brkbase = nva;
201
202 /*
203 * Before multiple page size support existed, p_brksize was the exact
204 * user-requested heap size, not rounded to the page size. If pgsz is
205 * greater than PAGESIZE, compute the new heap size by rounding the
206 * request up to pgsz. This way p_brksize always records where the
207 * heap really ends, which is useful when the heap page size is not
208 * known (e.g. some old code), and it lets us update p_brkpageszc on
209 * a page size change while delaying the new mapping, yet still know
210 * from p_brksize where the heap really ends. The user-requested heap
211 * end is stored in a libc variable.
212 */
213 if (pgsz > PAGESIZE) {
214 caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
215 size = tnva - p->p_brkbase;
216 if (tnva < p->p_brkbase || (size > p->p_brksize &&
217 size > (size_t)as_rctl)) {
218 szc = 0;
219 pgsz = PAGESIZE;
220 size = nva - p->p_brkbase;
221 }
222 } else {
223 size = nva - p->p_brkbase;
224 }
225
226 /*
227 * use PAGESIZE to round up ova because we want the real value of the
228 * current heap end in case p_brkpageszc has changed since p_brksize
229 * was last computed.
230 */
231 nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
232 ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
233 PAGESIZE);
234
235 if ((nva < p->p_brkbase) || (size > p->p_brksize &&
236 size > as_rctl)) {
237 mutex_enter(&p->p_lock);
238 (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
239 RCA_SAFE);
240 mutex_exit(&p->p_lock);
241 return (ENOMEM);
242 }
243
244 if (nva > ova) {
245 struct segvn_crargs crargs =
246 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
247
248 if (!(p->p_datprot & PROT_EXEC)) {
249 crargs.prot &= ~PROT_EXEC;
250 }
251
252 /*
253 * Add new zfod mapping to extend the UNIX data segment.
254 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
255 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
256 * page sizes if ova is not aligned to szc's pgsz.
257 */
258 if (szc > 0) {
259 caddr_t rbss;
260
261 rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
262 pgsz);
263 if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
264 crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
265 AS_MAP_NO_LPOOB;
266 } else if (ova == rbss) {
267 crargs.szc = szc;
268 } else {
269 crargs.szc = AS_MAP_HEAP;
270 }
271 } else {
272 crargs.szc = AS_MAP_NO_LPOOB;
273 }
274 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
275 error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
276 &crargs);
277 if (error) {
278 return (error);
279 }
280
281 } else if (nva < ova) {
282 /*
283 * Release mapping to shrink UNIX data segment.
284 */
285 (void) as_unmap(as, nva, (size_t)(ova - nva));
286 }
287 p->p_brksize = size;
288 return (0);
289 }
290
291 /*
292 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
293 * This routine assumes that the stack grows downward.
294 */
295 int
296 grow(caddr_t sp)
297 {
298 struct proc *p = curproc;
299 struct as *as = p->p_as;
300 size_t oldsize = p->p_stksize;
301 size_t newsize;
302 int err;
303
304 /*
305 * Serialize grow operations on an address space.
306 * This also serves as the lock protecting p_stksize
307 * and p_stkpageszc.
308 */
309 as_rangelock(as);
310 if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
311 err = grow_lpg(sp);
312 } else {
313 err = grow_internal(sp, p->p_stkpageszc);
314 }
315 as_rangeunlock(as);
316
317 if (err == 0 && (newsize = p->p_stksize) > oldsize) {
318 ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
319 ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
320 /*
321 * Set up translations so the process doesn't have to fault in
322 * the stack pages we just gave it.
323 */
324 (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
325 newsize - oldsize, F_INVAL, S_WRITE);
326 }
327 return ((err == 0 ? 1 : 0));
328 }
329
330 /*
331 * Algorithm: call arch-specific map_pgsz to get best page size to use,
332 * then call grow_internal().
333 * Returns 0 on success.
334 */
335 static int
336 grow_lpg(caddr_t sp)
337 {
338 struct proc *p = curproc;
339 size_t pgsz;
340 size_t len, newsize;
341 caddr_t addr, saddr;
342 caddr_t growend;
343 int oszc, szc;
344 int err;
345
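/* Size the stack needs to be in order to include sp. */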
346 newsize = p->p_usrstack - sp;
347
348 oszc = p->p_stkpageszc;
349 pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
350 szc = page_szc(pgsz);
351
352 /*
353 * Covers two cases:
354 * 1. page_szc() returns -1 for invalid page size, so we want to
355 * ignore it in that case.
356 * 2. By design we never decrease page size, as it is more stable.
357 * This shouldn't happen as the stack never shrinks.
358 */
359 if (szc <= oszc) {
360 err = grow_internal(sp, oszc);
361 /* failed, fall back to base page size */
362 if (err != 0 && oszc != 0) {
363 err = grow_internal(sp, 0);
364 }
365 return (err);
366 }
367
368 /*
369 * We've grown sufficiently to switch to a new page size.
370 * So we are going to remap the whole segment with the new page size.
371 */
372 err = grow_internal(sp, szc);
373 /* The grow with szc failed, so fall back to base page size. */
374 if (err != 0) {
375 if (szc != 0) {
376 err = grow_internal(sp, 0);
377 }
378 return (err);
379 }
380
381 /*
382 * Round up stack pointer to a large page boundary and remap
383 * any pgsz pages in the segment already faulted in beyond that
384 * point.
385 */
386 saddr = p->p_usrstack - p->p_stksize;
387 addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
388 growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
389 len = growend - addr;
390 /* Make sure len did not wrap (growend > addr). Update page size code for stack. */
391 if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
392 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
393 p->p_stkpageszc = szc;
394 }
395
396 ASSERT(err == 0);
397 return (err); /* should always be 0 */
398 }
399
400 /*
401 * This routine assumes that the stack grows downward.
402 * Returns 0 on success, errno on failure.
403 */
404 int
405 grow_internal(caddr_t sp, uint_t growszc)
406 {
407 struct proc *p = curproc;
408 size_t newsize;
409 size_t oldsize;
410 int error;
411 size_t pgsz;
412 uint_t szc;
413 struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
414
415 ASSERT(sp < p->p_usrstack);
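/* Align the requested stack address down to a page boundary. */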
416 sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
417
418 /*
419 * grow to growszc alignment but use the current p->p_stkpageszc for
420 * the segvn_crargs szc passed to segvn_create. When memcntl is used
421 * to increase the szc, this allows the new extension segment to be
422 * concatenated successfully with the existing stack segment.
423 */
424 if ((szc = growszc) != 0) {
425 pgsz = page_get_pagesize(szc);
426 ASSERT(pgsz > PAGESIZE);
427 newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
428 if (newsize > (size_t)p->p_stk_ctl) {
429 szc = 0;
430 pgsz = PAGESIZE;
431 newsize = p->p_usrstack - sp;
432 }
433 } else {
434 pgsz = PAGESIZE;
435 newsize = p->p_usrstack - sp;
436 }
437
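/*
 * The request exceeds the stack size resource control; trigger the
 * control's action and fail.
 */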
438 if (newsize > (size_t)p->p_stk_ctl) {
439 (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
440 RCA_UNSAFE_ALL);
441
442 return (ENOMEM);
443 }
444
445 oldsize = p->p_stksize;
446 ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
447
448 if (newsize <= oldsize) { /* prevent the stack from shrinking */
449 return (0);
450 }
451
452 if (!(p->p_stkprot & PROT_EXEC)) {
453 crargs.prot &= ~PROT_EXEC;
454 }
455 /*
456 * extend stack with the proposed new growszc, which is different
457 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
458 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
459 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
460 * if not aligned to szc's pgsz.
461 */
462 if (szc > 0) {
463 caddr_t oldsp = p->p_usrstack - oldsize;
464 caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
465 pgsz);
466
467 if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
468 crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
469 AS_MAP_NO_LPOOB;
470 } else if (oldsp == austk) {
471 crargs.szc = szc;
472 } else {
473 crargs.szc = AS_MAP_STACK;
474 }
475 } else {
476 crargs.szc = AS_MAP_NO_LPOOB;
477 }
478 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
479
480 if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
481 segvn_create, &crargs)) != 0) {
482 if (error == EAGAIN) {
483 cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
484 "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
485 }
486 return (error);
487 }
488 p->p_stksize = newsize;
489 return (0);
490 }
491
492 /*
493 * Find address for user to map.
494 * If MAP_FIXED is not specified, we can pick any address we want, but we will
495 * first try the value in *addrp if it is non-NULL. Thus this is implementing
496 * a way to try and get a preferred address.
497 */
498 int
499 choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
500 int vacalign, uint_t flags)
501 {
502 proc_t *p = curproc;
503 caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
504 size_t lenp;
505
506 ASSERT(AS_ISCLAIMGAP(as)); /* searches should be serialized */
507
508 /*
509 * If we have been provided a hint, we should still expand the lenp
510 * to be the rest of the address space. This will allow us to
511 * treat the hint as a strong desire to be "nearby" the provided
512 * address. If we can't satisfy the hint, as_gap() will walk forward.
513 */
514 if (flags & _MAP_LOW32)
515 lenp = (caddr_t)USERLIMIT32 - basep;
516 #if defined(__amd64)
517 else if (p->p_model == DATAMODEL_NATIVE)
518 lenp = p->p_usrstack - basep -
519 ((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK);
520 #endif
521 else
522 lenp = as->a_userlimit - basep;
523
524 if (flags & MAP_FIXED) {
525 (void) as_unmap(as, *addrp, len);
526 return (0);
527 } else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
528 !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
529 /* User supplied address was available */
530 *addrp = basep;
531 } else {
532 /*
533 * No user supplied address or the address supplied was not
534 * available.
535 */
536 map_addr(addrp, len, off, vacalign, flags);
537 }
538 if (*addrp == NULL)
539 return (ENOMEM);
540 return (0);
541 }
542
543
544 /*
545 * Used for MAP_ANON - fast way to get anonymous pages
546 */
547 static int
548 zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
549 offset_t pos)
550 {
551 struct segvn_crargs vn_a;
552 int error;
553
554 if ((PROT_ALL & uprot) != uprot)
555 return (EACCES);
556
557 if ((flags & MAP_FIXED) != 0) {
558 caddr_t userlimit;
559
560 /*
561 * Use the user address. First verify that
562 * the address to be used is page aligned.
563 * Then make some simple bounds checks.
564 */
565 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
566 return (EINVAL);
567
568 userlimit = flags & _MAP_LOW32 ?
569 (caddr_t)USERLIMIT32 : as->a_userlimit;
570 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
571 case RANGE_OKAY:
572 break;
573 case RANGE_BADPROT:
574 return (ENOTSUP);
575 case RANGE_BADADDR:
576 default:
577 return (ENOMEM);
578 }
579 }
580 /*
581 * No need to worry about vac alignment for anonymous
582 * pages since this is a "clone" object that doesn't
583 * yet exist.
584 */
585 error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
586 if (error != 0) {
587 return (error);
588 }
589
590 /*
591 * Use the seg_vn segment driver; passing in the NULL amp
592 * gives the desired "cloning" effect.
593 */
594 vn_a.vp = NULL;
595 vn_a.offset = 0;
596 vn_a.type = flags & MAP_TYPE;
597 vn_a.prot = uprot;
598 vn_a.maxprot = PROT_ALL;
599 vn_a.flags = flags & ~MAP_TYPE;
600 vn_a.cred = CRED();
601 vn_a.amp = NULL;
602 vn_a.szc = 0;
603 vn_a.lgrp_mem_policy_flags = 0;
604
605 return (as_map(as, *addrp, len, segvn_create, &vn_a));
606 }
607
608 static int
609 smmap_common(caddr_t *addrp, size_t len,
610 int prot, int flags, struct file *fp, offset_t pos)
611 {
612 struct vnode *vp;
613 struct as *as = curproc->p_as;
614 uint_t uprot, maxprot, type;
615 int error;
616 int in_crit = 0;
617
618 if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
619 _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
620 MAP_TEXT | MAP_INITDATA)) != 0) {
621 /* | MAP_RENAME */ /* not implemented, let user know */
622 return (EINVAL);
623 }
624
625 if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
626 return (EINVAL);
627 }
628
629 if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
630 return (EINVAL);
631 }
632
633 #if defined(__sparc)
634 /*
635 * See if this is an "old mmap call". If so, remember this
636 * fact and convert the flags value given to mmap to indicate
637 * the specified address in the system call must be used.
638 * _MAP_NEW is set by all new uses of mmap.
639 */
640 if ((flags & _MAP_NEW) == 0)
641 flags |= MAP_FIXED;
642 #endif
643 flags &= ~_MAP_NEW;
644
645 type = flags & MAP_TYPE;
646 if (type != MAP_PRIVATE && type != MAP_SHARED)
647 return (EINVAL);
648
649
650 if (flags & MAP_ALIGN) {
651
652 if (flags & MAP_FIXED)
653 return (EINVAL);
654
655 /* alignment needs to be a power of 2 >= page size */
656 if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
657 !ISP2((uintptr_t)*addrp))
658 return (EINVAL);
659 }
660 /*
661 * Check for bad lengths and file position.
662 * We let the VOP_MAP routine check for negative lengths
663 * since on some vnode types this might be appropriate.
664 */
665 if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
666 return (EINVAL);
667
668 maxprot = PROT_ALL; /* start out allowing all accesses */
669 uprot = prot | PROT_USER;
670
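/*
 * MAP_ANON with no file: establish an anonymous (zfod) mapping via
 * zmap() under the address space range lock.
 */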
671 if (fp == NULL) {
672 ASSERT(flags & MAP_ANON);
673 /* discard lwpchan mappings, like munmap() */
674 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
675 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
676 as_rangelock(as);
677 error = zmap(as, addrp, len, uprot, flags, pos);
678 as_rangeunlock(as);
679 /*
680 * Tell machine specific code that lwp has mapped shared memory
681 */
682 if (error == 0 && (flags & MAP_SHARED)) {
683 /* EMPTY */
684 LWP_MMODEL_SHARED_AS(*addrp, len);
685 }
686 return (error);
687 } else if ((flags & MAP_ANON) != 0)
688 return (EINVAL);
689
690 vp = fp->f_vnode;
691
692 /* Can't execute code from "noexec" mounted filesystem. */
693 if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
694 maxprot &= ~PROT_EXEC;
695
696 /*
697 * These checks were added as part of large files.
698 *
699 * Return ENXIO if the initial position is negative; return EOVERFLOW
700 * if (offset + len) would overflow the maximum allowed offset for the
701 * type of file descriptor being used.
702 */
703 if (vp->v_type == VREG) {
704 if (pos < 0)
705 return (ENXIO);
706 if ((offset_t)len > (OFFSET_MAX(fp) - pos))
707 return (EOVERFLOW);
708 }
709
710 if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
711 /* no write access allowed */
712 maxprot &= ~PROT_WRITE;
713 }
714
715 /*
716 * XXX - Do we also adjust maxprot based on protections
717 * of the vnode? E.g. if no execute permission is given
718 * on the vnode for the current user, maxprot probably
719 * should disallow PROT_EXEC also? This is different
720 * from the write access as this would be a per vnode
721 * test as opposed to a per fd test for writability.
722 */
723
724 /*
725 * Verify that the specified protections are not greater than
726 * the maximum allowable protections. Also make sure that the file
727 * descriptor allows read access, since "write only" mappings are
728 * hard to do: normally we read from the file before a page can be
729 * written.
730 */
731 if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
732 return (EACCES);
733
734 /*
735 * If the user specified an address, do some simple checks here
736 */
737 if ((flags & MAP_FIXED) != 0) {
738 caddr_t userlimit;
739
740 /*
741 * Use the user address. First verify that
742 * the address to be used is page aligned.
743 * Then make some simple bounds checks.
744 */
745 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
746 return (EINVAL);
747
748 userlimit = flags & _MAP_LOW32 ?
749 (caddr_t)USERLIMIT32 : as->a_userlimit;
750 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
751 case RANGE_OKAY:
752 break;
753 case RANGE_BADPROT:
754 return (ENOTSUP);
755 case RANGE_BADADDR:
756 default:
757 return (ENOMEM);
758 }
759 }
760
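/*
 * If the vnode is subject to non-blocking mandatory locking, enter a
 * critical region and check for conflicting record locks before
 * establishing the mapping.
 */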
761 if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
762 nbl_need_check(vp)) {
763 int svmand;
764 nbl_op_t nop;
765
766 nbl_start_crit(vp, RW_READER);
767 in_crit = 1;
768 error = nbl_svmand(vp, fp->f_cred, &svmand);
769 if (error != 0)
770 goto done;
771 if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
772 if (prot & (PROT_READ | PROT_EXEC)) {
773 nop = NBL_READWRITE;
774 } else {
775 nop = NBL_WRITE;
776 }
777 } else {
778 nop = NBL_READ;
779 }
780 if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
781 error = EACCES;
782 goto done;
783 }
784 }
785
786 /* discard lwpchan mappings, like munmap() */
787 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
788 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
789
790 /*
791 * Ok, now let the vnode map routine do its thing to set things up.
792 */
793 error = VOP_MAP(vp, pos, as,
794 addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
795
796 if (error == 0) {
797 /*
798 * Tell machine specific code that lwp has mapped shared memory
799 */
800 if (flags & MAP_SHARED) {
801 /* EMPTY */
802 LWP_MMODEL_SHARED_AS(*addrp, len);
803 }
804 if (vp->v_type == VREG &&
805 (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
806 /*
807 * Mark this as an executable vnode
808 */
809 mutex_enter(&vp->v_lock);
810 vp->v_flag |= VVMEXEC;
811 mutex_exit(&vp->v_lock);
812 }
813 }
814
815 done:
816 if (in_crit)
817 nbl_end_crit(vp);
818 return (error);
819 }
820
821 #ifdef _LP64
822 /*
823 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
824 *
825 * The "large file" mmap routine mmap64(2) is also mapped to this routine
826 * by the 64-bit version of libc.
827 *
828 * Eventually, this should be the only version, and have smmap_common()
829 * folded back into it again. Some day.
830 */
831 caddr_t
832 smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
833 {
834 struct file *fp;
835 int error;
836
837 if (fd == -1 && (flags & MAP_ANON) != 0)
838 error = smmap_common(&addr, len, prot, flags,
839 NULL, (offset_t)pos);
840 else if ((fp = getf(fd)) != NULL) {
841 error = smmap_common(&addr, len, prot, flags,
842 fp, (offset_t)pos);
843 releasef(fd);
844 } else
845 error = EBADF;
846
847 return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
848 }
849 #endif /* _LP64 */
850
851 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
852
853 /*
854 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
855 */
856 caddr_t
857 smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
858 {
859 struct file *fp;
860 int error;
861 caddr_t a = (caddr_t)(uintptr_t)addr;
862
863 if (flags & _MAP_LOW32)
864 error = EINVAL;
865 else if (fd == -1 && (flags & MAP_ANON) != 0)
866 error = smmap_common(&a, (size_t)len, prot,
867 flags | _MAP_LOW32, NULL, (offset_t)pos);
868 else if ((fp = getf(fd)) != NULL) {
869 error = smmap_common(&a, (size_t)len, prot,
870 flags | _MAP_LOW32, fp, (offset_t)pos);
871 releasef(fd);
872 } else
873 error = EBADF;
874
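/* On success the mapping must lie entirely below the 32-bit limit. */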
875 ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
876
877 return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
878 }
879
880 /*
881 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
882 *
883 * Now things really get ugly because we can't use the C-style
884 * calling convention for more than 6 args, and 64-bit parameter
885 * passing on 32-bit systems is less than clean.
886 */
887
888 struct mmaplf32a {
889 caddr_t addr;
890 size_t len;
891 #ifdef _LP64
892 /*
893 * 32-bit contents, 64-bit cells
894 */
895 uint64_t prot;
896 uint64_t flags;
897 uint64_t fd;
898 uint64_t offhi;
899 uint64_t offlo;
900 #else
901 /*
902 * 32-bit contents, 32-bit cells
903 */
904 uint32_t prot;
905 uint32_t flags;
906 uint32_t fd;
907 uint32_t offhi;
908 uint32_t offlo;
909 #endif
910 };
911
912 int
913 smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
914 {
915 struct file *fp;
916 int error;
917 caddr_t a = uap->addr;
918 int flags = (int)uap->flags;
919 int fd = (int)uap->fd;
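/* Reassemble the 64-bit file offset from its two 32-bit halves. */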
920 #ifdef _BIG_ENDIAN
921 offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
922 #else
923 offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
924 #endif
925
926 if (flags & _MAP_LOW32)
927 error = EINVAL;
928 else if (fd == -1 && (flags & MAP_ANON) != 0)
929 error = smmap_common(&a, uap->len, (int)uap->prot,
930 flags | _MAP_LOW32, NULL, off);
931 else if ((fp = getf(fd)) != NULL) {
932 error = smmap_common(&a, uap->len, (int)uap->prot,
933 flags | _MAP_LOW32, fp, off);
934 releasef(fd);
935 } else
936 error = EBADF;
937
938 if (error == 0)
939 rvp->r_val1 = (uintptr_t)a;
940 return (error);
941 }
942
943 #endif /* _SYSCALL32_IMPL || _ILP32 */
944
945 int
946 munmap(caddr_t addr, size_t len)
947 {
948 struct proc *p = curproc;
949 struct as *as = p->p_as;
950
951 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
952 return (set_errno(EINVAL));
953
954 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
955 return (set_errno(EINVAL));
956
957 /*
958 * Discard lwpchan mappings.
959 */
960 if (p->p_lcp != NULL)
961 lwpchan_delete_mapping(p, addr, addr + len);
962 if (as_unmap(as, addr, len) != 0)
963 return (set_errno(EINVAL));
964
965 return (0);
966 }
967
968 int
969 mprotect(caddr_t addr, size_t len, int prot)
970 {
971 struct as *as = curproc->p_as;
972 uint_t uprot = prot | PROT_USER;
973 int error;
974
975 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
976 return (set_errno(EINVAL));
977
978 switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
979 case RANGE_OKAY:
980 break;
981 case RANGE_BADPROT:
982 return (set_errno(ENOTSUP));
983 case RANGE_BADADDR:
984 default:
985 return (set_errno(ENOMEM));
986 }
987
988 error = as_setprot(as, addr, len, uprot);
989 if (error)
990 return (set_errno(error));
991 return (0);
992 }
993
994 #define MC_CACHE 128 /* internal result buffer */
995 #define MC_QUANTUM (MC_CACHE * PAGESIZE) /* addresses covered in loop */
996
997 int
998 mincore(caddr_t addr, size_t len, char *vecp)
999 {
1000 struct as *as = curproc->p_as;
1001 caddr_t ea; /* end address of loop */
1002 size_t rl; /* inner result length */
1003 char vec[MC_CACHE]; /* local vector cache */
1004 int error;
1005 model_t model;
1006 long llen;
1007
1008 model = get_udatamodel();
1009 /*
1010 * Validate form of address parameters.
1011 */
1012 if (model == DATAMODEL_NATIVE) {
1013 llen = (long)len;
1014 } else {
1015 llen = (int32_t)(size32_t)len;
1016 }
1017 if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
1018 return (set_errno(EINVAL));
1019
1020 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
1021 return (set_errno(ENOMEM));
1022
1023 /*
1024 * Loop over subranges of interval [addr : addr + len), recovering
1025 * results internally and then copying them out to caller. Subrange
1026 * is based on the size of MC_CACHE, defined above.
1027 */
1028 for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
1029 error = as_incore(as, addr,
1030 (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
1031 if (rl != 0) {
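/* Convert the residual byte count to a page (vector entry) count. */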
1032 rl = (rl + PAGESIZE - 1) / PAGESIZE;
1033 if (copyout(vec, vecp, rl) != 0)
1034 return (set_errno(EFAULT));
1035 vecp += rl;
1036 }
1037 if (error != 0)
1038 return (set_errno(ENOMEM));
1039 }
1040 return (0);
1041 }