/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

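/*
 * Tunables: when set, and the process has SAUTOLPG in p_flag, brk() and
 * stack growth consult map_pgsz() to choose a large page size automatically.
 */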
int use_brk_lpg = 1;
int use_stk_lpg = 1;

/*
 * If set, we will not randomize mappings where the 'addr' argument is
 * non-NULL and is an address hint rather than a MAP_ALIGN alignment
 * request.
 */
int aslr_respect_mmap_hint = 0;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

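/*
 * brk(2) entry point.  Purely as an illustration (not the actual libc
 * source), sbrk(3C) can be layered on this interface roughly as
 *
 *	caddr_t cur = (caddr_t)brk(0);	-- nva == 0 queries the current brk
 *	(void) brk(cur + incr);		-- then move the brk by 'incr'
 *
 * which is why the nva == 0 query case is handled specially below.
 */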
intptr_t
brk(caddr_t nva)
{
	int error;
	proc_t *p = curproc;

	/*
	 * Serialize brk operations on an address space.
	 * This also serves as the lock protecting p_brksize
	 * and p_brkpageszc.
	 */
	as_rangelock(p->p_as);

	/*
	 * As a special case to aid the implementation of sbrk(3C), if given a
	 * new brk of 0, return the current brk.  We'll hide this in brk(3C).
	 */
	if (nva == 0) {
		as_rangeunlock(p->p_as);
		return ((intptr_t)(p->p_brkbase + p->p_brksize));
	}

	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		error = brk_lpg(nva);
	} else {
		error = brk_internal(nva, p->p_brkpageszc);
	}
	as_rangeunlock(p->p_as);
	return ((error != 0 ? set_errno(error) : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get the best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
	struct proc *p = curproc;
	size_t pgsz, len;
	caddr_t addr, brkend;
	caddr_t bssbase = p->p_bssbase;
	caddr_t brkbase = p->p_brkbase;
	int oszc, szc;
	int err;

	oszc = p->p_brkpageszc;

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk_internal() will initialize it.
	 */
	if (brkbase == 0) {
		return (brk_internal(nva, oszc));
	}

	len = nva - bssbase;

	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for an invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 */
	if (szc <= oszc) {
		err = brk_internal(nva, oszc);
		/* If that failed, back off to the base page size. */
		if (err != 0 && oszc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	err = brk_internal(nva, szc);
	/* If using szc failed, map with the base page size and return. */
	if (err != 0) {
		if (szc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	/*
	 * Round up brk base to a large page boundary and remap
	 * anything in the segment already faulted in beyond that
	 * point.
	 */
	addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
	brkend = brkbase + p->p_brksize;
	len = brkend - addr;
	/* Check that len is not negative. Update page size code for heap. */
	if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_brkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;

	/*
	 * Extend the heap to brkszc alignment but use the current
	 * p->p_brkpageszc for the newly created segment. This allows the
	 * new extension segment to be concatenated successfully with the
	 * existing brk segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

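	/* Snapshot the enforced RLIMIT_DATA resource control under p_lock. */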
	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed, p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE, calculate the
	 * heap size as the real new heap size by rounding it up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing the heap pagesize (e.g. some old code), and also,
	 * if the heap pagesize changes, we can update p_brkpageszc but delay
	 * adding the new mapping yet still know from p_brksize where the
	 * heap really ends. The user-requested heap end is stored in a libc
	 * variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * Use PAGESIZE to round up ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc has changed since the
	 * last time p_brksize was computed.
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
	    PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add a new zfod mapping to extend the UNIX data segment.
		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
		 * page sizes if ova is not aligned to szc's pgsz.
		 */
		if (szc > 0) {
			caddr_t rbss;

			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
			    pgsz);
			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
				    AS_MAP_NO_LPOOB;
			} else if (ova == rbss) {
				crargs.szc = szc;
			} else {
				crargs.szc = AS_MAP_HEAP;
			}
		} else {
			crargs.szc = AS_MAP_NO_LPOOB;
		}
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release the mapping to shrink the UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	p->p_brksize = size;
	return (0);
}

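/*
 * grow() is typically reached from the pagefault path when a faulting
 * address lies just below the current bottom of the stack.
 */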
/*
 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t oldsize = p->p_stksize;
	size_t newsize;
	int err;

	/*
	 * Serialize grow operations on an address space.
	 * This also serves as the lock protecting p_stksize
	 * and p_stkpageszc.
	 */
	as_rangelock(as);
	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
		err = grow_lpg(sp);
	} else {
		err = grow_internal(sp, p->p_stkpageszc);
	}
	as_rangeunlock(as);

	if (err == 0 && (newsize = p->p_stksize) > oldsize) {
		ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
		ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
		/*
		 * Set up translations so the process doesn't have to fault in
		 * the stack pages we just gave it.
		 */
		(void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
		    newsize - oldsize, F_INVAL, S_WRITE);
	}
	return ((err == 0 ? 1 : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get the best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
	struct proc *p = curproc;
	size_t pgsz;
	size_t len, newsize;
	caddr_t addr, saddr;
	caddr_t growend;
	int oszc, szc;
	int err;

	newsize = p->p_usrstack - sp;

	oszc = p->p_stkpageszc;
	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for an invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 * This shouldn't happen as the stack never shrinks.
	 */
	if (szc <= oszc) {
		err = grow_internal(sp, oszc);
		/* If that failed, fall back to the base page size. */
		if (err != 0 && oszc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * We've grown sufficiently to switch to a new page size, so
	 * remap the whole segment with the new page size.
	 */
	err = grow_internal(sp, szc);
	/* If the grow with szc failed, fall back to the base page size. */
	if (err != 0) {
		if (szc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * Round up the stack pointer to a large page boundary and remap
	 * any pgsz pages in the segment already faulted in beyond that
	 * point.
	 */
	saddr = p->p_usrstack - p->p_stksize;
	addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
	growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
	len = growend - addr;
	/* Check that len is not negative. Update page size code for stack. */
	if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
		p->p_stkpageszc = szc;
	}

	ASSERT(err == 0);
	return (err);	/* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	size_t newsize;
	size_t oldsize;
	int error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);
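	/* Round the faulting address down to a page boundary. */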
	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

	/*
	 * Grow to growszc alignment but use the current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
		newsize = p->p_usrstack - sp;
	}

	if (newsize > (size_t)p->p_stk_ctl) {
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * Extend the stack with the proposed new growszc, which differs from
	 * p_stkpageszc only on a memcntl to increase the stack pagesize.
	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
	 * if not aligned to szc's pgsz.
	 */
	if (szc > 0) {
		caddr_t oldsp = p->p_usrstack - oldsize;
		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
		    pgsz);

		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
			    AS_MAP_NO_LPOOB;
		} else if (oldsp == austk) {
			crargs.szc = szc;
		} else {
			crargs.szc = AS_MAP_STACK;
		}
	} else {
		crargs.szc = AS_MAP_NO_LPOOB;
	}
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
	    segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;
	return (0);
}

/*
 * Find an address for the user to map. If MAP_FIXED is not specified, we
 * can pick any address we want, but we will first try the value in *addrp
 * if it is non-NULL and _MAP_RANDOMIZE is not set. Thus this implements a
 * way to try to get a preferred address.
 */
int
choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags)
{
	caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
	size_t lenp = len;

	ASSERT(AS_ISCLAIMGAP(as));	/* searches should be serialized */
	if (flags & MAP_FIXED) {
		(void) as_unmap(as, *addrp, len);
		return (0);
	} else if (basep != NULL &&
	    ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
	    !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
		/* The user-supplied address was available. */
		*addrp = basep;
	} else {
		/*
		 * No user-supplied address, or the address supplied was not
		 * available.
		 */
		map_addr(addrp, len, off, vacalign, flags);
	}
	if (*addrp == NULL)
		return (ENOMEM);
	return (0);
}


/*
 * Used for MAP_ANON - a fast way to get anonymous pages.
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
	struct segvn_crargs vn_a;
	int error;

	if ((PROT_ALL & uprot) != uprot)
		return (EACCES);

	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address. First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}
	/*
	 * No need to worry about vac alignment for anonymous
	 * pages since this is a "clone" object that doesn't
	 * yet exist.
	 */
	error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
	if (error != 0) {
		return (error);
	}

	/*
	 * Use the seg_vn segment driver; passing in the NULL amp
	 * gives the desired "cloning" effect.
	 */
	vn_a.vp = NULL;
	vn_a.offset = 0;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = uprot;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = CRED();
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	return (as_map(as, *addrp, len, segvn_create, &vn_a));
}

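/*
 * A mapping may be randomized (ASLR) unless it is MAP_FIXED, or unless the
 * caller passed a plain (non-MAP_ALIGN) address hint and the
 * aslr_respect_mmap_hint tunable says such hints should be honored.
 */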
#define	RANDOMIZABLE_MAPPING(addr, flags) (((flags & MAP_FIXED) == 0) && \
	!(((flags & MAP_ALIGN) == 0) && (addr != 0) && aslr_respect_mmap_hint))

static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
	struct vnode *vp;
	struct as *as = curproc->p_as;
	uint_t uprot, maxprot, type;
	int error;
	int in_crit = 0;

	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
	    MAP_TEXT | MAP_INITDATA)) != 0) {
		/* | MAP_RENAME */	/* not implemented, let user know */
		return (EINVAL);
	}

	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
		return (EINVAL);
	}

	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
		return (EINVAL);
	}

	if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
	    (MAP_FIXED | _MAP_RANDOMIZE)) {
		return (EINVAL);
	}

	/*
	 * If it's not a fixed allocation and mmap ASLR is enabled, randomize
	 * it.
	 */
	if (RANDOMIZABLE_MAPPING(*addrp, flags) &&
	    secflag_enabled(curproc, PROC_SEC_ASLR))
		flags |= _MAP_RANDOMIZE;

#if defined(__sparc)
	/*
	 * See if this is an "old mmap call". If so, remember this
	 * fact and convert the flags value given to mmap to indicate
	 * the specified address in the system call must be used.
	 * _MAP_NEW is set by all new uses of mmap.
	 */
	if ((flags & _MAP_NEW) == 0)
		flags |= MAP_FIXED;
#endif
	flags &= ~_MAP_NEW;

	type = flags & MAP_TYPE;
	if (type != MAP_PRIVATE && type != MAP_SHARED)
		return (EINVAL);


	if (flags & MAP_ALIGN) {
		if (flags & MAP_FIXED)
			return (EINVAL);

		/* alignment needs to be a power of 2 >= page size */
		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
		    !ISP2((uintptr_t)*addrp))
			return (EINVAL);
	}
	/*
	 * Check for bad lengths and file position.
	 * We let the VOP_MAP routine check for negative lengths
	 * since on some vnode types this might be appropriate.
	 */
	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
		return (EINVAL);

	maxprot = PROT_ALL;		/* start out allowing all accesses */
	uprot = prot | PROT_USER;

	if (fp == NULL) {
		ASSERT(flags & MAP_ANON);
		/* discard lwpchan mappings, like munmap() */
		if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
			lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
		as_rangelock(as);
		error = zmap(as, addrp, len, uprot, flags, pos);
		as_rangeunlock(as);
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (error == 0 && (flags & MAP_SHARED)) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		return (error);
	} else if ((flags & MAP_ANON) != 0)
		return (EINVAL);

	vp = fp->f_vnode;

	/* Can't execute code from a "noexec" mounted filesystem. */
	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
		maxprot &= ~PROT_EXEC;

	/*
	 * These checks were added as part of large file support.
	 *
	 * Return ENXIO if the initial position is negative; return EOVERFLOW
	 * if (offset + len) would overflow the maximum allowed offset for the
	 * type of file descriptor being used.
	 */
	if (vp->v_type == VREG) {
		if (pos < 0)
			return (ENXIO);
		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
			return (EOVERFLOW);
	}

	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
		/* no write access allowed */
		maxprot &= ~PROT_WRITE;
	}

	/*
	 * XXX - Do we also adjust maxprot based on protections
	 * of the vnode? E.g. if no execute permission is given
	 * on the vnode for the current user, maxprot probably
	 * should disallow PROT_EXEC also? This is different
	 * from the write access as this would be a per vnode
	 * test as opposed to a per fd test for writability.
	 */

	/*
	 * Verify that the specified protections are not greater than
	 * the maximum allowable protections. Also test to make sure
	 * that the file descriptor allows read access, since "write only"
	 * mappings are hard to do: normally we must read from the file
	 * before a page can be written.
	 */
	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
		return (EACCES);

	/*
	 * If the user specified an address, do some simple checks here.
	 */
	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address. First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
	}

	if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
	    nbl_need_check(vp)) {
		int svmand;
		nbl_op_t nop;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto done;
		if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
			if (prot & (PROT_READ | PROT_EXEC)) {
				nop = NBL_READWRITE;
			} else {
				nop = NBL_WRITE;
			}
		} else {
			nop = NBL_READ;
		}
		if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	/* discard lwpchan mappings, like munmap() */
	if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
		lwpchan_delete_mapping(curproc, *addrp, *addrp + len);

	/*
	 * Ok, now let the vnode map routine do its thing to set things up.
	 */
	error = VOP_MAP(vp, pos, as,
	    addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);

	if (error == 0) {
		/*
		 * Tell machine specific code that lwp has mapped shared memory
		 */
		if (flags & MAP_SHARED) {
			/* EMPTY */
			LWP_MMODEL_SHARED_AS(*addrp, len);
		}
		if (vp->v_type == VREG &&
		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
			/*
			 * Mark this as an executable vnode.
			 */
			mutex_enter(&vp->v_lock);
			vp->v_flag |= VVMEXEC;
			mutex_exit(&vp->v_lock);
		}
	}

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again. Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
	struct file *fp;
	int error;

	if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&addr, len, prot, flags,
		    NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&addr, len, prot, flags,
		    fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif	/* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
	struct file *fp;
	int error;
	caddr_t a = (caddr_t)(uintptr_t)addr;

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, NULL, (offset_t)pos);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, fp, (offset_t)pos);
		releasef(fd);
	} else
		error = EBADF;

	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
	caddr_t addr;
	size_t len;
#ifdef _LP64
	/*
	 * 32-bit contents, 64-bit cells
	 */
	uint64_t prot;
	uint64_t flags;
	uint64_t fd;
	uint64_t offhi;
	uint64_t offlo;
#else
	/*
	 * 32-bit contents, 32-bit cells
	 */
	uint32_t prot;
	uint32_t flags;
	uint32_t fd;
	uint32_t offhi;
	uint32_t offlo;
#endif
};

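/*
 * The 64-bit file offset arrives as two 32-bit cells; smmaplf32() below
 * reassembles it as (offhi << 32) | offlo, with the two cells swapped on
 * little-endian machines.
 */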
int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
	struct file *fp;
	int error;
	caddr_t a = uap->addr;
	int flags = (int)uap->flags;
	int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

	if (flags & _MAP_LOW32)
		error = EINVAL;
	else if (fd == -1 && (flags & MAP_ANON) != 0)
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, NULL, off);
	else if ((fp = getf(fd)) != NULL) {
		error = smmap_common(&a, uap->len, (int)uap->prot,
		    flags | _MAP_LOW32, fp, off);
		releasef(fd);
	} else
		error = EBADF;

	if (error == 0)
		rvp->r_val1 = (uintptr_t)a;
	return (error);
}

#endif	/* _SYSCALL32_IMPL || _ILP32 */

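/*
 * munmap(2): validate the range, discard any lwpchan registrations that
 * fall within it, then unmap it from the address space.
 */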
int
munmap(caddr_t addr, size_t len)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(EINVAL));

	/*
	 * Discard lwpchan mappings.
	 */
	if (p->p_lcp != NULL)
		lwpchan_delete_mapping(p, addr, addr + len);
	if (as_unmap(as, addr, len) != 0)
		return (set_errno(EINVAL));

	return (0);
}

int
mprotect(caddr_t addr, size_t len, int prot)
{
	struct as *as = curproc->p_as;
	uint_t uprot = prot | PROT_USER;
	int error;

	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
		return (set_errno(EINVAL));

	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
	case RANGE_OKAY:
		break;
	case RANGE_BADPROT:
		return (set_errno(ENOTSUP));
	case RANGE_BADADDR:
	default:
		return (set_errno(ENOMEM));
	}

	error = as_setprot(as, addr, len, uprot);
	if (error)
		return (set_errno(error));
	return (0);
}

#define	MC_CACHE	128			/* internal result buffer */
#define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */

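/*
 * mincore(2) walks the range MC_QUANTUM bytes (MC_CACHE pages) at a time,
 * staging per-page results in a small on-stack vector before copying them
 * out to the user's vector.
 */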
int
mincore(caddr_t addr, size_t len, char *vecp)
{
	struct as *as = curproc->p_as;
	caddr_t ea;			/* end address of loop */
	size_t rl;			/* inner result length */
	char vec[MC_CACHE];		/* local vector cache */
	int error;
	model_t model;
	long llen;

	model = get_udatamodel();
	/*
	 * Validate the form of the address parameters.
	 */
	if (model == DATAMODEL_NATIVE) {
		llen = (long)len;
	} else {
		llen = (int32_t)(size32_t)len;
	}
	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
		return (set_errno(EINVAL));

	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
		return (set_errno(ENOMEM));

	/*
	 * Loop over subranges of the interval [addr : addr + len), recovering
	 * results internally and then copying them out to the caller. The
	 * subrange size is based on MC_CACHE, defined above.
	 */
	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
		error = as_incore(as, addr,
		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
		if (rl != 0) {
			rl = (rl + PAGESIZE - 1) / PAGESIZE;
			if (copyout(vec, vecp, rl) != 0)
				return (set_errno(EFAULT));
			vecp += rl;
		}
		if (error != 0)
			return (set_errno(ENOMEM));
	}
	return (0);
}