sync further changes from uts/aslr
7029 want per-process exploit mitigation features (secflags)
7030 want basic address space layout randomization (aslr)
7031 noexec_user_stack should be a secflag
7032 want a means to forbid mappings around NULL.
--- old/usr/src/uts/common/os/grow.c
+++ new/usr/src/uts/common/os/grow.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
23 23
24 24 /*
25 25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
26 26 * Use is subject to license terms.
27 27 */
28 28
29 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 30 /* All Rights Reserved */
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/inttypes.h>
34 34 #include <sys/param.h>
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/systm.h>
37 37 #include <sys/signal.h>
38 38 #include <sys/user.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/var.h>
41 41 #include <sys/proc.h>
42 42 #include <sys/tuneable.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/cmn_err.h>
45 45 #include <sys/cred.h>
46 46 #include <sys/vnode.h>
47 47 #include <sys/vfs.h>
48 48 #include <sys/vm.h>
49 49 #include <sys/file.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/vmparam.h>
52 52 #include <sys/fcntl.h>
53 53 #include <sys/lwpchan_impl.h>
54 54 #include <sys/nbmlock.h>
55 55
56 56 #include <vm/hat.h>
57 57 #include <vm/as.h>
58 58 #include <vm/seg.h>
59 59 #include <vm/seg_dev.h>
60 60 #include <vm/seg_vn.h>
61 61
62 62 int use_brk_lpg = 1;
63 63 int use_stk_lpg = 1;
64 64
65 +/*
66 + * If set, we will not randomize mappings where the 'addr' argument is
67 + * non-NULL and not an alignment.
68 + */
69 +int aslr_respect_mmap_hint = 0;
70 +
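Editor's note: aslr_respect_mmap_hint is an ordinary global in genunix, so (assuming the usual mechanism for such tunables, which this webrev does not itself document) it could be forced on at boot from /etc/system. A hypothetical fragment:

* Hypothetical /etc/system entry: honour non-NULL mmap(2) address hints
* even for processes running with ASLR enabled.
set aslr_respect_mmap_hint = 1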
65 71 static int brk_lpg(caddr_t nva);
66 72 static int grow_lpg(caddr_t sp);
67 73
68 -int
74 +intptr_t
69 75 brk(caddr_t nva)
70 76 {
71 77 int error;
72 78 proc_t *p = curproc;
73 79
74 80 /*
75 81 * Serialize brk operations on an address space.
76 82 * This also serves as the lock protecting p_brksize
77 83 * and p_brkpageszc.
78 84 */
79 85 as_rangelock(p->p_as);
86 +
87 + /*
88 + * As a special case to aid the implementation of sbrk(3C), if given a
89 + * new brk of 0, return the current brk. We'll hide this in brk(3C).
90 + */
91 + if (nva == 0) {
92 + as_rangeunlock(p->p_as);
93 + return ((intptr_t)(p->p_brkbase + p->p_brksize));
94 + }
95 +
80 96 if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
81 97 error = brk_lpg(nva);
82 98 } else {
83 99 error = brk_internal(nva, p->p_brkpageszc);
84 100 }
85 101 as_rangeunlock(p->p_as);
86 102 return ((error != 0 ? set_errno(error) : 0));
87 103 }
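Editor's note: the brk(0) special case above exists so that sbrk(3C) can be layered on the system call without libc having to track the break itself. A minimal userland sketch of that layering, assuming a hypothetical raw wrapper __brk_raw() that simply returns the system call's intptr_t result; this is not the actual libc code and it ignores the locking the real brk(3C)/sbrk(3C) need:

#include <stdint.h>

extern intptr_t __brk_raw(void *);	/* hypothetical raw brk syscall wrapper */

void *
my_sbrk(intptr_t incr)
{
	/* With the change above, a new break of 0 returns the current break. */
	intptr_t oldbrk = __brk_raw((void *)0);

	if (incr == 0)
		return ((void *)oldbrk);

	/* For a non-zero request the syscall still returns 0 on success. */
	if (__brk_raw((void *)(oldbrk + incr)) != 0)
		return ((void *)-1);	/* errno is set by the failed brk */

	return ((void *)oldbrk);
}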
88 104
89 105 /*
90 106 * Algorithm: call arch-specific map_pgsz to get best page size to use,
91 107 * then call brk_internal().
92 108 * Returns 0 on success.
93 109 */
94 110 static int
95 111 brk_lpg(caddr_t nva)
96 112 {
97 113 struct proc *p = curproc;
98 114 size_t pgsz, len;
99 115 caddr_t addr, brkend;
100 116 caddr_t bssbase = p->p_bssbase;
101 117 caddr_t brkbase = p->p_brkbase;
102 118 int oszc, szc;
103 119 int err;
104 120
105 121 oszc = p->p_brkpageszc;
106 122
107 123 /*
108 124 * If p_brkbase has not yet been set, the first call
109 125 * to brk_internal() will initialize it.
110 126 */
111 127 if (brkbase == 0) {
112 128 return (brk_internal(nva, oszc));
113 129 }
114 130
115 131 len = nva - bssbase;
116 132
117 133 pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
118 134 szc = page_szc(pgsz);
119 135
120 136 /*
121 137 * Covers two cases:
122 138 * 1. page_szc() returns -1 for invalid page size, so we want to
123 139 * ignore it in that case.
124 140 * 2. By design we never decrease page size, as it is more stable.
125 141 */
126 142 if (szc <= oszc) {
127 143 err = brk_internal(nva, oszc);
128 144 /* If failed, back off to base page size. */
129 145 if (err != 0 && oszc != 0) {
130 146 err = brk_internal(nva, 0);
131 147 }
132 148 return (err);
133 149 }
134 150
135 151 err = brk_internal(nva, szc);
136 152 /* If using szc failed, map with base page size and return. */
137 153 if (err != 0) {
138 154 if (szc != 0) {
139 155 err = brk_internal(nva, 0);
140 156 }
141 157 return (err);
142 158 }
143 159
144 160 /*
145 161 * Round up brk base to a large page boundary and remap
146 162 * anything in the segment already faulted in beyond that
147 163 * point.
148 164 */
149 165 addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
150 166 brkend = brkbase + p->p_brksize;
151 167 len = brkend - addr;
152 168 /* Check that len is not negative. Update page size code for heap. */
153 169 if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
154 170 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
155 171 p->p_brkpageszc = szc;
156 172 }
157 173
158 174 ASSERT(err == 0);
159 175 return (err); /* should always be 0 */
160 176 }
161 177
162 178 /*
163 179 * Returns 0 on success.
164 180 */
165 181 int
166 182 brk_internal(caddr_t nva, uint_t brkszc)
167 183 {
168 184 caddr_t ova; /* current break address */
169 185 size_t size;
170 186 int error;
171 187 struct proc *p = curproc;
172 188 struct as *as = p->p_as;
173 189 size_t pgsz;
174 190 uint_t szc;
175 191 rctl_qty_t as_rctl;
176 192
177 193 /*
178 194 * extend heap to brkszc alignment but use current p->p_brkpageszc
179 195 * for the newly created segment. This allows the new extension
180 196 * segment to be concatenated successfully with the existing brk
181 197 * segment.
182 198 */
183 199 if ((szc = brkszc) != 0) {
184 200 pgsz = page_get_pagesize(szc);
185 201 ASSERT(pgsz > PAGESIZE);
186 202 } else {
187 203 pgsz = PAGESIZE;
188 204 }
189 205
190 206 mutex_enter(&p->p_lock);
191 207 as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
192 208 p->p_rctls, p);
193 209 mutex_exit(&p->p_lock);
194 210
195 211 /*
196 212 * If p_brkbase has not yet been set, the first call
197 213 * to brk() will initialize it.
198 214 */
199 215 if (p->p_brkbase == 0)
200 216 p->p_brkbase = nva;
201 217
202 218 /*
203 219 * Before multiple page size support existed p_brksize was the value
204 220 * not rounded to the pagesize (i.e. it stored the exact user request
205 221 * for heap size). If pgsz is greater than PAGESIZE calculate the
206 222 * heap size as the real new heap size by rounding it up to pgsz.
207 223 * This is useful since we may want to know where the heap ends
208 224 * without knowing heap pagesize (e.g. some old code) and also if
209 225 * heap pagesize changes we can update p_brkpageszc but delay adding
210 226 * new mapping yet still know from p_brksize where the heap really
211 227 	 * ends. The user-requested heap end is stored in a libc variable.
212 228 */
213 229 if (pgsz > PAGESIZE) {
214 230 caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
215 231 size = tnva - p->p_brkbase;
216 232 if (tnva < p->p_brkbase || (size > p->p_brksize &&
217 233 size > (size_t)as_rctl)) {
218 234 szc = 0;
219 235 pgsz = PAGESIZE;
220 236 size = nva - p->p_brkbase;
221 237 }
222 238 } else {
223 239 size = nva - p->p_brkbase;
224 240 }
225 241
226 242 /*
227 243 * use PAGESIZE to roundup ova because we want to know the real value
228 244 * of the current heap end in case p_brkpageszc changes since the last
229 245 * p_brksize was computed.
230 246 */
231 247 nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
232 248 ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
233 249 PAGESIZE);
234 250
235 251 if ((nva < p->p_brkbase) || (size > p->p_brksize &&
236 252 size > as_rctl)) {
237 253 mutex_enter(&p->p_lock);
238 254 (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
239 255 RCA_SAFE);
240 256 mutex_exit(&p->p_lock);
241 257 return (ENOMEM);
242 258 }
243 259
244 260 if (nva > ova) {
245 261 struct segvn_crargs crargs =
246 262 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
247 263
248 264 if (!(p->p_datprot & PROT_EXEC)) {
249 265 crargs.prot &= ~PROT_EXEC;
250 266 }
251 267
252 268 /*
253 269 * Add new zfod mapping to extend UNIX data segment
254 270 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
255 271 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
256 272 * page sizes if ova is not aligned to szc's pgsz.
257 273 */
258 274 if (szc > 0) {
259 275 caddr_t rbss;
260 276
261 277 rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
262 278 pgsz);
263 279 if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
264 280 crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
265 281 AS_MAP_NO_LPOOB;
266 282 } else if (ova == rbss) {
267 283 crargs.szc = szc;
268 284 } else {
269 285 crargs.szc = AS_MAP_HEAP;
270 286 }
271 287 } else {
272 288 crargs.szc = AS_MAP_NO_LPOOB;
273 289 }
274 290 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
275 291 error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
276 292 &crargs);
277 293 if (error) {
278 294 return (error);
279 295 }
280 296
281 297 } else if (nva < ova) {
282 298 /*
283 299 * Release mapping to shrink UNIX data segment.
284 300 */
285 301 (void) as_unmap(as, nva, (size_t)(ova - nva));
286 302 }
287 303 p->p_brksize = size;
288 304 return (0);
289 305 }
290 306
291 307 /*
292 308 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
293 309 * This routine assumes that the stack grows downward.
294 310 */
295 311 int
296 312 grow(caddr_t sp)
297 313 {
298 314 struct proc *p = curproc;
299 315 struct as *as = p->p_as;
300 316 size_t oldsize = p->p_stksize;
301 317 size_t newsize;
302 318 int err;
303 319
304 320 /*
305 321 * Serialize grow operations on an address space.
306 322 * This also serves as the lock protecting p_stksize
307 323 * and p_stkpageszc.
308 324 */
309 325 as_rangelock(as);
310 326 if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
311 327 err = grow_lpg(sp);
312 328 } else {
313 329 err = grow_internal(sp, p->p_stkpageszc);
314 330 }
315 331 as_rangeunlock(as);
316 332
317 333 if (err == 0 && (newsize = p->p_stksize) > oldsize) {
318 334 ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
319 335 ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
320 336 /*
321 337 * Set up translations so the process doesn't have to fault in
322 338 * the stack pages we just gave it.
323 339 */
324 340 (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
325 341 newsize - oldsize, F_INVAL, S_WRITE);
326 342 }
327 343 return ((err == 0 ? 1 : 0));
328 344 }
329 345
330 346 /*
331 347 * Algorithm: call arch-specific map_pgsz to get best page size to use,
332 348 * then call grow_internal().
333 349 * Returns 0 on success.
334 350 */
335 351 static int
336 352 grow_lpg(caddr_t sp)
337 353 {
338 354 struct proc *p = curproc;
339 355 size_t pgsz;
340 356 size_t len, newsize;
341 357 caddr_t addr, saddr;
342 358 caddr_t growend;
343 359 int oszc, szc;
344 360 int err;
345 361
346 362 newsize = p->p_usrstack - sp;
347 363
348 364 oszc = p->p_stkpageszc;
349 365 pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
350 366 szc = page_szc(pgsz);
351 367
352 368 /*
353 369 * Covers two cases:
354 370 * 1. page_szc() returns -1 for invalid page size, so we want to
355 371 * ignore it in that case.
356 372 * 2. By design we never decrease page size, as it is more stable.
357 373 * This shouldn't happen as the stack never shrinks.
358 374 */
359 375 if (szc <= oszc) {
360 376 err = grow_internal(sp, oszc);
361 377 /* failed, fall back to base page size */
362 378 if (err != 0 && oszc != 0) {
363 379 err = grow_internal(sp, 0);
364 380 }
365 381 return (err);
366 382 }
367 383
368 384 /*
369 385 * We've grown sufficiently to switch to a new page size.
370 386 * So we are going to remap the whole segment with the new page size.
371 387 */
372 388 err = grow_internal(sp, szc);
373 389 /* The grow with szc failed, so fall back to base page size. */
374 390 if (err != 0) {
375 391 if (szc != 0) {
376 392 err = grow_internal(sp, 0);
377 393 }
378 394 return (err);
379 395 }
380 396
381 397 /*
382 398 * Round up stack pointer to a large page boundary and remap
383 399 * any pgsz pages in the segment already faulted in beyond that
384 400 * point.
385 401 */
386 402 saddr = p->p_usrstack - p->p_stksize;
387 403 addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
388 404 growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
389 405 len = growend - addr;
390 406 /* Check that len is not negative. Update page size code for stack. */
391 407 if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
392 408 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
393 409 p->p_stkpageszc = szc;
394 410 }
395 411
396 412 ASSERT(err == 0);
397 413 return (err); /* should always be 0 */
398 414 }
399 415
400 416 /*
401 417 * This routine assumes that the stack grows downward.
402 418 * Returns 0 on success, errno on failure.
403 419 */
404 420 int
405 421 grow_internal(caddr_t sp, uint_t growszc)
406 422 {
407 423 struct proc *p = curproc;
408 424 size_t newsize;
409 425 size_t oldsize;
410 426 int error;
411 427 size_t pgsz;
412 428 uint_t szc;
413 429 struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
414 430
415 431 ASSERT(sp < p->p_usrstack);
416 432 sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
417 433
418 434 /*
419 435 * grow to growszc alignment but use current p->p_stkpageszc for
420 436 * the segvn_crargs szc passed to segvn_create. For memcntl to
421 437 * increase the szc, this allows the new extension segment to be
422 438 * concatenated successfully with the existing stack segment.
423 439 */
424 440 if ((szc = growszc) != 0) {
425 441 pgsz = page_get_pagesize(szc);
426 442 ASSERT(pgsz > PAGESIZE);
427 443 newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
428 444 if (newsize > (size_t)p->p_stk_ctl) {
429 445 szc = 0;
430 446 pgsz = PAGESIZE;
431 447 newsize = p->p_usrstack - sp;
432 448 }
433 449 } else {
434 450 pgsz = PAGESIZE;
435 451 newsize = p->p_usrstack - sp;
436 452 }
437 453
438 454 if (newsize > (size_t)p->p_stk_ctl) {
439 455 (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
440 456 RCA_UNSAFE_ALL);
441 457
442 458 return (ENOMEM);
443 459 }
444 460
445 461 oldsize = p->p_stksize;
446 462 ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
447 463
448 464 if (newsize <= oldsize) { /* prevent the stack from shrinking */
449 465 return (0);
450 466 }
451 467
452 468 if (!(p->p_stkprot & PROT_EXEC)) {
453 469 crargs.prot &= ~PROT_EXEC;
454 470 }
455 471 /*
456 472 * extend stack with the proposed new growszc, which is different
457 473 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
458 474 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
459 475 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
460 476 * if not aligned to szc's pgsz.
461 477 */
462 478 if (szc > 0) {
463 479 caddr_t oldsp = p->p_usrstack - oldsize;
464 480 caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
465 481 pgsz);
466 482
467 483 if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
468 484 crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
469 485 AS_MAP_NO_LPOOB;
470 486 } else if (oldsp == austk) {
471 487 crargs.szc = szc;
472 488 } else {
473 489 crargs.szc = AS_MAP_STACK;
474 490 }
475 491 } else {
476 492 crargs.szc = AS_MAP_NO_LPOOB;
477 493 }
478 494 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
479 495
480 496 if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
481 497 segvn_create, &crargs)) != 0) {
482 498 if (error == EAGAIN) {
483 499 cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
484 500 "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
485 501 }
486 502 return (error);
487 503 }
488 504 p->p_stksize = newsize;
489 505 return (0);
490 506 }
491 507
492 508 /*
493 - * Find address for user to map.
494 - * If MAP_FIXED is not specified, we can pick any address we want, but we will
495 - * first try the value in *addrp if it is non-NULL. Thus this is implementing
496 - * a way to try and get a preferred address.
509 + * Find address for user to map. If MAP_FIXED is not specified, we can pick
510 + * any address we want, but we will first try the value in *addrp if it is
511 + * non-NULL and _MAP_RANDOMIZE is not set. Thus this is implementing a way to
512 + * try and get a preferred address.
497 513 */
498 514 int
499 515 choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
500 516 int vacalign, uint_t flags)
501 517 {
502 518 caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
503 519 size_t lenp = len;
504 520
505 521 ASSERT(AS_ISCLAIMGAP(as)); /* searches should be serialized */
506 522 if (flags & MAP_FIXED) {
507 523 (void) as_unmap(as, *addrp, len);
508 524 return (0);
509 - } else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
525 + } else if (basep != NULL &&
526 + ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
510 527 !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
511 528 /* User supplied address was available */
512 529 *addrp = basep;
513 530 } else {
514 531 /*
515 532 * No user supplied address or the address supplied was not
516 533 * available.
517 534 */
518 535 map_addr(addrp, len, off, vacalign, flags);
519 536 }
520 537 if (*addrp == NULL)
521 538 return (ENOMEM);
522 539 return (0);
523 540 }
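Editor's note: from userland, the behaviour choose_addr() implements looks like this: a non-NULL address passed to mmap(2) without MAP_FIXED is only a preference, and once the process carries the ASLR secflag the kernel adds _MAP_RANDOMIZE and ignores the hint unless aslr_respect_mmap_hint is set. A small illustrative program (the hint value is arbitrary):

#include <sys/mman.h>
#include <stdio.h>

int
main(void)
{
	void *hint = (void *)0x40000000UL;	/* arbitrary example hint */
	void *p = mmap(hint, 8192, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return (1);
	}
	/* Under ASLR, p will usually differ from hint; without it, they often match. */
	(void) printf("asked for %p, got %p\n", hint, p);
	return (0);
}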
524 541
525 542
526 543 /*
527 544 * Used for MAP_ANON - fast way to get anonymous pages
528 545 */
529 546 static int
530 547 zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
531 548 offset_t pos)
532 549 {
533 550 struct segvn_crargs vn_a;
534 551 int error;
535 552
536 553 if (((PROT_ALL & uprot) != uprot))
537 554 return (EACCES);
538 555
539 556 if ((flags & MAP_FIXED) != 0) {
540 557 caddr_t userlimit;
541 558
542 559 /*
543 560 * Use the user address. First verify that
544 561 * the address to be used is page aligned.
545 562 * Then make some simple bounds checks.
546 563 */
547 564 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
548 565 return (EINVAL);
549 566
550 567 userlimit = flags & _MAP_LOW32 ?
551 568 (caddr_t)USERLIMIT32 : as->a_userlimit;
552 569 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
553 570 case RANGE_OKAY:
554 571 break;
555 572 case RANGE_BADPROT:
556 573 return (ENOTSUP);
557 574 case RANGE_BADADDR:
558 575 default:
559 576 return (ENOMEM);
560 577 }
561 578 }
562 579 /*
563 580 * No need to worry about vac alignment for anonymous
564 581 * pages since this is a "clone" object that doesn't
565 582 * yet exist.
566 583 */
567 584 error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
568 585 if (error != 0) {
569 586 return (error);
570 587 }
571 588
572 589 /*
573 590 * Use the seg_vn segment driver; passing in the NULL amp
574 591 * gives the desired "cloning" effect.
575 592 */
576 593 vn_a.vp = NULL;
577 594 vn_a.offset = 0;
578 595 vn_a.type = flags & MAP_TYPE;
579 596 vn_a.prot = uprot;
580 597 vn_a.maxprot = PROT_ALL;
581 598 vn_a.flags = flags & ~MAP_TYPE;
582 599 vn_a.cred = CRED();
583 600 vn_a.amp = NULL;
584 601 vn_a.szc = 0;
585 602 vn_a.lgrp_mem_policy_flags = 0;
586 603
587 604 return (as_map(as, *addrp, len, segvn_create, &vn_a));
588 605 }
589 606
607 +#define RANDOMIZABLE_MAPPING(addr, flags) (((flags & MAP_FIXED) == 0) && \
608 + !(((flags & MAP_ALIGN) == 0) && (addr != 0) && aslr_respect_mmap_hint))
609 +
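Editor's note: RANDOMIZABLE_MAPPING() is easy to misread because of the nested negation. An equivalent restatement with a few spot checks, written as a standalone user-level program purely for illustration (it is not part of the changeset):

#include <assert.h>
#include <stdint.h>
#include <sys/mman.h>

static int
randomizable(uintptr_t addr, int flags, int respect_hint)
{
	if (flags & MAP_FIXED)
		return (0);	/* fixed mappings are never randomized */
	/* A plain (non-MAP_ALIGN) hint suppresses randomization only if the tunable is set. */
	if (!(flags & MAP_ALIGN) && addr != 0 && respect_hint)
		return (0);
	return (1);
}

int
main(void)
{
	assert(randomizable(0, 0, 0) == 1);			/* no hint, no MAP_FIXED */
	assert(randomizable(0x10000, 0, 0) == 1);		/* hint given, tunable off */
	assert(randomizable(0x10000, 0, 1) == 0);		/* hint respected */
	assert(randomizable(0x10000, MAP_FIXED, 1) == 0);	/* MAP_FIXED never randomized */
	assert(randomizable(0x200000, MAP_ALIGN, 1) == 1);	/* alignment request, not a hint */
	return (0);
}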
590 610 static int
591 611 smmap_common(caddr_t *addrp, size_t len,
592 612 int prot, int flags, struct file *fp, offset_t pos)
593 613 {
594 614 struct vnode *vp;
595 615 struct as *as = curproc->p_as;
596 616 uint_t uprot, maxprot, type;
597 617 int error;
598 618 int in_crit = 0;
599 619
600 620 if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
601 621 _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
602 622 MAP_TEXT | MAP_INITDATA)) != 0) {
603 623 /* | MAP_RENAME */ /* not implemented, let user know */
604 624 return (EINVAL);
605 625 }
606 626
607 627 if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
608 628 return (EINVAL);
609 629 }
610 630
611 631 if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
612 632 return (EINVAL);
613 633 }
614 634
635 + if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
636 + (MAP_FIXED | _MAP_RANDOMIZE)) {
637 + return (EINVAL);
638 + }
639 +
640 + /*
641 + * If it's not a fixed allocation and mmap ASLR is enabled, randomize
642 + * it.
643 + */
644 + if (RANDOMIZABLE_MAPPING(*addrp, flags) &&
645 + secflag_enabled(curproc, PROC_SEC_ASLR))
646 + flags |= _MAP_RANDOMIZE;
647 +
615 648 #if defined(__sparc)
616 649 /*
617 650 * See if this is an "old mmap call". If so, remember this
618 651 * fact and convert the flags value given to mmap to indicate
619 652 * the specified address in the system call must be used.
620 653 	 * _MAP_NEW is set by all new uses of mmap.
621 654 */
622 655 if ((flags & _MAP_NEW) == 0)
623 656 flags |= MAP_FIXED;
624 657 #endif
625 658 flags &= ~_MAP_NEW;
626 659
627 660 type = flags & MAP_TYPE;
628 661 if (type != MAP_PRIVATE && type != MAP_SHARED)
629 662 return (EINVAL);
630 663
631 664
632 665 if (flags & MAP_ALIGN) {
633 -
634 666 if (flags & MAP_FIXED)
635 667 return (EINVAL);
636 668
637 669 /* alignment needs to be a power of 2 >= page size */
638 670 if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
639 671 !ISP2((uintptr_t)*addrp))
640 672 return (EINVAL);
641 673 }
642 674 /*
643 675 * Check for bad lengths and file position.
644 676 * We let the VOP_MAP routine check for negative lengths
645 677 * since on some vnode types this might be appropriate.
646 678 */
647 679 if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
648 680 return (EINVAL);
649 681
650 682 maxprot = PROT_ALL; /* start out allowing all accesses */
651 683 uprot = prot | PROT_USER;
652 684
653 685 if (fp == NULL) {
654 686 ASSERT(flags & MAP_ANON);
655 687 /* discard lwpchan mappings, like munmap() */
656 688 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
657 689 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
658 690 as_rangelock(as);
659 691 error = zmap(as, addrp, len, uprot, flags, pos);
660 692 as_rangeunlock(as);
661 693 /*
662 694 * Tell machine specific code that lwp has mapped shared memory
663 695 */
664 696 if (error == 0 && (flags & MAP_SHARED)) {
665 697 /* EMPTY */
666 698 LWP_MMODEL_SHARED_AS(*addrp, len);
667 699 }
668 700 return (error);
669 701 } else if ((flags & MAP_ANON) != 0)
670 702 return (EINVAL);
671 703
672 704 vp = fp->f_vnode;
673 705
674 706 /* Can't execute code from "noexec" mounted filesystem. */
675 707 if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
676 708 maxprot &= ~PROT_EXEC;
677 709
678 710 /*
679 711 * These checks were added as part of large files.
680 712 *
681 713 * Return ENXIO if the initial position is negative; return EOVERFLOW
682 714 * if (offset + len) would overflow the maximum allowed offset for the
683 715 * type of file descriptor being used.
684 716 */
685 717 if (vp->v_type == VREG) {
686 718 if (pos < 0)
687 719 return (ENXIO);
688 720 if ((offset_t)len > (OFFSET_MAX(fp) - pos))
689 721 return (EOVERFLOW);
690 722 }
691 723
692 724 if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
693 725 /* no write access allowed */
694 726 maxprot &= ~PROT_WRITE;
695 727 }
696 728
697 729 /*
698 730 * XXX - Do we also adjust maxprot based on protections
699 731 * of the vnode? E.g. if no execute permission is given
700 732 * on the vnode for the current user, maxprot probably
701 733 * should disallow PROT_EXEC also? This is different
702 734 * from the write access as this would be a per vnode
703 735 * test as opposed to a per fd test for writability.
704 736 */
705 737
706 738 /*
707 739 * Verify that the specified protections are not greater than
708 740 * the maximum allowable protections. Also test to make sure
709 741 * that the file descriptor does allows for read access since
710 742 * "write only" mappings are hard to do since normally we do
711 743 * the read from the file before the page can be written.
712 744 */
713 745 if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
714 746 return (EACCES);
715 747
716 748 /*
717 749 * If the user specified an address, do some simple checks here
718 750 */
719 751 if ((flags & MAP_FIXED) != 0) {
720 752 caddr_t userlimit;
721 753
722 754 /*
723 755 * Use the user address. First verify that
724 756 * the address to be used is page aligned.
725 757 * Then make some simple bounds checks.
726 758 */
727 759 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
728 760 return (EINVAL);
729 761
730 762 userlimit = flags & _MAP_LOW32 ?
731 763 (caddr_t)USERLIMIT32 : as->a_userlimit;
732 764 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
733 765 case RANGE_OKAY:
734 766 break;
735 767 case RANGE_BADPROT:
736 768 return (ENOTSUP);
737 769 case RANGE_BADADDR:
738 770 default:
739 771 return (ENOMEM);
740 772 }
741 773 }
742 774
743 775 if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
744 776 nbl_need_check(vp)) {
745 777 int svmand;
746 778 nbl_op_t nop;
747 779
748 780 nbl_start_crit(vp, RW_READER);
749 781 in_crit = 1;
750 782 error = nbl_svmand(vp, fp->f_cred, &svmand);
751 783 if (error != 0)
752 784 goto done;
753 785 if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
754 786 if (prot & (PROT_READ | PROT_EXEC)) {
755 787 nop = NBL_READWRITE;
756 788 } else {
757 789 nop = NBL_WRITE;
758 790 }
759 791 } else {
760 792 nop = NBL_READ;
761 793 }
762 794 if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
763 795 error = EACCES;
764 796 goto done;
765 797 }
766 798 }
767 799
768 800 /* discard lwpchan mappings, like munmap() */
769 801 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
770 802 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
771 803
772 804 /*
773 805 * Ok, now let the vnode map routine do its thing to set things up.
774 806 */
775 807 error = VOP_MAP(vp, pos, as,
776 808 addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
777 809
778 810 if (error == 0) {
779 811 /*
780 812 * Tell machine specific code that lwp has mapped shared memory
781 813 */
782 814 if (flags & MAP_SHARED) {
783 815 /* EMPTY */
784 816 LWP_MMODEL_SHARED_AS(*addrp, len);
785 817 }
786 818 if (vp->v_type == VREG &&
787 819 (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
788 820 /*
789 821 * Mark this as an executable vnode
790 822 */
791 823 mutex_enter(&vp->v_lock);
792 824 vp->v_flag |= VVMEXEC;
793 825 mutex_exit(&vp->v_lock);
794 826 }
795 827 }
796 828
797 829 done:
798 830 if (in_crit)
799 831 nbl_end_crit(vp);
800 832 return (error);
801 833 }
802 834
803 835 #ifdef _LP64
804 836 /*
805 837 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
806 838 *
807 839 * The "large file" mmap routine mmap64(2) is also mapped to this routine
808 840 * by the 64-bit version of libc.
809 841 *
810 842 * Eventually, this should be the only version, and have smmap_common()
811 843 * folded back into it again. Some day.
812 844 */
813 845 caddr_t
814 846 smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
815 847 {
816 848 struct file *fp;
817 849 int error;
818 850
819 851 if (fd == -1 && (flags & MAP_ANON) != 0)
820 852 error = smmap_common(&addr, len, prot, flags,
821 853 NULL, (offset_t)pos);
822 854 else if ((fp = getf(fd)) != NULL) {
823 855 error = smmap_common(&addr, len, prot, flags,
824 856 fp, (offset_t)pos);
825 857 releasef(fd);
826 858 } else
827 859 error = EBADF;
828 860
829 861 return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
830 862 }
831 863 #endif /* _LP64 */
832 864
833 865 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
834 866
835 867 /*
836 868 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
837 869 */
838 870 caddr_t
839 871 smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
840 872 {
841 873 struct file *fp;
842 874 int error;
843 875 caddr_t a = (caddr_t)(uintptr_t)addr;
844 876
845 877 if (flags & _MAP_LOW32)
846 878 error = EINVAL;
847 879 else if (fd == -1 && (flags & MAP_ANON) != 0)
848 880 error = smmap_common(&a, (size_t)len, prot,
849 881 flags | _MAP_LOW32, NULL, (offset_t)pos);
850 882 else if ((fp = getf(fd)) != NULL) {
851 883 error = smmap_common(&a, (size_t)len, prot,
852 884 flags | _MAP_LOW32, fp, (offset_t)pos);
853 885 releasef(fd);
854 886 } else
855 887 error = EBADF;
856 888
857 889 ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
858 890
859 891 return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
860 892 }
861 893
862 894 /*
863 895 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
864 896 *
865 897 * Now things really get ugly because we can't use the C-style
866 898 * calling convention for more than 6 args, and 64-bit parameter
867 899 * passing on 32-bit systems is less than clean.
868 900 */
869 901
870 902 struct mmaplf32a {
871 903 caddr_t addr;
872 904 size_t len;
873 905 #ifdef _LP64
874 906 /*
875 907 * 32-bit contents, 64-bit cells
876 908 */
877 909 uint64_t prot;
878 910 uint64_t flags;
879 911 uint64_t fd;
880 912 uint64_t offhi;
881 913 uint64_t offlo;
882 914 #else
883 915 /*
884 916 * 32-bit contents, 32-bit cells
885 917 */
886 918 uint32_t prot;
887 919 uint32_t flags;
888 920 uint32_t fd;
889 921 uint32_t offhi;
890 922 uint32_t offlo;
891 923 #endif
892 924 };
893 925
894 926 int
895 927 smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
896 928 {
897 929 struct file *fp;
898 930 int error;
899 931 caddr_t a = uap->addr;
900 932 int flags = (int)uap->flags;
901 933 int fd = (int)uap->fd;
902 934 #ifdef _BIG_ENDIAN
903 935 offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
904 936 #else
905 937 offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
906 938 #endif
907 939
908 940 if (flags & _MAP_LOW32)
909 941 error = EINVAL;
910 942 else if (fd == -1 && (flags & MAP_ANON) != 0)
911 943 error = smmap_common(&a, uap->len, (int)uap->prot,
912 944 flags | _MAP_LOW32, NULL, off);
913 945 else if ((fp = getf(fd)) != NULL) {
914 946 error = smmap_common(&a, uap->len, (int)uap->prot,
915 947 flags | _MAP_LOW32, fp, off);
916 948 releasef(fd);
917 949 } else
918 950 error = EBADF;
919 951
920 952 if (error == 0)
921 953 rvp->r_val1 = (uintptr_t)a;
922 954 return (error);
923 955 }
924 956
925 957 #endif /* _SYSCALL32_IMPL || _ILP32 */
926 958
927 959 int
928 960 munmap(caddr_t addr, size_t len)
929 961 {
930 962 struct proc *p = curproc;
931 963 struct as *as = p->p_as;
932 964
933 965 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
934 966 return (set_errno(EINVAL));
935 967
936 968 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
937 969 return (set_errno(EINVAL));
938 970
939 971 /*
940 972 * Discard lwpchan mappings.
941 973 */
942 974 if (p->p_lcp != NULL)
943 975 lwpchan_delete_mapping(p, addr, addr + len);
944 976 if (as_unmap(as, addr, len) != 0)
945 977 return (set_errno(EINVAL));
946 978
947 979 return (0);
948 980 }
949 981
950 982 int
951 983 mprotect(caddr_t addr, size_t len, int prot)
952 984 {
953 985 struct as *as = curproc->p_as;
954 986 uint_t uprot = prot | PROT_USER;
955 987 int error;
956 988
957 989 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
958 990 return (set_errno(EINVAL));
959 991
960 992 switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
961 993 case RANGE_OKAY:
962 994 break;
963 995 case RANGE_BADPROT:
964 996 return (set_errno(ENOTSUP));
965 997 case RANGE_BADADDR:
966 998 default:
967 999 return (set_errno(ENOMEM));
968 1000 }
969 1001
970 1002 error = as_setprot(as, addr, len, uprot);
971 1003 if (error)
972 1004 return (set_errno(error));
973 1005 return (0);
974 1006 }
975 1007
976 1008 #define MC_CACHE 128 /* internal result buffer */
977 1009 #define MC_QUANTUM (MC_CACHE * PAGESIZE) /* addresses covered in loop */
978 1010
979 1011 int
980 1012 mincore(caddr_t addr, size_t len, char *vecp)
981 1013 {
982 1014 struct as *as = curproc->p_as;
983 1015 caddr_t ea; /* end address of loop */
984 1016 size_t rl; /* inner result length */
985 1017 char vec[MC_CACHE]; /* local vector cache */
986 1018 int error;
987 1019 model_t model;
988 1020 long llen;
989 1021
990 1022 model = get_udatamodel();
991 1023 /*
992 1024 * Validate form of address parameters.
993 1025 */
994 1026 if (model == DATAMODEL_NATIVE) {
995 1027 llen = (long)len;
996 1028 } else {
997 1029 llen = (int32_t)(size32_t)len;
998 1030 }
999 1031 if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
1000 1032 return (set_errno(EINVAL));
1001 1033
1002 1034 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
1003 1035 return (set_errno(ENOMEM));
1004 1036
1005 1037 /*
1006 1038 * Loop over subranges of interval [addr : addr + len), recovering
1007 1039 * results internally and then copying them out to caller. Subrange
1008 1040 * is based on the size of MC_CACHE, defined above.
1009 1041 */
1010 1042 for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
1011 1043 error = as_incore(as, addr,
1012 1044 (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
1013 1045 if (rl != 0) {
1014 1046 rl = (rl + PAGESIZE - 1) / PAGESIZE;
1015 1047 if (copyout(vec, vecp, rl) != 0)
1016 1048 return (set_errno(EFAULT));
1017 1049 vecp += rl;
1018 1050 }
1019 1051 if (error != 0)
1020 1052 return (set_errno(ENOMEM));
1021 1053 }
1022 1054 return (0);
1023 1055 }