uts: Allow for address space randomisation.
Randomise the base addresses of shared objects, non-fixed mappings, the
stack and the heap. Introduce a service, svc:/system/process-security,
and a tool psecflags(1) to control and observe it.
libc: adjust brk(0) to return the existing break, and use it to initialize sbrk().
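
To illustrate the libc note above: with brk(0) now returning the existing break, sbrk(3C) can obtain its starting point from the kernel instead of deriving it from _end. The sketch below shows only the idea, not the actual libc change; raw_brk() is a hypothetical wrapper around the raw brk trap that passes the kernel's intptr_t return value straight through, and the serialisation the real sbrk(3C) performs is omitted.

	#include <stdint.h>
	#include <stddef.h>

	/*
	 * Hypothetical raw-syscall wrapper: returns 0 or -1 (errno set) for an
	 * ordinary call, and the current break when passed a new break of 0.
	 */
	extern intptr_t raw_brk(void *nva);

	static char *curbrk;		/* cached break, initialised lazily */

	void *
	sbrk_sketch(intptr_t incr)
	{
		char *oldbrk;

		if (curbrk == NULL) {
			/* First call: learn the current break from the kernel. */
			curbrk = (char *)raw_brk(NULL);
		}

		oldbrk = curbrk;
		if (incr != 0) {
			if (raw_brk(oldbrk + incr) != 0)
				return ((void *)-1);	/* errno already set */
			curbrk = oldbrk + incr;
		}
		return (oldbrk);
	}

The kernel-side special case exists only to give libc an authoritative starting value; once initialised, the cached break is maintained entirely in user space, which is why the comment added to brk() notes that the 0 case will be hidden inside brk(3C).
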
--- old/usr/src/uts/common/os/grow.c
+++ new/usr/src/uts/common/os/grow.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
23 23
24 24 /*
25 25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
26 26 * Use is subject to license terms.
27 27 */
28 28
29 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 30 /* All Rights Reserved */
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/inttypes.h>
34 34 #include <sys/param.h>
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/systm.h>
37 37 #include <sys/signal.h>
38 38 #include <sys/user.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/var.h>
41 41 #include <sys/proc.h>
42 42 #include <sys/tuneable.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/cmn_err.h>
45 45 #include <sys/cred.h>
46 46 #include <sys/vnode.h>
47 47 #include <sys/vfs.h>
48 48 #include <sys/vm.h>
49 49 #include <sys/file.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/vmparam.h>
52 52 #include <sys/fcntl.h>
53 53 #include <sys/lwpchan_impl.h>
54 54 #include <sys/nbmlock.h>
55 55
56 56 #include <vm/hat.h>
57 57 #include <vm/as.h>
[57 lines elided]
58 58 #include <vm/seg.h>
59 59 #include <vm/seg_dev.h>
60 60 #include <vm/seg_vn.h>
61 61
62 62 int use_brk_lpg = 1;
63 63 int use_stk_lpg = 1;
64 64
65 65 static int brk_lpg(caddr_t nva);
66 66 static int grow_lpg(caddr_t sp);
67 67
68 -int
68 +intptr_t
69 69 brk(caddr_t nva)
70 70 {
71 71 int error;
72 72 proc_t *p = curproc;
73 73
74 74 /*
75 + * As a special case to aid the implementation of sbrk(3C), if given a
76 + * new brk of 0, return the current brk. We'll hide this in brk(3C).
77 + */
78 + if (nva == 0)
79 + return ((intptr_t)(p->p_brkbase + p->p_brksize));
80 +
81 + /*
75 82 * Serialize brk operations on an address space.
76 83 * This also serves as the lock protecting p_brksize
77 84 * and p_brkpageszc.
78 85 */
79 86 as_rangelock(p->p_as);
80 87 if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
81 88 error = brk_lpg(nva);
82 89 } else {
83 90 error = brk_internal(nva, p->p_brkpageszc);
84 91 }
85 92 as_rangeunlock(p->p_as);
86 93 return ((error != 0 ? set_errno(error) : 0));
87 94 }
88 95
89 96 /*
90 97 * Algorithm: call arch-specific map_pgsz to get best page size to use,
91 98 * then call brk_internal().
92 99 * Returns 0 on success.
93 100 */
94 101 static int
95 102 brk_lpg(caddr_t nva)
96 103 {
97 104 struct proc *p = curproc;
98 105 size_t pgsz, len;
99 106 caddr_t addr, brkend;
100 107 caddr_t bssbase = p->p_bssbase;
101 108 caddr_t brkbase = p->p_brkbase;
102 109 int oszc, szc;
103 110 int err;
104 111
105 112 oszc = p->p_brkpageszc;
106 113
107 114 /*
108 115 * If p_brkbase has not yet been set, the first call
109 116 * to brk_internal() will initialize it.
110 117 */
111 118 if (brkbase == 0) {
112 119 return (brk_internal(nva, oszc));
113 120 }
114 121
115 122 len = nva - bssbase;
116 123
117 124 pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
118 125 szc = page_szc(pgsz);
119 126
120 127 /*
121 128 * Covers two cases:
122 129 * 1. page_szc() returns -1 for invalid page size, so we want to
123 130 * ignore it in that case.
124 131 * 2. By design we never decrease page size, as it is more stable.
125 132 */
126 133 if (szc <= oszc) {
127 134 err = brk_internal(nva, oszc);
128 135 /* If failed, back off to base page size. */
129 136 if (err != 0 && oszc != 0) {
130 137 err = brk_internal(nva, 0);
131 138 }
132 139 return (err);
133 140 }
134 141
135 142 err = brk_internal(nva, szc);
136 143 /* If using szc failed, map with base page size and return. */
137 144 if (err != 0) {
138 145 if (szc != 0) {
139 146 err = brk_internal(nva, 0);
140 147 }
141 148 return (err);
142 149 }
143 150
144 151 /*
145 152 * Round up brk base to a large page boundary and remap
146 153 * anything in the segment already faulted in beyond that
147 154 * point.
148 155 */
149 156 addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
150 157 brkend = brkbase + p->p_brksize;
151 158 len = brkend - addr;
152 159 /* Check that len is not negative. Update page size code for heap. */
153 160 if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
154 161 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
155 162 p->p_brkpageszc = szc;
156 163 }
157 164
158 165 ASSERT(err == 0);
159 166 return (err); /* should always be 0 */
160 167 }
161 168
162 169 /*
163 170 * Returns 0 on success.
164 171 */
165 172 int
166 173 brk_internal(caddr_t nva, uint_t brkszc)
167 174 {
168 175 caddr_t ova; /* current break address */
169 176 size_t size;
170 177 int error;
171 178 struct proc *p = curproc;
172 179 struct as *as = p->p_as;
173 180 size_t pgsz;
174 181 uint_t szc;
175 182 rctl_qty_t as_rctl;
176 183
177 184 /*
178 185 * extend heap to brkszc alignment but use current p->p_brkpageszc
179 186 * for the newly created segment. This allows the new extension
180 187 * segment to be concatenated successfully with the existing brk
181 188 * segment.
182 189 */
183 190 if ((szc = brkszc) != 0) {
184 191 pgsz = page_get_pagesize(szc);
185 192 ASSERT(pgsz > PAGESIZE);
186 193 } else {
187 194 pgsz = PAGESIZE;
188 195 }
189 196
190 197 mutex_enter(&p->p_lock);
191 198 as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
192 199 p->p_rctls, p);
193 200 mutex_exit(&p->p_lock);
194 201
195 202 /*
196 203 * If p_brkbase has not yet been set, the first call
197 204 * to brk() will initialize it.
198 205 */
199 206 if (p->p_brkbase == 0)
200 207 p->p_brkbase = nva;
201 208
202 209 /*
203 210 * Before multiple page size support existed p_brksize was the value
204 211 * not rounded to the pagesize (i.e. it stored the exact user request
205 212 * for heap size). If pgsz is greater than PAGESIZE calculate the
206 213 * heap size as the real new heap size by rounding it up to pgsz.
207 214 * This is useful since we may want to know where the heap ends
208 215 * without knowing heap pagesize (e.g. some old code) and also if
209 216 * heap pagesize changes we can update p_brkpageszc but delay adding
210 217 * new mapping yet still know from p_brksize where the heap really
211 218 	 * ends. The user-requested heap end is stored in a libc variable.
212 219 */
213 220 if (pgsz > PAGESIZE) {
214 221 caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
215 222 size = tnva - p->p_brkbase;
216 223 if (tnva < p->p_brkbase || (size > p->p_brksize &&
217 224 size > (size_t)as_rctl)) {
218 225 szc = 0;
219 226 pgsz = PAGESIZE;
220 227 size = nva - p->p_brkbase;
221 228 }
222 229 } else {
223 230 size = nva - p->p_brkbase;
224 231 }
225 232
226 233 /*
227 234 * use PAGESIZE to roundup ova because we want to know the real value
228 235 * of the current heap end in case p_brkpageszc changes since the last
229 236 * p_brksize was computed.
230 237 */
231 238 nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
232 239 ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
233 240 PAGESIZE);
234 241
235 242 if ((nva < p->p_brkbase) || (size > p->p_brksize &&
236 243 size > as_rctl)) {
237 244 mutex_enter(&p->p_lock);
238 245 (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
239 246 RCA_SAFE);
240 247 mutex_exit(&p->p_lock);
241 248 return (ENOMEM);
242 249 }
243 250
244 251 if (nva > ova) {
245 252 struct segvn_crargs crargs =
246 253 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
247 254
248 255 if (!(p->p_datprot & PROT_EXEC)) {
249 256 crargs.prot &= ~PROT_EXEC;
250 257 }
251 258
252 259 /*
253 260 * Add new zfod mapping to extend UNIX data segment
254 261 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
255 262 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
256 263 * page sizes if ova is not aligned to szc's pgsz.
257 264 */
258 265 if (szc > 0) {
259 266 caddr_t rbss;
260 267
261 268 rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
262 269 pgsz);
263 270 if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
264 271 crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
265 272 AS_MAP_NO_LPOOB;
266 273 } else if (ova == rbss) {
267 274 crargs.szc = szc;
268 275 } else {
269 276 crargs.szc = AS_MAP_HEAP;
270 277 }
271 278 } else {
272 279 crargs.szc = AS_MAP_NO_LPOOB;
273 280 }
274 281 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
275 282 error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
276 283 &crargs);
277 284 if (error) {
278 285 return (error);
279 286 }
280 287
281 288 } else if (nva < ova) {
282 289 /*
283 290 * Release mapping to shrink UNIX data segment.
284 291 */
285 292 (void) as_unmap(as, nva, (size_t)(ova - nva));
286 293 }
287 294 p->p_brksize = size;
288 295 return (0);
289 296 }
290 297
291 298 /*
292 299 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
293 300 * This routine assumes that the stack grows downward.
294 301 */
295 302 int
296 303 grow(caddr_t sp)
297 304 {
298 305 struct proc *p = curproc;
299 306 struct as *as = p->p_as;
300 307 size_t oldsize = p->p_stksize;
301 308 size_t newsize;
302 309 int err;
303 310
304 311 /*
305 312 * Serialize grow operations on an address space.
306 313 * This also serves as the lock protecting p_stksize
307 314 * and p_stkpageszc.
308 315 */
309 316 as_rangelock(as);
310 317 if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
311 318 err = grow_lpg(sp);
312 319 } else {
313 320 err = grow_internal(sp, p->p_stkpageszc);
314 321 }
315 322 as_rangeunlock(as);
316 323
317 324 if (err == 0 && (newsize = p->p_stksize) > oldsize) {
318 325 ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
319 326 ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
320 327 /*
321 328 * Set up translations so the process doesn't have to fault in
322 329 * the stack pages we just gave it.
323 330 */
324 331 (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
325 332 newsize - oldsize, F_INVAL, S_WRITE);
326 333 }
327 334 return ((err == 0 ? 1 : 0));
328 335 }
329 336
330 337 /*
331 338 * Algorithm: call arch-specific map_pgsz to get best page size to use,
332 339 * then call grow_internal().
333 340 * Returns 0 on success.
334 341 */
335 342 static int
336 343 grow_lpg(caddr_t sp)
337 344 {
338 345 struct proc *p = curproc;
339 346 size_t pgsz;
340 347 size_t len, newsize;
341 348 caddr_t addr, saddr;
342 349 caddr_t growend;
343 350 int oszc, szc;
344 351 int err;
345 352
346 353 newsize = p->p_usrstack - sp;
347 354
348 355 oszc = p->p_stkpageszc;
349 356 pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
350 357 szc = page_szc(pgsz);
351 358
352 359 /*
353 360 * Covers two cases:
354 361 * 1. page_szc() returns -1 for invalid page size, so we want to
355 362 * ignore it in that case.
356 363 * 2. By design we never decrease page size, as it is more stable.
357 364 * This shouldn't happen as the stack never shrinks.
358 365 */
359 366 if (szc <= oszc) {
360 367 err = grow_internal(sp, oszc);
361 368 /* failed, fall back to base page size */
362 369 if (err != 0 && oszc != 0) {
363 370 err = grow_internal(sp, 0);
364 371 }
365 372 return (err);
366 373 }
367 374
368 375 /*
369 376 * We've grown sufficiently to switch to a new page size.
370 377 * So we are going to remap the whole segment with the new page size.
371 378 */
372 379 err = grow_internal(sp, szc);
373 380 /* The grow with szc failed, so fall back to base page size. */
374 381 if (err != 0) {
375 382 if (szc != 0) {
376 383 err = grow_internal(sp, 0);
377 384 }
378 385 return (err);
379 386 }
380 387
381 388 /*
382 389 * Round up stack pointer to a large page boundary and remap
383 390 * any pgsz pages in the segment already faulted in beyond that
384 391 * point.
385 392 */
386 393 saddr = p->p_usrstack - p->p_stksize;
387 394 addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
388 395 growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
389 396 len = growend - addr;
390 397 /* Check that len is not negative. Update page size code for stack. */
391 398 if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
392 399 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
393 400 p->p_stkpageszc = szc;
394 401 }
395 402
396 403 ASSERT(err == 0);
397 404 return (err); /* should always be 0 */
398 405 }
399 406
400 407 /*
401 408 * This routine assumes that the stack grows downward.
402 409 * Returns 0 on success, errno on failure.
403 410 */
404 411 int
405 412 grow_internal(caddr_t sp, uint_t growszc)
406 413 {
407 414 struct proc *p = curproc;
408 415 size_t newsize;
409 416 size_t oldsize;
410 417 int error;
411 418 size_t pgsz;
412 419 uint_t szc;
413 420 struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
414 421
415 422 ASSERT(sp < p->p_usrstack);
416 423 sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
417 424
418 425 /*
419 426 * grow to growszc alignment but use current p->p_stkpageszc for
420 427 * the segvn_crargs szc passed to segvn_create. For memcntl to
421 428 * increase the szc, this allows the new extension segment to be
422 429 * concatenated successfully with the existing stack segment.
423 430 */
424 431 if ((szc = growszc) != 0) {
425 432 pgsz = page_get_pagesize(szc);
426 433 ASSERT(pgsz > PAGESIZE);
427 434 newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
428 435 if (newsize > (size_t)p->p_stk_ctl) {
429 436 szc = 0;
430 437 pgsz = PAGESIZE;
431 438 newsize = p->p_usrstack - sp;
432 439 }
433 440 } else {
434 441 pgsz = PAGESIZE;
435 442 newsize = p->p_usrstack - sp;
436 443 }
437 444
438 445 if (newsize > (size_t)p->p_stk_ctl) {
439 446 (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
440 447 RCA_UNSAFE_ALL);
441 448
442 449 return (ENOMEM);
443 450 }
444 451
445 452 oldsize = p->p_stksize;
446 453 ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
447 454
448 455 if (newsize <= oldsize) { /* prevent the stack from shrinking */
449 456 return (0);
450 457 }
451 458
452 459 if (!(p->p_stkprot & PROT_EXEC)) {
453 460 crargs.prot &= ~PROT_EXEC;
454 461 }
455 462 /*
456 463 * extend stack with the proposed new growszc, which is different
457 464 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
458 465 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
459 466 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
460 467 * if not aligned to szc's pgsz.
461 468 */
462 469 if (szc > 0) {
463 470 caddr_t oldsp = p->p_usrstack - oldsize;
464 471 caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
465 472 pgsz);
466 473
467 474 if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
468 475 crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
469 476 AS_MAP_NO_LPOOB;
470 477 } else if (oldsp == austk) {
471 478 crargs.szc = szc;
472 479 } else {
473 480 crargs.szc = AS_MAP_STACK;
474 481 }
475 482 } else {
476 483 crargs.szc = AS_MAP_NO_LPOOB;
477 484 }
478 485 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
479 486
480 487 if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
481 488 segvn_create, &crargs)) != 0) {
482 489 if (error == EAGAIN) {
[398 lines elided]
483 490 cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
484 491 "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
485 492 }
486 493 return (error);
487 494 }
488 495 p->p_stksize = newsize;
489 496 return (0);
490 497 }
491 498
492 499 /*
493 - * Find address for user to map.
494 - * If MAP_FIXED is not specified, we can pick any address we want, but we will
495 - * first try the value in *addrp if it is non-NULL. Thus this is implementing
496 - * a way to try and get a preferred address.
500 + * Find address for user to map. If MAP_FIXED is not specified, we can pick
501 + * any address we want, but we will first try the value in *addrp if it is
502 + * non-NULL and _MAP_RANDOMIZE is not set. Thus this is implementing a way to
503 + * try and get a preferred address.
497 504 */
498 505 int
499 506 choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
500 507 int vacalign, uint_t flags)
501 508 {
502 509 caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
503 510 size_t lenp = len;
504 511
505 512 ASSERT(AS_ISCLAIMGAP(as)); /* searches should be serialized */
506 513 if (flags & MAP_FIXED) {
507 514 (void) as_unmap(as, *addrp, len);
508 515 return (0);
509 - } else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
516 + } else if (basep != NULL &&
517 + ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
510 518 !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
511 519 /* User supplied address was available */
512 520 *addrp = basep;
513 521 } else {
514 522 /*
515 523 * No user supplied address or the address supplied was not
516 524 * available.
517 525 */
518 526 map_addr(addrp, len, off, vacalign, flags);
519 527 }
520 528 if (*addrp == NULL)
521 529 return (ENOMEM);
522 530 return (0);
523 531 }
524 532
525 533
526 534 /*
527 535 * Used for MAP_ANON - fast way to get anonymous pages
528 536 */
529 537 static int
530 538 zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
531 539 offset_t pos)
532 540 {
533 541 struct segvn_crargs vn_a;
534 542 int error;
535 543
536 544 if (((PROT_ALL & uprot) != uprot))
537 545 return (EACCES);
538 546
539 547 if ((flags & MAP_FIXED) != 0) {
540 548 caddr_t userlimit;
541 549
542 550 /*
543 551 * Use the user address. First verify that
544 552 * the address to be used is page aligned.
545 553 * Then make some simple bounds checks.
546 554 */
547 555 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
548 556 return (EINVAL);
549 557
550 558 userlimit = flags & _MAP_LOW32 ?
551 559 (caddr_t)USERLIMIT32 : as->a_userlimit;
552 560 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
553 561 case RANGE_OKAY:
554 562 break;
555 563 case RANGE_BADPROT:
556 564 return (ENOTSUP);
557 565 case RANGE_BADADDR:
558 566 default:
559 567 return (ENOMEM);
560 568 }
561 569 }
562 570 /*
563 571 * No need to worry about vac alignment for anonymous
564 572 * pages since this is a "clone" object that doesn't
565 573 * yet exist.
566 574 */
567 575 error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
568 576 if (error != 0) {
569 577 return (error);
570 578 }
571 579
572 580 /*
573 581 * Use the seg_vn segment driver; passing in the NULL amp
574 582 * gives the desired "cloning" effect.
575 583 */
576 584 vn_a.vp = NULL;
577 585 vn_a.offset = 0;
578 586 vn_a.type = flags & MAP_TYPE;
579 587 vn_a.prot = uprot;
580 588 vn_a.maxprot = PROT_ALL;
581 589 vn_a.flags = flags & ~MAP_TYPE;
582 590 vn_a.cred = CRED();
583 591 vn_a.amp = NULL;
584 592 vn_a.szc = 0;
585 593 vn_a.lgrp_mem_policy_flags = 0;
586 594
587 595 return (as_map(as, *addrp, len, segvn_create, &vn_a));
588 596 }
589 597
590 598 static int
591 599 smmap_common(caddr_t *addrp, size_t len,
592 600 int prot, int flags, struct file *fp, offset_t pos)
593 601 {
594 602 struct vnode *vp;
595 603 struct as *as = curproc->p_as;
596 604 uint_t uprot, maxprot, type;
597 605 int error;
598 606 int in_crit = 0;
599 607
600 608 if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
601 609 _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
602 610 MAP_TEXT | MAP_INITDATA)) != 0) {
603 611 /* | MAP_RENAME */ /* not implemented, let user know */
604 612 return (EINVAL);
[85 lines elided]
605 613 }
606 614
607 615 if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
608 616 return (EINVAL);
609 617 }
610 618
611 619 if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
612 620 return (EINVAL);
613 621 }
614 622
623 + if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) == (MAP_FIXED | _MAP_RANDOMIZE)) {
624 + return (EINVAL);
625 + }
626 +
627 + /* If it's not a fixed allocation and mmap ASLR is enabled, randomize it. */
628 + if (((flags & MAP_FIXED) == 0) &&
629 + secflag_enabled(curproc, PROC_SEC_ASLR))
630 + flags |= _MAP_RANDOMIZE;
631 +
615 632 #if defined(__sparc)
616 633 /*
617 634 * See if this is an "old mmap call". If so, remember this
618 635 * fact and convert the flags value given to mmap to indicate
619 636 * the specified address in the system call must be used.
620 637 	 * _MAP_NEW is set by all new uses of mmap.
621 638 */
622 639 if ((flags & _MAP_NEW) == 0)
623 640 flags |= MAP_FIXED;
624 641 #endif
625 642 flags &= ~_MAP_NEW;
626 643
627 644 type = flags & MAP_TYPE;
628 645 if (type != MAP_PRIVATE && type != MAP_SHARED)
629 646 return (EINVAL);
630 647
631 648
632 649 if (flags & MAP_ALIGN) {
633 -
634 650 if (flags & MAP_FIXED)
635 651 return (EINVAL);
636 652
637 653 /* alignment needs to be a power of 2 >= page size */
638 654 if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
639 655 !ISP2((uintptr_t)*addrp))
640 656 return (EINVAL);
641 657 }
642 658 /*
643 659 * Check for bad lengths and file position.
644 660 * We let the VOP_MAP routine check for negative lengths
645 661 * since on some vnode types this might be appropriate.
646 662 */
647 663 if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
648 664 return (EINVAL);
649 665
650 666 maxprot = PROT_ALL; /* start out allowing all accesses */
651 667 uprot = prot | PROT_USER;
652 668
653 669 if (fp == NULL) {
654 670 ASSERT(flags & MAP_ANON);
655 671 /* discard lwpchan mappings, like munmap() */
656 672 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
657 673 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
658 674 as_rangelock(as);
659 675 error = zmap(as, addrp, len, uprot, flags, pos);
660 676 as_rangeunlock(as);
661 677 /*
662 678 * Tell machine specific code that lwp has mapped shared memory
663 679 */
664 680 if (error == 0 && (flags & MAP_SHARED)) {
665 681 /* EMPTY */
666 682 LWP_MMODEL_SHARED_AS(*addrp, len);
667 683 }
668 684 return (error);
669 685 } else if ((flags & MAP_ANON) != 0)
670 686 return (EINVAL);
671 687
672 688 vp = fp->f_vnode;
673 689
674 690 /* Can't execute code from "noexec" mounted filesystem. */
675 691 if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
676 692 maxprot &= ~PROT_EXEC;
677 693
678 694 /*
679 695 * These checks were added as part of large files.
680 696 *
681 697 * Return ENXIO if the initial position is negative; return EOVERFLOW
682 698 * if (offset + len) would overflow the maximum allowed offset for the
683 699 * type of file descriptor being used.
684 700 */
685 701 if (vp->v_type == VREG) {
686 702 if (pos < 0)
687 703 return (ENXIO);
688 704 if ((offset_t)len > (OFFSET_MAX(fp) - pos))
689 705 return (EOVERFLOW);
690 706 }
691 707
692 708 if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
693 709 /* no write access allowed */
694 710 maxprot &= ~PROT_WRITE;
695 711 }
696 712
697 713 /*
698 714 * XXX - Do we also adjust maxprot based on protections
699 715 * of the vnode? E.g. if no execute permission is given
700 716 * on the vnode for the current user, maxprot probably
701 717 * should disallow PROT_EXEC also? This is different
702 718 * from the write access as this would be a per vnode
703 719 * test as opposed to a per fd test for writability.
704 720 */
705 721
706 722 /*
707 723 * Verify that the specified protections are not greater than
708 724 * the maximum allowable protections. Also test to make sure
709 725 	 * that the file descriptor allows for read access since
710 726 * "write only" mappings are hard to do since normally we do
711 727 * the read from the file before the page can be written.
712 728 */
713 729 if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
714 730 return (EACCES);
715 731
716 732 /*
717 733 * If the user specified an address, do some simple checks here
718 734 */
719 735 if ((flags & MAP_FIXED) != 0) {
720 736 caddr_t userlimit;
721 737
722 738 /*
723 739 * Use the user address. First verify that
724 740 * the address to be used is page aligned.
725 741 * Then make some simple bounds checks.
726 742 */
727 743 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
728 744 return (EINVAL);
729 745
730 746 userlimit = flags & _MAP_LOW32 ?
731 747 (caddr_t)USERLIMIT32 : as->a_userlimit;
732 748 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
733 749 case RANGE_OKAY:
734 750 break;
735 751 case RANGE_BADPROT:
736 752 return (ENOTSUP);
737 753 case RANGE_BADADDR:
738 754 default:
739 755 return (ENOMEM);
740 756 }
741 757 }
742 758
743 759 if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
744 760 nbl_need_check(vp)) {
745 761 int svmand;
746 762 nbl_op_t nop;
747 763
748 764 nbl_start_crit(vp, RW_READER);
749 765 in_crit = 1;
750 766 error = nbl_svmand(vp, fp->f_cred, &svmand);
751 767 if (error != 0)
752 768 goto done;
753 769 if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
754 770 if (prot & (PROT_READ | PROT_EXEC)) {
755 771 nop = NBL_READWRITE;
756 772 } else {
757 773 nop = NBL_WRITE;
758 774 }
759 775 } else {
760 776 nop = NBL_READ;
761 777 }
762 778 if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
763 779 error = EACCES;
764 780 goto done;
765 781 }
766 782 }
767 783
768 784 /* discard lwpchan mappings, like munmap() */
769 785 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
770 786 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
771 787
772 788 /*
773 789 * Ok, now let the vnode map routine do its thing to set things up.
774 790 */
775 791 error = VOP_MAP(vp, pos, as,
776 792 addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
777 793
778 794 if (error == 0) {
779 795 /*
780 796 * Tell machine specific code that lwp has mapped shared memory
781 797 */
782 798 if (flags & MAP_SHARED) {
783 799 /* EMPTY */
784 800 LWP_MMODEL_SHARED_AS(*addrp, len);
785 801 }
786 802 if (vp->v_type == VREG &&
787 803 (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
788 804 /*
789 805 * Mark this as an executable vnode
790 806 */
791 807 mutex_enter(&vp->v_lock);
792 808 vp->v_flag |= VVMEXEC;
793 809 mutex_exit(&vp->v_lock);
794 810 }
795 811 }
796 812
797 813 done:
798 814 if (in_crit)
799 815 nbl_end_crit(vp);
800 816 return (error);
801 817 }
802 818
803 819 #ifdef _LP64
804 820 /*
805 821 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
806 822 *
807 823 * The "large file" mmap routine mmap64(2) is also mapped to this routine
808 824 * by the 64-bit version of libc.
809 825 *
810 826 * Eventually, this should be the only version, and have smmap_common()
811 827 * folded back into it again. Some day.
812 828 */
813 829 caddr_t
814 830 smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
815 831 {
816 832 struct file *fp;
817 833 int error;
818 834
819 835 if (fd == -1 && (flags & MAP_ANON) != 0)
820 836 error = smmap_common(&addr, len, prot, flags,
821 837 NULL, (offset_t)pos);
822 838 else if ((fp = getf(fd)) != NULL) {
823 839 error = smmap_common(&addr, len, prot, flags,
824 840 fp, (offset_t)pos);
825 841 releasef(fd);
826 842 } else
827 843 error = EBADF;
828 844
829 845 return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
830 846 }
831 847 #endif /* _LP64 */
832 848
833 849 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
834 850
835 851 /*
836 852 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
837 853 */
838 854 caddr_t
839 855 smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
840 856 {
841 857 struct file *fp;
842 858 int error;
843 859 caddr_t a = (caddr_t)(uintptr_t)addr;
844 860
845 861 if (flags & _MAP_LOW32)
846 862 error = EINVAL;
847 863 else if (fd == -1 && (flags & MAP_ANON) != 0)
848 864 error = smmap_common(&a, (size_t)len, prot,
849 865 flags | _MAP_LOW32, NULL, (offset_t)pos);
850 866 else if ((fp = getf(fd)) != NULL) {
851 867 error = smmap_common(&a, (size_t)len, prot,
852 868 flags | _MAP_LOW32, fp, (offset_t)pos);
853 869 releasef(fd);
854 870 } else
855 871 error = EBADF;
856 872
857 873 ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
858 874
859 875 return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
860 876 }
861 877
862 878 /*
863 879 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
864 880 *
865 881 * Now things really get ugly because we can't use the C-style
866 882 * calling convention for more than 6 args, and 64-bit parameter
867 883 * passing on 32-bit systems is less than clean.
868 884 */
869 885
870 886 struct mmaplf32a {
871 887 caddr_t addr;
872 888 size_t len;
873 889 #ifdef _LP64
874 890 /*
875 891 * 32-bit contents, 64-bit cells
876 892 */
877 893 uint64_t prot;
878 894 uint64_t flags;
879 895 uint64_t fd;
880 896 uint64_t offhi;
881 897 uint64_t offlo;
882 898 #else
883 899 /*
884 900 * 32-bit contents, 32-bit cells
885 901 */
886 902 uint32_t prot;
887 903 uint32_t flags;
888 904 uint32_t fd;
889 905 uint32_t offhi;
890 906 uint32_t offlo;
891 907 #endif
892 908 };
893 909
894 910 int
895 911 smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
896 912 {
897 913 struct file *fp;
898 914 int error;
899 915 caddr_t a = uap->addr;
900 916 int flags = (int)uap->flags;
901 917 int fd = (int)uap->fd;
902 918 #ifdef _BIG_ENDIAN
903 919 offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
904 920 #else
905 921 offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
906 922 #endif
907 923
908 924 if (flags & _MAP_LOW32)
909 925 error = EINVAL;
910 926 else if (fd == -1 && (flags & MAP_ANON) != 0)
911 927 error = smmap_common(&a, uap->len, (int)uap->prot,
912 928 flags | _MAP_LOW32, NULL, off);
913 929 else if ((fp = getf(fd)) != NULL) {
914 930 error = smmap_common(&a, uap->len, (int)uap->prot,
915 931 flags | _MAP_LOW32, fp, off);
916 932 releasef(fd);
917 933 } else
918 934 error = EBADF;
919 935
920 936 if (error == 0)
921 937 rvp->r_val1 = (uintptr_t)a;
922 938 return (error);
923 939 }
924 940
925 941 #endif /* _SYSCALL32_IMPL || _ILP32 */
926 942
927 943 int
928 944 munmap(caddr_t addr, size_t len)
929 945 {
930 946 struct proc *p = curproc;
931 947 struct as *as = p->p_as;
932 948
933 949 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
934 950 return (set_errno(EINVAL));
935 951
936 952 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
937 953 return (set_errno(EINVAL));
938 954
939 955 /*
940 956 * Discard lwpchan mappings.
941 957 */
942 958 if (p->p_lcp != NULL)
943 959 lwpchan_delete_mapping(p, addr, addr + len);
944 960 if (as_unmap(as, addr, len) != 0)
945 961 return (set_errno(EINVAL));
946 962
947 963 return (0);
948 964 }
949 965
950 966 int
951 967 mprotect(caddr_t addr, size_t len, int prot)
952 968 {
953 969 struct as *as = curproc->p_as;
954 970 uint_t uprot = prot | PROT_USER;
955 971 int error;
956 972
957 973 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
958 974 return (set_errno(EINVAL));
959 975
960 976 switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
961 977 case RANGE_OKAY:
962 978 break;
963 979 case RANGE_BADPROT:
964 980 return (set_errno(ENOTSUP));
965 981 case RANGE_BADADDR:
966 982 default:
967 983 return (set_errno(ENOMEM));
968 984 }
969 985
970 986 error = as_setprot(as, addr, len, uprot);
971 987 if (error)
972 988 return (set_errno(error));
973 989 return (0);
974 990 }
975 991
976 992 #define MC_CACHE 128 /* internal result buffer */
977 993 #define MC_QUANTUM (MC_CACHE * PAGESIZE) /* addresses covered in loop */
978 994
979 995 int
980 996 mincore(caddr_t addr, size_t len, char *vecp)
981 997 {
982 998 struct as *as = curproc->p_as;
983 999 caddr_t ea; /* end address of loop */
984 1000 size_t rl; /* inner result length */
985 1001 char vec[MC_CACHE]; /* local vector cache */
986 1002 int error;
987 1003 model_t model;
988 1004 long llen;
989 1005
990 1006 model = get_udatamodel();
991 1007 /*
992 1008 * Validate form of address parameters.
993 1009 */
994 1010 if (model == DATAMODEL_NATIVE) {
995 1011 llen = (long)len;
996 1012 } else {
997 1013 llen = (int32_t)(size32_t)len;
998 1014 }
999 1015 if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
1000 1016 return (set_errno(EINVAL));
1001 1017
1002 1018 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
1003 1019 return (set_errno(ENOMEM));
1004 1020
1005 1021 /*
1006 1022 * Loop over subranges of interval [addr : addr + len), recovering
1007 1023 * results internally and then copying them out to caller. Subrange
1008 1024 * is based on the size of MC_CACHE, defined above.
1009 1025 */
1010 1026 for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
1011 1027 error = as_incore(as, addr,
1012 1028 (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
1013 1029 if (rl != 0) {
1014 1030 rl = (rl + PAGESIZE - 1) / PAGESIZE;
1015 1031 if (copyout(vec, vecp, rl) != 0)
1016 1032 return (set_errno(EFAULT));
1017 1033 vecp += rl;
1018 1034 }
1019 1035 if (error != 0)
1020 1036 return (set_errno(ENOMEM));
1021 1037 }
1022 1038 return (0);
1023 1039 }
[380 lines elided]