os/grow: define 'p' under the same ifdef as it's consumed
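The change is confined to choose_addr() below: the local pointer 'p' (curproc)
is consumed only inside the #if defined(__amd64) branch that derives lenp from
p->p_usrstack and p->p_stk_ctl, so its declaration is now guarded by the same
ifdef. Without the guard, non-amd64 builds declare and initialize 'p' but never
read it, which compilers and lint typically flag as an unused variable.

A minimal sketch of the pattern (hypothetical names such as demo(), not kernel
code; the real change is in choose_addr() further down):

    #include <stdio.h>

    int
    demo(int flags)
    {
    #if defined(__amd64)
            int p = 42;             /* consumed only in the amd64-only branch */
    #endif
            int lenp;

            if (flags & 0x1)
                    lenp = 1;
    #if defined(__amd64)
            else if (p != 0)        /* the sole consumer of 'p' */
                    lenp = p;
    #endif
            else
                    lenp = 0;

            return (lenp);
    }

    int
    main(void)
    {
            printf("%d\n", demo(0));
            return (0);
    }

Declaring 'p' under the same #if defined(__amd64) as its only use keeps both
the amd64 and non-amd64 configurations warning-clean without changing behavior.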
--- old/usr/src/uts/common/os/grow.c
+++ new/usr/src/uts/common/os/grow.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
23 23
24 24 /*
25 25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
26 26 * Use is subject to license terms.
27 27 */
28 28
29 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
30 30 /* All Rights Reserved */
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/inttypes.h>
34 34 #include <sys/param.h>
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/systm.h>
37 37 #include <sys/signal.h>
38 38 #include <sys/user.h>
39 39 #include <sys/errno.h>
40 40 #include <sys/var.h>
41 41 #include <sys/proc.h>
42 42 #include <sys/tuneable.h>
43 43 #include <sys/debug.h>
44 44 #include <sys/cmn_err.h>
45 45 #include <sys/cred.h>
46 46 #include <sys/vnode.h>
47 47 #include <sys/vfs.h>
48 48 #include <sys/vm.h>
49 49 #include <sys/file.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/vmparam.h>
52 52 #include <sys/fcntl.h>
53 53 #include <sys/lwpchan_impl.h>
54 54 #include <sys/nbmlock.h>
55 55
56 56 #include <vm/hat.h>
57 57 #include <vm/as.h>
58 58 #include <vm/seg.h>
59 59 #include <vm/seg_dev.h>
60 60 #include <vm/seg_vn.h>
61 61
62 62 int use_brk_lpg = 1;
63 63 int use_stk_lpg = 1;
64 64
65 65 static int brk_lpg(caddr_t nva);
66 66 static int grow_lpg(caddr_t sp);
67 67
68 68 int
69 69 brk(caddr_t nva)
70 70 {
71 71 int error;
72 72 proc_t *p = curproc;
73 73
74 74 /*
75 75 * Serialize brk operations on an address space.
76 76 * This also serves as the lock protecting p_brksize
77 77 * and p_brkpageszc.
78 78 */
79 79 as_rangelock(p->p_as);
80 80 if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
81 81 error = brk_lpg(nva);
82 82 } else {
83 83 error = brk_internal(nva, p->p_brkpageszc);
84 84 }
85 85 as_rangeunlock(p->p_as);
86 86 return ((error != 0 ? set_errno(error) : 0));
87 87 }
88 88
89 89 /*
90 90 * Algorithm: call arch-specific map_pgsz to get best page size to use,
91 91 * then call brk_internal().
92 92 * Returns 0 on success.
93 93 */
94 94 static int
95 95 brk_lpg(caddr_t nva)
96 96 {
97 97 struct proc *p = curproc;
98 98 size_t pgsz, len;
99 99 caddr_t addr, brkend;
100 100 caddr_t bssbase = p->p_bssbase;
101 101 caddr_t brkbase = p->p_brkbase;
102 102 int oszc, szc;
103 103 int err;
104 104
105 105 oszc = p->p_brkpageszc;
106 106
107 107 /*
108 108 * If p_brkbase has not yet been set, the first call
109 109 * to brk_internal() will initialize it.
110 110 */
111 111 if (brkbase == 0) {
112 112 return (brk_internal(nva, oszc));
113 113 }
114 114
115 115 len = nva - bssbase;
116 116
117 117 pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
118 118 szc = page_szc(pgsz);
119 119
120 120 /*
121 121 * Covers two cases:
122 122 * 1. page_szc() returns -1 for invalid page size, so we want to
123 123 * ignore it in that case.
124 124 * 2. By design we never decrease page size, as it is more stable.
125 125 */
126 126 if (szc <= oszc) {
127 127 err = brk_internal(nva, oszc);
128 128 /* If failed, back off to base page size. */
129 129 if (err != 0 && oszc != 0) {
130 130 err = brk_internal(nva, 0);
131 131 }
132 132 return (err);
133 133 }
134 134
135 135 err = brk_internal(nva, szc);
136 136 /* If using szc failed, map with base page size and return. */
137 137 if (err != 0) {
138 138 if (szc != 0) {
139 139 err = brk_internal(nva, 0);
140 140 }
141 141 return (err);
142 142 }
143 143
144 144 /*
145 145 * Round up brk base to a large page boundary and remap
146 146 * anything in the segment already faulted in beyond that
147 147 * point.
148 148 */
149 149 addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
150 150 brkend = brkbase + p->p_brksize;
151 151 len = brkend - addr;
152 152 /* Check that len is not negative. Update page size code for heap. */
153 153 if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
154 154 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
155 155 p->p_brkpageszc = szc;
156 156 }
157 157
158 158 ASSERT(err == 0);
159 159 return (err); /* should always be 0 */
160 160 }
161 161
162 162 /*
163 163 * Returns 0 on success.
164 164 */
165 165 int
166 166 brk_internal(caddr_t nva, uint_t brkszc)
167 167 {
168 168 caddr_t ova; /* current break address */
169 169 size_t size;
170 170 int error;
171 171 struct proc *p = curproc;
172 172 struct as *as = p->p_as;
173 173 size_t pgsz;
174 174 uint_t szc;
175 175 rctl_qty_t as_rctl;
176 176
177 177 /*
178 178 * extend heap to brkszc alignment but use current p->p_brkpageszc
179 179 * for the newly created segment. This allows the new extension
180 180 * segment to be concatenated successfully with the existing brk
181 181 * segment.
182 182 */
183 183 if ((szc = brkszc) != 0) {
184 184 pgsz = page_get_pagesize(szc);
185 185 ASSERT(pgsz > PAGESIZE);
186 186 } else {
187 187 pgsz = PAGESIZE;
188 188 }
189 189
190 190 mutex_enter(&p->p_lock);
191 191 as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
192 192 p->p_rctls, p);
193 193 mutex_exit(&p->p_lock);
194 194
195 195 /*
196 196 * If p_brkbase has not yet been set, the first call
197 197 * to brk() will initialize it.
198 198 */
199 199 if (p->p_brkbase == 0)
200 200 p->p_brkbase = nva;
201 201
202 202 /*
203 203 * Before multiple page size support existed p_brksize was the value
204 204 * not rounded to the pagesize (i.e. it stored the exact user request
205 205 * for heap size). If pgsz is greater than PAGESIZE calculate the
206 206 * heap size as the real new heap size by rounding it up to pgsz.
207 207 * This is useful since we may want to know where the heap ends
208 208 * without knowing heap pagesize (e.g. some old code) and also if
209 209 * heap pagesize changes we can update p_brkpageszc but delay adding
210 210 * new mapping yet still know from p_brksize where the heap really
211 211 * ends. The user-requested heap end is stored in a libc variable.
212 212 */
213 213 if (pgsz > PAGESIZE) {
214 214 caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
215 215 size = tnva - p->p_brkbase;
216 216 if (tnva < p->p_brkbase || (size > p->p_brksize &&
217 217 size > (size_t)as_rctl)) {
218 218 szc = 0;
219 219 pgsz = PAGESIZE;
220 220 size = nva - p->p_brkbase;
221 221 }
222 222 } else {
223 223 size = nva - p->p_brkbase;
224 224 }
225 225
226 226 /*
227 227 * use PAGESIZE to roundup ova because we want to know the real value
228 228 * of the current heap end in case p_brkpageszc changes since the last
229 229 * p_brksize was computed.
230 230 */
231 231 nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
232 232 ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
233 233 PAGESIZE);
234 234
235 235 if ((nva < p->p_brkbase) || (size > p->p_brksize &&
236 236 size > as_rctl)) {
237 237 mutex_enter(&p->p_lock);
238 238 (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
239 239 RCA_SAFE);
240 240 mutex_exit(&p->p_lock);
241 241 return (ENOMEM);
242 242 }
243 243
244 244 if (nva > ova) {
245 245 struct segvn_crargs crargs =
246 246 SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
247 247
248 248 if (!(p->p_datprot & PROT_EXEC)) {
249 249 crargs.prot &= ~PROT_EXEC;
250 250 }
251 251
252 252 /*
253 253 * Add new zfod mapping to extend UNIX data segment
254 254 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
255 255 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
256 256 * page sizes if ova is not aligned to szc's pgsz.
257 257 */
258 258 if (szc > 0) {
259 259 caddr_t rbss;
260 260
261 261 rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
262 262 pgsz);
263 263 if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
264 264 crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
265 265 AS_MAP_NO_LPOOB;
266 266 } else if (ova == rbss) {
267 267 crargs.szc = szc;
268 268 } else {
269 269 crargs.szc = AS_MAP_HEAP;
270 270 }
271 271 } else {
272 272 crargs.szc = AS_MAP_NO_LPOOB;
273 273 }
274 274 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
275 275 error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
276 276 &crargs);
277 277 if (error) {
278 278 return (error);
279 279 }
280 280
281 281 } else if (nva < ova) {
282 282 /*
283 283 * Release mapping to shrink UNIX data segment.
284 284 */
285 285 (void) as_unmap(as, nva, (size_t)(ova - nva));
286 286 }
287 287 p->p_brksize = size;
288 288 return (0);
289 289 }
290 290
291 291 /*
292 292 * Grow the stack to include sp. Return 1 if successful, 0 otherwise.
293 293 * This routine assumes that the stack grows downward.
294 294 */
295 295 int
296 296 grow(caddr_t sp)
297 297 {
298 298 struct proc *p = curproc;
299 299 struct as *as = p->p_as;
300 300 size_t oldsize = p->p_stksize;
301 301 size_t newsize;
302 302 int err;
303 303
304 304 /*
305 305 * Serialize grow operations on an address space.
306 306 * This also serves as the lock protecting p_stksize
307 307 * and p_stkpageszc.
308 308 */
309 309 as_rangelock(as);
310 310 if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
311 311 err = grow_lpg(sp);
312 312 } else {
313 313 err = grow_internal(sp, p->p_stkpageszc);
314 314 }
315 315 as_rangeunlock(as);
316 316
317 317 if (err == 0 && (newsize = p->p_stksize) > oldsize) {
318 318 ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
319 319 ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
320 320 /*
321 321 * Set up translations so the process doesn't have to fault in
322 322 * the stack pages we just gave it.
323 323 */
324 324 (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
325 325 newsize - oldsize, F_INVAL, S_WRITE);
326 326 }
327 327 return ((err == 0 ? 1 : 0));
328 328 }
329 329
330 330 /*
331 331 * Algorithm: call arch-specific map_pgsz to get best page size to use,
332 332 * then call grow_internal().
333 333 * Returns 0 on success.
334 334 */
335 335 static int
336 336 grow_lpg(caddr_t sp)
337 337 {
338 338 struct proc *p = curproc;
339 339 size_t pgsz;
340 340 size_t len, newsize;
341 341 caddr_t addr, saddr;
342 342 caddr_t growend;
343 343 int oszc, szc;
344 344 int err;
345 345
346 346 newsize = p->p_usrstack - sp;
347 347
348 348 oszc = p->p_stkpageszc;
349 349 pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
350 350 szc = page_szc(pgsz);
351 351
352 352 /*
353 353 * Covers two cases:
354 354 * 1. page_szc() returns -1 for invalid page size, so we want to
355 355 * ignore it in that case.
356 356 * 2. By design we never decrease page size, as it is more stable.
357 357 * This shouldn't happen as the stack never shrinks.
358 358 */
359 359 if (szc <= oszc) {
360 360 err = grow_internal(sp, oszc);
361 361 /* failed, fall back to base page size */
362 362 if (err != 0 && oszc != 0) {
363 363 err = grow_internal(sp, 0);
364 364 }
365 365 return (err);
366 366 }
367 367
368 368 /*
369 369 * We've grown sufficiently to switch to a new page size.
370 370 * So we are going to remap the whole segment with the new page size.
371 371 */
372 372 err = grow_internal(sp, szc);
373 373 /* The grow with szc failed, so fall back to base page size. */
374 374 if (err != 0) {
375 375 if (szc != 0) {
376 376 err = grow_internal(sp, 0);
377 377 }
378 378 return (err);
379 379 }
380 380
381 381 /*
382 382 * Round up stack pointer to a large page boundary and remap
383 383 * any pgsz pages in the segment already faulted in beyond that
384 384 * point.
385 385 */
386 386 saddr = p->p_usrstack - p->p_stksize;
387 387 addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
388 388 growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
389 389 len = growend - addr;
390 390 /* Check that len is not negative. Update page size code for stack. */
391 391 if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
392 392 (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
393 393 p->p_stkpageszc = szc;
394 394 }
395 395
396 396 ASSERT(err == 0);
397 397 return (err); /* should always be 0 */
398 398 }
399 399
400 400 /*
401 401 * This routine assumes that the stack grows downward.
402 402 * Returns 0 on success, errno on failure.
403 403 */
404 404 int
405 405 grow_internal(caddr_t sp, uint_t growszc)
406 406 {
407 407 struct proc *p = curproc;
408 408 size_t newsize;
409 409 size_t oldsize;
410 410 int error;
411 411 size_t pgsz;
412 412 uint_t szc;
413 413 struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
414 414
415 415 ASSERT(sp < p->p_usrstack);
416 416 sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
417 417
418 418 /*
419 419 * grow to growszc alignment but use current p->p_stkpageszc for
420 420 * the segvn_crargs szc passed to segvn_create. For memcntl to
421 421 * increase the szc, this allows the new extension segment to be
422 422 * concatenated successfully with the existing stack segment.
423 423 */
424 424 if ((szc = growszc) != 0) {
425 425 pgsz = page_get_pagesize(szc);
426 426 ASSERT(pgsz > PAGESIZE);
427 427 newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
428 428 if (newsize > (size_t)p->p_stk_ctl) {
429 429 szc = 0;
430 430 pgsz = PAGESIZE;
431 431 newsize = p->p_usrstack - sp;
432 432 }
433 433 } else {
434 434 pgsz = PAGESIZE;
435 435 newsize = p->p_usrstack - sp;
436 436 }
437 437
438 438 if (newsize > (size_t)p->p_stk_ctl) {
439 439 (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
440 440 RCA_UNSAFE_ALL);
441 441
442 442 return (ENOMEM);
443 443 }
444 444
445 445 oldsize = p->p_stksize;
446 446 ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
447 447
448 448 if (newsize <= oldsize) { /* prevent the stack from shrinking */
449 449 return (0);
450 450 }
451 451
452 452 if (!(p->p_stkprot & PROT_EXEC)) {
453 453 crargs.prot &= ~PROT_EXEC;
454 454 }
455 455 /*
456 456 * extend stack with the proposed new growszc, which is different
457 457 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
458 458 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
459 459 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
460 460 * if not aligned to szc's pgsz.
461 461 */
462 462 if (szc > 0) {
463 463 caddr_t oldsp = p->p_usrstack - oldsize;
464 464 caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
465 465 pgsz);
466 466
467 467 if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
468 468 crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
469 469 AS_MAP_NO_LPOOB;
470 470 } else if (oldsp == austk) {
471 471 crargs.szc = szc;
472 472 } else {
473 473 crargs.szc = AS_MAP_STACK;
474 474 }
475 475 } else {
476 476 crargs.szc = AS_MAP_NO_LPOOB;
477 477 }
478 478 crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
479 479
480 480 if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
481 481 segvn_create, &crargs)) != 0) {
482 482 if (error == EAGAIN) {
483 483 cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
484 484 "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
485 485 }
486 486 return (error);
487 487 }
488 488 p->p_stksize = newsize;
489 489 return (0);
490 490 }
491 491
492 492 /*
493 493 * Find address for user to map.
494 494 * If MAP_FIXED is not specified, we can pick any address we want, but we will
495 495 * first try the value in *addrp if it is non-NULL. Thus this is implementing
496 496 * a way to try and get a preferred address.
497 497 */
498 498 int
499 499 choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
500 500 int vacalign, uint_t flags)
501 501 {
502 +#if defined(__amd64)
502 503 proc_t *p = curproc;
504 +#endif
503 505 caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
504 506 size_t lenp;
505 507
506 508 ASSERT(AS_ISCLAIMGAP(as)); /* searches should be serialized */
507 509
508 510 /*
509 511 * If we have been provided a hint, we should still expand the lenp
510 512 * to be the rest of the address space. This will allow us to
511 513 * treat the hint as a strong desire to be "nearby" the provided
512 514 * address. If we can't satisfy the hint, as_gap() will walk forward.
513 515 */
514 516 if (flags & _MAP_LOW32)
515 517 lenp = (caddr_t)USERLIMIT32 - basep;
516 518 #if defined(__amd64)
517 519 else if (p->p_model == DATAMODEL_NATIVE)
518 520 lenp = p->p_usrstack - basep -
519 521 ((p->p_stk_ctl + PAGEOFFSET) & PAGEMASK);
520 522 #endif
521 523 else
522 524 lenp = as->a_userlimit - basep;
523 525
524 526 if (flags & MAP_FIXED) {
525 527 (void) as_unmap(as, *addrp, len);
526 528 return (0);
527 529 } else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
528 530 !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
529 531 /* User supplied address was available */
530 532 *addrp = basep;
531 533 } else {
532 534 /*
533 535 * No user supplied address or the address supplied was not
534 536 * available.
535 537 */
536 538 map_addr(addrp, len, off, vacalign, flags);
537 539 }
538 540 if (*addrp == NULL)
539 541 return (ENOMEM);
540 542 return (0);
541 543 }
542 544
543 545
544 546 /*
545 547 * Used for MAP_ANON - fast way to get anonymous pages
546 548 */
547 549 static int
548 550 zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
549 551 offset_t pos)
550 552 {
551 553 struct segvn_crargs vn_a;
552 554 int error;
553 555
554 556 if (((PROT_ALL & uprot) != uprot))
555 557 return (EACCES);
556 558
557 559 if ((flags & MAP_FIXED) != 0) {
558 560 caddr_t userlimit;
559 561
560 562 /*
561 563 * Use the user address. First verify that
562 564 * the address to be used is page aligned.
563 565 * Then make some simple bounds checks.
564 566 */
565 567 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
566 568 return (EINVAL);
567 569
568 570 userlimit = flags & _MAP_LOW32 ?
569 571 (caddr_t)USERLIMIT32 : as->a_userlimit;
570 572 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
571 573 case RANGE_OKAY:
572 574 break;
573 575 case RANGE_BADPROT:
574 576 return (ENOTSUP);
575 577 case RANGE_BADADDR:
576 578 default:
577 579 return (ENOMEM);
578 580 }
579 581 }
580 582 /*
581 583 * No need to worry about vac alignment for anonymous
582 584 * pages since this is a "clone" object that doesn't
583 585 * yet exist.
584 586 */
585 587 error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
586 588 if (error != 0) {
587 589 return (error);
588 590 }
589 591
590 592 /*
591 593 * Use the seg_vn segment driver; passing in the NULL amp
592 594 * gives the desired "cloning" effect.
593 595 */
594 596 vn_a.vp = NULL;
595 597 vn_a.offset = 0;
596 598 vn_a.type = flags & MAP_TYPE;
597 599 vn_a.prot = uprot;
598 600 vn_a.maxprot = PROT_ALL;
599 601 vn_a.flags = flags & ~MAP_TYPE;
600 602 vn_a.cred = CRED();
601 603 vn_a.amp = NULL;
602 604 vn_a.szc = 0;
603 605 vn_a.lgrp_mem_policy_flags = 0;
604 606
605 607 return (as_map(as, *addrp, len, segvn_create, &vn_a));
606 608 }
607 609
608 610 static int
609 611 smmap_common(caddr_t *addrp, size_t len,
610 612 int prot, int flags, struct file *fp, offset_t pos)
611 613 {
612 614 struct vnode *vp;
613 615 struct as *as = curproc->p_as;
614 616 uint_t uprot, maxprot, type;
615 617 int error;
616 618 int in_crit = 0;
617 619
618 620 if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
619 621 _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
620 622 MAP_TEXT | MAP_INITDATA)) != 0) {
621 623 /* | MAP_RENAME */ /* not implemented, let user know */
622 624 return (EINVAL);
623 625 }
624 626
625 627 if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
626 628 return (EINVAL);
627 629 }
628 630
629 631 if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
630 632 return (EINVAL);
631 633 }
632 634
633 635 #if defined(__sparc)
634 636 /*
635 637 * See if this is an "old mmap call". If so, remember this
636 638 * fact and convert the flags value given to mmap to indicate
637 639 * the specified address in the system call must be used.
638 640 * _MAP_NEW is set by all new uses of mmap.
639 641 */
640 642 if ((flags & _MAP_NEW) == 0)
641 643 flags |= MAP_FIXED;
642 644 #endif
643 645 flags &= ~_MAP_NEW;
644 646
645 647 type = flags & MAP_TYPE;
646 648 if (type != MAP_PRIVATE && type != MAP_SHARED)
647 649 return (EINVAL);
648 650
649 651
650 652 if (flags & MAP_ALIGN) {
651 653
652 654 if (flags & MAP_FIXED)
653 655 return (EINVAL);
654 656
655 657 /* alignment needs to be a power of 2 >= page size */
656 658 if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
657 659 !ISP2((uintptr_t)*addrp))
658 660 return (EINVAL);
659 661 }
660 662 /*
661 663 * Check for bad lengths and file position.
662 664 * We let the VOP_MAP routine check for negative lengths
663 665 * since on some vnode types this might be appropriate.
664 666 */
665 667 if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
666 668 return (EINVAL);
667 669
668 670 maxprot = PROT_ALL; /* start out allowing all accesses */
669 671 uprot = prot | PROT_USER;
670 672
671 673 if (fp == NULL) {
672 674 ASSERT(flags & MAP_ANON);
673 675 /* discard lwpchan mappings, like munmap() */
674 676 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
675 677 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
676 678 as_rangelock(as);
677 679 error = zmap(as, addrp, len, uprot, flags, pos);
678 680 as_rangeunlock(as);
679 681 /*
680 682 * Tell machine specific code that lwp has mapped shared memory
681 683 */
682 684 if (error == 0 && (flags & MAP_SHARED)) {
683 685 /* EMPTY */
684 686 LWP_MMODEL_SHARED_AS(*addrp, len);
685 687 }
686 688 return (error);
687 689 } else if ((flags & MAP_ANON) != 0)
688 690 return (EINVAL);
689 691
690 692 vp = fp->f_vnode;
691 693
692 694 /* Can't execute code from "noexec" mounted filesystem. */
693 695 if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
694 696 maxprot &= ~PROT_EXEC;
695 697
696 698 /*
697 699 * These checks were added as part of large files.
698 700 *
699 701 * Return ENXIO if the initial position is negative; return EOVERFLOW
700 702 * if (offset + len) would overflow the maximum allowed offset for the
701 703 * type of file descriptor being used.
702 704 */
703 705 if (vp->v_type == VREG) {
704 706 if (pos < 0)
705 707 return (ENXIO);
706 708 if ((offset_t)len > (OFFSET_MAX(fp) - pos))
707 709 return (EOVERFLOW);
708 710 }
709 711
710 712 if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
711 713 /* no write access allowed */
712 714 maxprot &= ~PROT_WRITE;
713 715 }
714 716
715 717 /*
716 718 * XXX - Do we also adjust maxprot based on protections
717 719 * of the vnode? E.g. if no execute permission is given
718 720 * on the vnode for the current user, maxprot probably
719 721 * should disallow PROT_EXEC also? This is different
720 722 * from the write access as this would be a per vnode
721 723 * test as opposed to a per fd test for writability.
722 724 */
723 725
724 726 /*
725 727 * Verify that the specified protections are not greater than
726 728 * the maximum allowable protections. Also test to make sure
728 730 * that the file descriptor allows for read access since
728 730 * "write only" mappings are hard to do since normally we do
729 731 * the read from the file before the page can be written.
730 732 */
731 733 if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
732 734 return (EACCES);
733 735
734 736 /*
735 737 * If the user specified an address, do some simple checks here
736 738 */
737 739 if ((flags & MAP_FIXED) != 0) {
738 740 caddr_t userlimit;
739 741
740 742 /*
741 743 * Use the user address. First verify that
742 744 * the address to be used is page aligned.
743 745 * Then make some simple bounds checks.
744 746 */
745 747 if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
746 748 return (EINVAL);
747 749
748 750 userlimit = flags & _MAP_LOW32 ?
749 751 (caddr_t)USERLIMIT32 : as->a_userlimit;
750 752 switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
751 753 case RANGE_OKAY:
752 754 break;
753 755 case RANGE_BADPROT:
754 756 return (ENOTSUP);
755 757 case RANGE_BADADDR:
756 758 default:
757 759 return (ENOMEM);
758 760 }
759 761 }
760 762
761 763 if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
762 764 nbl_need_check(vp)) {
763 765 int svmand;
764 766 nbl_op_t nop;
765 767
766 768 nbl_start_crit(vp, RW_READER);
767 769 in_crit = 1;
768 770 error = nbl_svmand(vp, fp->f_cred, &svmand);
769 771 if (error != 0)
770 772 goto done;
771 773 if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
772 774 if (prot & (PROT_READ | PROT_EXEC)) {
773 775 nop = NBL_READWRITE;
774 776 } else {
775 777 nop = NBL_WRITE;
776 778 }
777 779 } else {
778 780 nop = NBL_READ;
779 781 }
780 782 if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
781 783 error = EACCES;
782 784 goto done;
783 785 }
784 786 }
785 787
786 788 /* discard lwpchan mappings, like munmap() */
787 789 if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
788 790 lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
789 791
790 792 /*
791 793 * Ok, now let the vnode map routine do its thing to set things up.
792 794 */
793 795 error = VOP_MAP(vp, pos, as,
794 796 addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
795 797
796 798 if (error == 0) {
797 799 /*
798 800 * Tell machine specific code that lwp has mapped shared memory
799 801 */
800 802 if (flags & MAP_SHARED) {
801 803 /* EMPTY */
802 804 LWP_MMODEL_SHARED_AS(*addrp, len);
803 805 }
804 806 if (vp->v_type == VREG &&
805 807 (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
806 808 /*
807 809 * Mark this as an executable vnode
808 810 */
809 811 mutex_enter(&vp->v_lock);
810 812 vp->v_flag |= VVMEXEC;
811 813 mutex_exit(&vp->v_lock);
812 814 }
813 815 }
814 816
815 817 done:
816 818 if (in_crit)
817 819 nbl_end_crit(vp);
818 820 return (error);
819 821 }
820 822
821 823 #ifdef _LP64
822 824 /*
823 825 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
824 826 *
825 827 * The "large file" mmap routine mmap64(2) is also mapped to this routine
826 828 * by the 64-bit version of libc.
827 829 *
828 830 * Eventually, this should be the only version, and have smmap_common()
829 831 * folded back into it again. Some day.
830 832 */
831 833 caddr_t
832 834 smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
833 835 {
834 836 struct file *fp;
835 837 int error;
836 838
837 839 if (fd == -1 && (flags & MAP_ANON) != 0)
838 840 error = smmap_common(&addr, len, prot, flags,
839 841 NULL, (offset_t)pos);
840 842 else if ((fp = getf(fd)) != NULL) {
841 843 error = smmap_common(&addr, len, prot, flags,
842 844 fp, (offset_t)pos);
843 845 releasef(fd);
844 846 } else
845 847 error = EBADF;
846 848
847 849 return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
848 850 }
849 851 #endif /* _LP64 */
850 852
851 853 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
852 854
853 855 /*
854 856 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
855 857 */
856 858 caddr_t
857 859 smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
858 860 {
859 861 struct file *fp;
860 862 int error;
861 863 caddr_t a = (caddr_t)(uintptr_t)addr;
862 864
863 865 if (flags & _MAP_LOW32)
864 866 error = EINVAL;
865 867 else if (fd == -1 && (flags & MAP_ANON) != 0)
866 868 error = smmap_common(&a, (size_t)len, prot,
867 869 flags | _MAP_LOW32, NULL, (offset_t)pos);
868 870 else if ((fp = getf(fd)) != NULL) {
869 871 error = smmap_common(&a, (size_t)len, prot,
870 872 flags | _MAP_LOW32, fp, (offset_t)pos);
871 873 releasef(fd);
872 874 } else
873 875 error = EBADF;
874 876
875 877 ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
876 878
877 879 return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
878 880 }
879 881
880 882 /*
881 883 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
882 884 *
883 885 * Now things really get ugly because we can't use the C-style
884 886 * calling convention for more than 6 args, and 64-bit parameter
885 887 * passing on 32-bit systems is less than clean.
886 888 */
887 889
888 890 struct mmaplf32a {
889 891 caddr_t addr;
890 892 size_t len;
891 893 #ifdef _LP64
892 894 /*
893 895 * 32-bit contents, 64-bit cells
894 896 */
895 897 uint64_t prot;
896 898 uint64_t flags;
897 899 uint64_t fd;
898 900 uint64_t offhi;
899 901 uint64_t offlo;
900 902 #else
901 903 /*
902 904 * 32-bit contents, 32-bit cells
903 905 */
904 906 uint32_t prot;
905 907 uint32_t flags;
906 908 uint32_t fd;
907 909 uint32_t offhi;
908 910 uint32_t offlo;
909 911 #endif
910 912 };
911 913
912 914 int
913 915 smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
914 916 {
915 917 struct file *fp;
916 918 int error;
917 919 caddr_t a = uap->addr;
918 920 int flags = (int)uap->flags;
919 921 int fd = (int)uap->fd;
920 922 #ifdef _BIG_ENDIAN
921 923 offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
922 924 #else
923 925 offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
924 926 #endif
925 927
926 928 if (flags & _MAP_LOW32)
927 929 error = EINVAL;
928 930 else if (fd == -1 && (flags & MAP_ANON) != 0)
929 931 error = smmap_common(&a, uap->len, (int)uap->prot,
930 932 flags | _MAP_LOW32, NULL, off);
931 933 else if ((fp = getf(fd)) != NULL) {
932 934 error = smmap_common(&a, uap->len, (int)uap->prot,
933 935 flags | _MAP_LOW32, fp, off);
934 936 releasef(fd);
935 937 } else
936 938 error = EBADF;
937 939
938 940 if (error == 0)
939 941 rvp->r_val1 = (uintptr_t)a;
940 942 return (error);
941 943 }
942 944
943 945 #endif /* _SYSCALL32_IMPL || _ILP32 */
944 946
945 947 int
946 948 munmap(caddr_t addr, size_t len)
947 949 {
948 950 struct proc *p = curproc;
949 951 struct as *as = p->p_as;
950 952
951 953 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
952 954 return (set_errno(EINVAL));
953 955
954 956 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
955 957 return (set_errno(EINVAL));
956 958
957 959 /*
958 960 * Discard lwpchan mappings.
959 961 */
960 962 if (p->p_lcp != NULL)
961 963 lwpchan_delete_mapping(p, addr, addr + len);
962 964 if (as_unmap(as, addr, len) != 0)
963 965 return (set_errno(EINVAL));
964 966
965 967 return (0);
966 968 }
967 969
968 970 int
969 971 mprotect(caddr_t addr, size_t len, int prot)
970 972 {
971 973 struct as *as = curproc->p_as;
972 974 uint_t uprot = prot | PROT_USER;
973 975 int error;
974 976
975 977 if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
976 978 return (set_errno(EINVAL));
977 979
978 980 switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
979 981 case RANGE_OKAY:
980 982 break;
981 983 case RANGE_BADPROT:
982 984 return (set_errno(ENOTSUP));
983 985 case RANGE_BADADDR:
984 986 default:
985 987 return (set_errno(ENOMEM));
986 988 }
987 989
988 990 error = as_setprot(as, addr, len, uprot);
989 991 if (error)
990 992 return (set_errno(error));
991 993 return (0);
992 994 }
993 995
994 996 #define MC_CACHE 128 /* internal result buffer */
995 997 #define MC_QUANTUM (MC_CACHE * PAGESIZE) /* addresses covered in loop */
996 998
997 999 int
998 1000 mincore(caddr_t addr, size_t len, char *vecp)
999 1001 {
1000 1002 struct as *as = curproc->p_as;
1001 1003 caddr_t ea; /* end address of loop */
1002 1004 size_t rl; /* inner result length */
1003 1005 char vec[MC_CACHE]; /* local vector cache */
1004 1006 int error;
1005 1007 model_t model;
1006 1008 long llen;
1007 1009
1008 1010 model = get_udatamodel();
1009 1011 /*
1010 1012 * Validate form of address parameters.
1011 1013 */
1012 1014 if (model == DATAMODEL_NATIVE) {
1013 1015 llen = (long)len;
1014 1016 } else {
1015 1017 llen = (int32_t)(size32_t)len;
1016 1018 }
1017 1019 if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
1018 1020 return (set_errno(EINVAL));
1019 1021
1020 1022 if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
1021 1023 return (set_errno(ENOMEM));
1022 1024
1023 1025 /*
1024 1026 * Loop over subranges of interval [addr : addr + len), recovering
1025 1027 * results internally and then copying them out to caller. Subrange
1026 1028 * is based on the size of MC_CACHE, defined above.
1027 1029 */
1028 1030 for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
1029 1031 error = as_incore(as, addr,
1030 1032 (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
1031 1033 if (rl != 0) {
1032 1034 rl = (rl + PAGESIZE - 1) / PAGESIZE;
1033 1035 if (copyout(vec, vecp, rl) != 0)
1034 1036 return (set_errno(EFAULT));
1035 1037 vecp += rl;
1036 1038 }
1037 1039 if (error != 0)
1038 1040 return (set_errno(ENOMEM));
1039 1041 }
1040 1042 return (0);
1041 1043 }