Print this page
cstyle sort of updates
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/physmem.c
+++ new/usr/src/uts/common/io/physmem.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/modctl.h>
29 29 #include <sys/conf.h>
30 30 #include <sys/ddi.h>
31 31 #include <sys/sunddi.h>
32 32 #include <sys/devops.h>
33 33 #include <sys/stat.h>
34 34 #include <sys/file.h>
35 35 #include <sys/cred.h>
36 36 #include <sys/policy.h>
37 37 #include <sys/errno.h>
38 38 #include <vm/seg_dev.h>
39 39 #include <vm/seg_vn.h>
40 40 #include <vm/page.h>
41 41 #include <sys/fs/swapnode.h>
42 42 #include <sys/sysmacros.h>
43 43 #include <sys/fcntl.h>
44 44 #include <sys/vmsystm.h>
45 45 #include <sys/physmem.h>
46 46 #include <sys/vfs_opreg.h>
47 47
48 48 static dev_info_t *physmem_dip = NULL;
49 49
50 50 /*
51 51 * Linked list element hanging off physmem_proc_hash below, which holds all
52 52 * the information for a given segment which has been setup for this process.
53 53 * This is a simple linked list as we are assuming that for a given process
54 54 * the setup ioctl will only be called a handful of times. If this assumption
55 55 * changes in the future, a quicker to traverse data structure should be used.
56 56 */
struct physmem_hash {
	struct physmem_hash *ph_next;	/* next segment for this process */
	uint64_t ph_base_pa;		/* physical base of this segment */
	caddr_t ph_base_va;		/* user virtual base of this segment */
	size_t ph_seg_len;		/* segment length in bytes */
	struct vnode *ph_vnode;		/* backing physmem vnode */
};
64 64
65 65 /*
66 66 * Hash of all of the processes which have setup mappings with the driver with
67 67 * pointers to per process data.
68 68 */
struct physmem_proc_hash {
	struct proc *pph_proc;			/* owning process */
	struct physmem_hash *pph_hash;		/* list of its segments */
	struct physmem_proc_hash *pph_next;	/* hash-bucket chain */
};
74 74
75 75
76 76 /* Needs to be a power of two for simple hash algorithm */
77 77 #define PPH_SIZE 8
78 78 struct physmem_proc_hash *pph[PPH_SIZE];
79 79
80 80 /*
81 81 * Lock which protects the pph hash above. To add an element (either a new
82 82 * process or a new segment) the WRITE lock must be held. To traverse the
83 83 * list, only a READ lock is needed.
84 84 */
85 85 krwlock_t pph_rwlock;
86 86
87 87 #define PHYSMEM_HASH(procp) ((int)((((uintptr_t)procp) >> 8) & (PPH_SIZE - 1)))
88 88
89 89 /*
90 90 * Need to keep a reference count of how many processes have the driver
91 91 * open to prevent it from disappearing.
92 92 */
93 93 uint64_t physmem_vnodecnt;
94 94 kmutex_t physmem_mutex;	/* protects physmem_vnodecnt */
95 95
96 96 static int physmem_getpage(struct vnode *vp, offset_t off, size_t len,
97 97 uint_t *protp, page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
98 98 enum seg_rw rw, struct cred *cr, caller_context_t *ct);
99 99
100 100 static int physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
↓ open down ↓ |
100 lines elided |
↑ open up ↑ |
101 101 caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
102 102 struct cred *cred, caller_context_t *ct);
103 103
104 104 static int physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
105 105 caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
106 106 struct cred *cred, caller_context_t *ct);
107 107
108 108 static void physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct);
109 109
/*
 * Vnode operations template for physmem vnodes.  Only the operations
 * needed for mapping and teardown are supplied.  Each entry is fully
 * brace-enclosed so the initializer compiles cleanly without
 * -Wno-missing-braces (illumos issue 7127).
 */
const fs_operation_def_t physmem_vnodeops_template[] = {
	{ VOPNAME_GETPAGE, { .vop_getpage = physmem_getpage } },
	{ VOPNAME_ADDMAP, { .vop_addmap = physmem_addmap } },
	{ VOPNAME_DELMAP, { .vop_delmap = physmem_delmap } },
	{ VOPNAME_INACTIVE, { .vop_inactive = physmem_inactive } },
	{ NULL, { NULL } }
};
117 117
118 118 vnodeops_t *physmem_vnodeops = NULL;
119 119
120 120 /*
121 121 * Removes the current process from the hash if the process has no more
122 122 * physmem segments active.
123 123 */
124 124 void
125 125 physmem_remove_hash_proc()
126 126 {
127 127 int index;
128 128 struct physmem_proc_hash **walker;
129 129 struct physmem_proc_hash *victim = NULL;
130 130
131 131 index = PHYSMEM_HASH(curproc);
132 132 rw_enter(&pph_rwlock, RW_WRITER);
133 133 walker = &pph[index];
134 134 while (*walker != NULL) {
135 135 if ((*walker)->pph_proc == curproc &&
136 136 (*walker)->pph_hash == NULL) {
137 137 victim = *walker;
138 138 *walker = victim->pph_next;
139 139 break;
140 140 }
141 141 walker = &((*walker)->pph_next);
142 142 }
143 143 rw_exit(&pph_rwlock);
144 144 if (victim != NULL)
145 145 kmem_free(victim, sizeof (struct physmem_proc_hash));
146 146 }
147 147
/*
 * Add a new entry to the hash for the given process to cache the
 * address ranges that it is working on. If this is the first hash
 * item to be added for this process, we will create the head pointer
 * for this process.
 * Returns 0 on success, ERANGE when the physical address is already in the
 * hash.
 *
 * Locking: takes pph_rwlock as WRITER.  Because KM_SLEEP allocation must
 * not happen under the lock, the function may drop the lock, allocate a
 * per-process head entry, and retry from the "insert" label; the retry
 * re-checks the bucket since another thread may have inserted meanwhile.
 */
int
physmem_add_hash(struct physmem_hash *php)
{
	int index;
	struct physmem_proc_hash *iterator;
	struct physmem_proc_hash *newp = NULL;	/* head allocated on retry */
	struct physmem_hash *temp;
	int ret = 0;

	index = PHYSMEM_HASH(curproc);

insert:
	rw_enter(&pph_rwlock, RW_WRITER);
	iterator = pph[index];
	while (iterator != NULL) {
		if (iterator->pph_proc == curproc) {
			/*
			 * check to make sure a single process does not try to
			 * map the same region twice.
			 */
			for (temp = iterator->pph_hash; temp != NULL;
			    temp = temp->ph_next) {
				/* overlap test in both directions */
				if ((php->ph_base_pa >= temp->ph_base_pa &&
				    php->ph_base_pa < temp->ph_base_pa +
				    temp->ph_seg_len) ||
				    (temp->ph_base_pa >= php->ph_base_pa &&
				    temp->ph_base_pa < php->ph_base_pa +
				    php->ph_seg_len)) {
					ret = ERANGE;
					break;
				}
			}
			if (ret == 0) {
				/* push the segment onto the process list */
				php->ph_next = iterator->pph_hash;
				iterator->pph_hash = php;
			}
			rw_exit(&pph_rwlock);
			/* Need to check for two threads in sync */
			if (newp != NULL)
				kmem_free(newp, sizeof (*newp));
			return (ret);
		}
		iterator = iterator->pph_next;
	}

	/* No entry for curproc; link in the head we allocated on retry. */
	if (newp != NULL) {
		newp->pph_proc = curproc;
		newp->pph_next = pph[index];
		newp->pph_hash = php;
		php->ph_next = NULL;
		pph[index] = newp;
		rw_exit(&pph_rwlock);
		return (0);
	}

	rw_exit(&pph_rwlock);
	/* Dropped the lock so we could use KM_SLEEP */
	newp = kmem_zalloc(sizeof (struct physmem_proc_hash), KM_SLEEP);
	goto insert;
}
216 216
217 217 /*
218 218 * Will return the pointer to the physmem_hash struct if the setup routine
219 219 * has previously been called for this memory.
220 220 * Returns NULL on failure.
221 221 */
222 222 struct physmem_hash *
223 223 physmem_get_hash(uint64_t req_paddr, size_t len, proc_t *procp)
224 224 {
225 225 int index;
226 226 struct physmem_proc_hash *proc_hp;
227 227 struct physmem_hash *php;
228 228
229 229 ASSERT(rw_lock_held(&pph_rwlock));
230 230
231 231 index = PHYSMEM_HASH(procp);
232 232 proc_hp = pph[index];
233 233 while (proc_hp != NULL) {
234 234 if (proc_hp->pph_proc == procp) {
235 235 php = proc_hp->pph_hash;
236 236 while (php != NULL) {
237 237 if ((req_paddr >= php->ph_base_pa) &&
238 238 (req_paddr + len <=
239 239 php->ph_base_pa + php->ph_seg_len)) {
240 240 return (php);
241 241 }
242 242 php = php->ph_next;
243 243 }
244 244 }
245 245 proc_hp = proc_hp->pph_next;
246 246 }
247 247 return (NULL);
248 248 }
249 249
250 250 int
251 251 physmem_validate_cookie(uint64_t p_cookie)
252 252 {
253 253 int index;
254 254 struct physmem_proc_hash *proc_hp;
255 255 struct physmem_hash *php;
256 256
257 257 ASSERT(rw_lock_held(&pph_rwlock));
258 258
259 259 index = PHYSMEM_HASH(curproc);
260 260 proc_hp = pph[index];
261 261 while (proc_hp != NULL) {
262 262 if (proc_hp->pph_proc == curproc) {
263 263 php = proc_hp->pph_hash;
264 264 while (php != NULL) {
265 265 if ((uint64_t)(uintptr_t)php == p_cookie) {
266 266 return (1);
267 267 }
268 268 php = php->ph_next;
269 269 }
270 270 }
271 271 proc_hp = proc_hp->pph_next;
272 272 }
273 273 return (0);
274 274 }
275 275
/*
 * Remove the given vnode from the pph hash. If it exists in the hash the
 * process still has to be around as the vnode is obviously still around and
 * since it's a physmem vnode, it must be in the hash.
 * If it is not in the hash that must mean that the setup ioctl failed.
 * Return 0 in this instance, 1 if it is in the hash.
 *
 * Locking: takes pph_rwlock as WRITER to synchronize with the map path;
 * the matched entry is freed after the lock is dropped.
 */
int
physmem_remove_vnode_hash(vnode_t *vp)
{
	int index;
	struct physmem_proc_hash *proc_hp;
	struct physmem_hash **phpp;
	struct physmem_hash *victim;

	index = PHYSMEM_HASH(curproc);
	/* synchronize with the map routine */
	rw_enter(&pph_rwlock, RW_WRITER);
	proc_hp = pph[index];
	while (proc_hp != NULL) {
		if (proc_hp->pph_proc == curproc) {
			/* walk by pointer-to-pointer so unlink is one store */
			phpp = &proc_hp->pph_hash;
			while (*phpp != NULL) {
				if ((*phpp)->ph_vnode == vp) {
					victim = *phpp;
					*phpp = victim->ph_next;

					rw_exit(&pph_rwlock);
					kmem_free(victim, sizeof (*victim));
					return (1);
				}
				phpp = &(*phpp)->ph_next;
			}
		}
		proc_hp = proc_hp->pph_next;
	}
	rw_exit(&pph_rwlock);

	/* not found */
	return (0);
}
317 317
318 318 int
319 319 physmem_setup_vnops()
320 320 {
321 321 int error;
322 322 char *name = "physmem";
323 323 if (physmem_vnodeops != NULL)
324 324 cmn_err(CE_PANIC, "physmem vnodeops already set\n");
325 325 error = vn_make_ops(name, physmem_vnodeops_template, &physmem_vnodeops);
326 326 if (error != 0) {
327 327 cmn_err(CE_WARN, "physmem_setup_vnops: bad vnode ops template");
328 328 }
329 329 return (error);
330 330 }
331 331
332 332 /*
333 333 * The guts of the PHYSMEM_SETUP ioctl.
334 334 * Create a segment in the address space with the specified parameters.
335 335 * If pspp->user_va is NULL, as_gap will be used to find an appropriate VA.
336 336 * We do not do bounds checking on the requested physical addresses, if they
337 337 * do not exist in the system, they will not be mappable.
338 338 * Returns 0 on success with the following error codes on failure:
339 339 * ENOMEM - The VA range requested was already mapped if pspp->user_va is
340 340 * non-NULL or the system was unable to find enough VA space for
341 341  *	      the desired length if user_va was NULL.
342 342 * EINVAL - The requested PA, VA, or length was not PAGESIZE aligned.
343 343 */
int
physmem_setup_addrs(struct physmem_setup_param *pspp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs vn_a;
	int ret = 0;
	uint64_t base_pa;
	size_t len;
	caddr_t uvaddr;
	struct vnode *vp;
	struct physmem_hash *php;

	ASSERT(pspp != NULL);
	base_pa = pspp->req_paddr;
	len = pspp->len;
	uvaddr = (caddr_t)(uintptr_t)pspp->user_va;

	/* Sanity checking */
	if (!IS_P2ALIGNED(base_pa, PAGESIZE))
		return (EINVAL);
	if (!IS_P2ALIGNED(len, PAGESIZE))
		return (EINVAL);
	if (uvaddr != NULL && !IS_P2ALIGNED(uvaddr, PAGESIZE))
		return (EINVAL);

	php = kmem_zalloc(sizeof (struct physmem_hash), KM_SLEEP);

	/* Need to bump vnode count so that the driver can not be unloaded */
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt++;
	mutex_exit(&physmem_mutex);

	vp = vn_alloc(KM_SLEEP);
	ASSERT(vp != NULL);	/* SLEEP can't return NULL */
	vn_setops(vp, physmem_vnodeops);

	php->ph_vnode = vp;

	/* segvn args: share-map the full range, offset is the physical base */
	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)base_pa;
	vn_a.type = MAP_SHARED;
	vn_a.prot = PROT_ALL;
	vn_a.maxprot = PROT_ALL;
	vn_a.flags = 0;
	vn_a.cred = NULL;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	as_rangelock(as);
	if (uvaddr != NULL) {
		/* caller chose the VA; verify the range is an open gap */
		if (as_gap(as, len, &uvaddr, &len, AH_LO, NULL) == -1) {
			ret = ENOMEM;
			/*
			 * Common error unwind: undo the allocations and the
			 * vnode count bump above.  Entered by goto from the
			 * map_addr and as_map failure paths below as well.
			 */
	fail:
			as_rangeunlock(as);
			vn_free(vp);
			kmem_free(php, sizeof (*php));
			mutex_enter(&physmem_mutex);
			physmem_vnodecnt--;
			mutex_exit(&physmem_mutex);
			return (ret);
		}
	} else {
		/* We pick the address for the user */
		map_addr(&uvaddr, len, 0, 1, 0);
		if (uvaddr == NULL) {
			ret = ENOMEM;
			goto fail;
		}
	}
	ret = as_map(as, uvaddr, len, segvn_create, &vn_a);

	if (ret == 0) {
		as_rangeunlock(as);
		/* record the mapping and hand the cookie back to the user */
		php->ph_base_pa = base_pa;
		php->ph_base_va = uvaddr;
		php->ph_seg_len = len;
		pspp->user_va = (uint64_t)(uintptr_t)uvaddr;
		pspp->cookie = (uint64_t)(uintptr_t)php;
		ret = physmem_add_hash(php);
		if (ret == 0)
			return (0);

		/* Note that the call to as_unmap will free the vnode */
		(void) as_unmap(as, uvaddr, len);
		kmem_free(php, sizeof (*php));
		return (ret);
	}

	goto fail;
	/*NOTREACHED*/
}
436 436
437 437 /*
438 438 * The guts of the PHYSMEM_MAP ioctl.
439 439 * Map the given PA to the appropriate VA if PHYSMEM_SETUP ioctl has already
440 440 * been called for this PA range.
441 441 * Returns 0 on success with the following error codes on failure:
442 442 * EPERM - The requested page is long term locked, and thus repeated
443 443 * requests to allocate this page will likely fail.
444 444 * EAGAIN - The requested page could not be allocated, but it is believed
445 445 * that future attempts could succeed.
446 446 * ENOMEM - There was not enough free memory in the system to safely
447 447 * map the requested page.
448 448 * EINVAL - The requested paddr was not PAGESIZE aligned or the
449 449 * PHYSMEM_SETUP ioctl was not called for this page.
450 450  * ENOENT - The requested page was inside the kernel cage, and the
451 451 * PHYSMEM_CAGE flag was not set.
452 452 * EBUSY - The requested page is retired and the PHYSMEM_RETIRE flag
453 453 * was not set.
454 454 */
455 455 static int
456 456 physmem_map_addrs(struct physmem_map_param *pmpp)
457 457 {
458 458 caddr_t uvaddr;
459 459 page_t *pp;
460 460 uint64_t req_paddr;
461 461 struct vnode *vp;
462 462 int ret = 0;
463 463 struct physmem_hash *php;
464 464 uint_t flags = 0;
465 465
466 466 ASSERT(pmpp != NULL);
467 467 req_paddr = pmpp->req_paddr;
468 468
469 469 if (!IS_P2ALIGNED(req_paddr, PAGESIZE))
470 470 return (EINVAL);
471 471 /* Find the vnode for this map request */
472 472 rw_enter(&pph_rwlock, RW_READER);
473 473 php = physmem_get_hash(req_paddr, PAGESIZE, curproc);
474 474 if (php == NULL) {
475 475 rw_exit(&pph_rwlock);
476 476 return (EINVAL);
477 477 }
478 478 vp = php->ph_vnode;
479 479 uvaddr = php->ph_base_va + (req_paddr - php->ph_base_pa);
480 480 rw_exit(&pph_rwlock);
481 481
482 482 pp = page_numtopp_nolock(btop((size_t)req_paddr));
483 483 if (pp == NULL) {
484 484 pmpp->ret_va = NULL;
485 485 return (EPERM);
486 486 }
487 487
488 488 /*
489 489 * Check to see if page already mapped correctly. This can happen
490 490 * when we failed to capture a page previously and it was captured
491 491 * asynchronously for us. Return success in this case.
492 492 */
493 493 if (pp->p_vnode == vp) {
494 494 ASSERT(pp->p_offset == (u_offset_t)req_paddr);
495 495 pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
496 496 return (0);
497 497 }
498 498
499 499 /*
500 500 * physmem should be responsible for checking for cage
501 501 * and prom pages.
502 502 */
503 503 if (pmpp->flags & PHYSMEM_CAGE)
504 504 flags = CAPTURE_GET_CAGE;
505 505 if (pmpp->flags & PHYSMEM_RETIRED)
506 506 flags |= CAPTURE_GET_RETIRED;
507 507
508 508 ret = page_trycapture(pp, 0, flags | CAPTURE_PHYSMEM, curproc);
509 509
510 510 if (ret != 0) {
511 511 pmpp->ret_va = NULL;
512 512 return (ret);
513 513 } else {
514 514 pmpp->ret_va = (uint64_t)(uintptr_t)uvaddr;
515 515 return (0);
516 516 }
517 517 }
518 518
/*
 * Map the given page into the process's address space if possible.
 * We actually only hash the page in on the correct vnode as the page
 * will be mapped via segvn_pagefault.
 * returns 0 on success
 * returns 1 if there is no need to map this page anymore (process exited)
 * returns -1 if we failed to map the page.
 *
 * Registered as the PC_PHYSMEM page-capture callback (see
 * physmem_attach); arg is the proc_t that requested the page.
 */
int
map_page_proc(page_t *pp, void *arg, uint_t flags)
{
	struct vnode *vp;
	proc_t *procp = (proc_t *)arg;
	int ret;
	u_offset_t paddr = (u_offset_t)ptob(pp->p_pagenum);
	struct physmem_hash *php;

	ASSERT(pp != NULL);

	/*
	 * Check against availrmem to make sure that we're not low on memory.
	 * We check again here as ASYNC requests do not do this check elsewhere.
	 * We return 1 as we don't want the page to have the PR_CAPTURE bit
	 * set or be on the page capture hash.
	 */
	if (swapfs_minfree > availrmem + 1) {
		page_free(pp, 1);
		return (1);
	}

	/*
	 * If this is an asynchronous request for the current process,
	 * we can not map the page as it's possible that we are also in the
	 * process of unmapping the page which could result in a deadlock
	 * with the as lock.
	 */
	if ((flags & CAPTURE_ASYNC) && (curproc == procp)) {
		page_free(pp, 1);
		return (-1);
	}

	/* only return zeroed out pages */
	pagezero(pp, 0, PAGESIZE);

	rw_enter(&pph_rwlock, RW_READER);
	php = physmem_get_hash(paddr, PAGESIZE, procp);
	if (php == NULL) {
		rw_exit(&pph_rwlock);
		/*
		 * Free the page as there is no longer a valid outstanding
		 * request for this page.
		 */
		page_free(pp, 1);
		return (1);
	}

	vp = php->ph_vnode;

	/*
	 * We need to protect against a possible deadlock here where we own
	 * the vnode page hash mutex and want to acquire it again as there
	 * are locations in the code, where we unlock a page while holding
	 * the mutex which can lead to the page being captured and eventually
	 * end up here.
	 */
	if (mutex_owned(page_vnode_mutex(vp))) {
		rw_exit(&pph_rwlock);
		page_free(pp, 1);
		return (-1);
	}

	/* hash the page onto the physmem vnode at its physical offset */
	ret = page_hashin(pp, vp, paddr, NULL);
	rw_exit(&pph_rwlock);
	if (ret == 0) {
		page_free(pp, 1);
		return (-1);
	}

	page_downgrade(pp);

	/* the captured page is charged against available memory */
	mutex_enter(&freemem_lock);
	availrmem--;
	mutex_exit(&freemem_lock);

	return (0);
}
605 605
606 606 /*
607 607 * The guts of the PHYSMEM_DESTROY ioctl.
608 608 * The cookie passed in will provide all of the information needed to
609 609 * free up the address space and physical memory associated with the
610 610  * corresponding PHYSMEM_SETUP ioctl.
611 611 * Returns 0 on success with the following error codes on failure:
612 612 * EINVAL - The cookie supplied is not valid.
613 613 */
614 614 int
615 615 physmem_destroy_addrs(uint64_t p_cookie)
616 616 {
617 617 struct as *as = curproc->p_as;
618 618 size_t len;
619 619 caddr_t uvaddr;
620 620
621 621 rw_enter(&pph_rwlock, RW_READER);
622 622 if (physmem_validate_cookie(p_cookie) == 0) {
623 623 rw_exit(&pph_rwlock);
624 624 return (EINVAL);
625 625 }
626 626
627 627 len = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_seg_len;
628 628 uvaddr = ((struct physmem_hash *)(uintptr_t)p_cookie)->ph_base_va;
629 629 rw_exit(&pph_rwlock);
630 630
631 631 (void) as_unmap(as, uvaddr, len);
632 632
633 633 return (0);
634 634 }
635 635
/*
 * If the page has been hashed into the physmem vnode, then just look it up
 * and return it via pl, otherwise return ENOMEM as the map ioctl has not
 * succeeded on the given page.
 */
/*ARGSUSED*/
static int
physmem_getpage(struct vnode *vp, offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
    struct cred *cr, caller_context_t *ct)
{
	page_t *pp;

	ASSERT(len == PAGESIZE);
	ASSERT(AS_READ_HELD(seg->s_as));

	/*
	 * If the page is in the hash, then we successfully claimed this
	 * page earlier, so return it to the caller.
	 */
	pp = page_lookup(vp, off, SE_SHARED);
	if (pp != NULL) {
		/* exactly one page, NULL-terminated list */
		pl[0] = pp;
		pl[1] = NULL;
		*protp = PROT_ALL;
		return (0);
	}
	return (ENOMEM);
}
665 665
666 666 /*
667 667 * We can not allow a process mapping /dev/physmem pages to fork as there can
668 668 * only be a single mapping to a /dev/physmem page at a given time. Thus, the
669 669 * return of EINVAL when we are not working on our own address space.
670 670 * Otherwise we return zero as this function is required for normal operation.
671 671 */
672 672 /*ARGSUSED*/
673 673 static int
674 674 physmem_addmap(struct vnode *vp, offset_t off, struct as *as,
675 675 caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
676 676 struct cred *cred, caller_context_t *ct)
677 677 {
678 678 if (curproc->p_as != as) {
679 679 return (EINVAL);
680 680 }
681 681 return (0);
682 682 }
683 683
/* Will always get called for removing a whole segment. */
/*ARGSUSED*/
static int
physmem_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    struct cred *cred, caller_context_t *ct)
{
	/*
	 * Release our hold on the vnode so that the final VN_RELE will
	 * call physmem_inactive to clean things up.
	 */
	VN_RELE(vp);

	return (0);
}
699 699
/*
 * Clean up all the pages belonging to this vnode and then free it.
 */
/*ARGSUSED*/
static void
physmem_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
{
	page_t *pp;

	/*
	 * Remove the vnode from the hash now, to prevent asynchronous
	 * attempts to map into this vnode. This avoids a deadlock
	 * where two threads try to get into this logic at the same
	 * time and try to map the pages they are destroying into the
	 * other's address space.
	 * If it's not in the hash, just free it.
	 */
	if (physmem_remove_vnode_hash(vp) == 0) {
		ASSERT(vp->v_pages == NULL);
		vn_free(vp);
		physmem_remove_hash_proc();
		mutex_enter(&physmem_mutex);
		physmem_vnodecnt--;
		mutex_exit(&physmem_mutex);
		return;
	}

	/*
	 * At this point in time, no other logic can be adding or removing
	 * pages from the vnode, otherwise the v_pages list could be inaccurate.
	 */

	/* destroy every page still hashed onto this vnode */
	while ((pp = vp->v_pages) != NULL) {
		page_t *rpp;
		if (page_tryupgrade(pp)) {
			/*
			 * set lckcnt for page_destroy to do availrmem
			 * accounting
			 */
			pp->p_lckcnt = 1;
			page_destroy(pp, 0);
		} else {
			/* failure to lock should be transient */
			rpp = page_lookup(vp, ptob(pp->p_pagenum), SE_SHARED);
			if (rpp != pp) {
				page_unlock(rpp);
				continue;
			}
			page_unlock(pp);
		}
	}
	vn_free(vp);
	physmem_remove_hash_proc();
	mutex_enter(&physmem_mutex);
	physmem_vnodecnt--;
	mutex_exit(&physmem_mutex);
}
757 757
/*
 * ioctl dispatcher: copy the user's parameter struct in, run the
 * corresponding worker, and (for SETUP/MAP) copy the updated struct
 * back out.  Returns the worker's error code, EFAULT on a failed
 * copyin/copyout, or ENOTSUP for an unknown command.
 */
/*ARGSUSED*/
static int
physmem_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;

	switch (cmd) {
	case PHYSMEM_SETUP:
		{
			struct physmem_setup_param psp;
			if (ddi_copyin((void *)arg, &psp,
			    sizeof (struct physmem_setup_param), 0))
				return (EFAULT);
			ret = physmem_setup_addrs(&psp);
			/* copy out even on error: user_va/cookie updated */
			if (ddi_copyout(&psp, (void *)arg, sizeof (psp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_MAP:
		{
			struct physmem_map_param pmp;
			if (ddi_copyin((void *)arg, &pmp,
			    sizeof (struct physmem_map_param), 0))
				return (EFAULT);
			ret = physmem_map_addrs(&pmp);
			if (ddi_copyout(&pmp, (void *)arg, sizeof (pmp), 0))
				return (EFAULT);
		}
		break;
	case PHYSMEM_DESTROY:
		{
			uint64_t cookie;
			if (ddi_copyin((void *)arg, &cookie,
			    sizeof (uint64_t), 0))
				return (EFAULT);
			ret = physmem_destroy_addrs(cookie);
		}
		break;
	default:
		return (ENOTSUP);
	}
	return (ret);
}
802 802
/*
 * open(9E): require read/write access and both the resource and
 * lock-memory privileges.  Logs a one-time notice because captured
 * pages are long-term locked and can interfere with DR.
 */
/*ARGSUSED*/
static int
physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int ret;
	static int msg_printed = 0;

	if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
		return (EINVAL);
	}

	/* need to make sure we have the right privileges */
	if ((ret = secpolicy_resource(credp)) != 0)
		return (ret);
	if ((ret = secpolicy_lock_memory(credp)) != 0)
		return (ret);

	if (msg_printed == 0) {
		cmn_err(CE_NOTE, "!driver has been opened. This driver may "
		    "take out long term locks on pages which may impact "
		    "dynamic reconfiguration events");
		msg_printed = 1;
	}

	return (0);
}
829 829
/* close(9E): no per-open state is kept, so this is a no-op. */
/*ARGSUSED*/
static int
physmem_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	return (0);
}
836 836
837 837 /*ARGSUSED*/
838 838 static int
839 839 physmem_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd,
840 840 void *arg, void **resultp)
841 841 {
842 842 switch (infocmd) {
843 843 case DDI_INFO_DEVT2DEVINFO:
844 844 *resultp = physmem_dip;
845 845 return (DDI_SUCCESS);
846 846
847 847 case DDI_INFO_DEVT2INSTANCE:
848 848 *resultp = (void *)(ulong_t)getminor((dev_t)arg);
849 849 return (DDI_SUCCESS);
850 850
851 851 default:
852 852 return (DDI_FAILURE);
853 853 }
854 854 }
855 855
/*
 * attach(9E): create the minor node, build the vnodeops, reset the
 * per-process hash, and register the PC_PHYSMEM page-capture callback.
 * DDI_RESUME is accepted as a no-op.
 */
static int
physmem_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR,
	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
		return (DDI_FAILURE);

	physmem_dip = dip;

	/* Initialize driver specific data */
	if (physmem_setup_vnops()) {
		ddi_remove_minor_node(dip, ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	for (i = 0; i < PPH_SIZE; i++)
		pph[i] = NULL;

	page_capture_register_callback(PC_PHYSMEM, 10000,
	    map_page_proc);

	return (DDI_SUCCESS);
}
888 888
/*
 * detach(9E): tear down vnodeops and the capture callback, but only if
 * no physmem vnodes are outstanding.  DDI_SUSPEND is accepted as a
 * no-op.
 *
 * NOTE(review): on the busy path this returns EBUSY rather than
 * DDI_FAILURE.  EBUSY != DDI_SUCCESS, so callers comparing against
 * DDI_SUCCESS still see a failure, but the value is not a DDI return
 * code — confirm whether this is intentional.
 */
static int
physmem_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	ASSERT(physmem_dip == dip);

	mutex_enter(&physmem_mutex);
	if (physmem_vnodecnt == 0) {
		if (physmem_vnodeops != NULL) {
			vn_freevnodeops(physmem_vnodeops);
			physmem_vnodeops = NULL;
			page_capture_unregister_callback(PC_PHYSMEM);
		}
	} else {
		ret = EBUSY;
	}
	mutex_exit(&physmem_mutex);
	if (ret == DDI_SUCCESS)
		ddi_remove_minor_node(dip, ddi_get_name(dip));
	return (ret);
}
918 918
/* Character/block entry points (cb_ops(9S)); only open/close/ioctl exist. */
static struct cb_ops physmem_cb_ops = {
	physmem_open,	/* open */
	physmem_close,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	physmem_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* chpoll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* cb_str */
	D_NEW | D_MP | D_DEVMAP,	/* cb_flag */
	CB_REV,		/* cb_rev */
	NULL,		/* cb_aread */
	NULL		/* cb_awrite */
};
939 939
/* Device driver entry points (dev_ops(9S)). */
static struct dev_ops physmem_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	physmem_getinfo,	/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	physmem_attach,		/* devo_attach */
	physmem_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&physmem_cb_ops,	/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_needed,	/* quiesce */
};
954 954
/* Module linkage: this module is a device driver. */
static struct modldrv modldrv = {
	&mod_driverops,
	"physmem driver",
	&physmem_ops
};
960 960
/*
 * Module linkage with a single driver entry; the ml_linkage array is
 * brace-enclosed and NULL-terminated (illumos issue 7127).
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	{ &modldrv, NULL }
};
966 965
/* Loadable-module entry point: install the driver. */
int
_init(void)
{
	return (mod_install(&modlinkage));
}
972 971
/* Loadable-module entry point: report module information. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
978 977
/* Loadable-module entry point: remove the driver. */
int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX