7127 remove -Wno-missing-braces from Makefile.uts
--- old/usr/src/uts/common/io/mem.c
+++ new/usr/src/uts/common/io/mem.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 2015, Joyent, Inc. All rights reserved.
28 28 */
29 29
30 30 /*
31 31 * Memory special file
32 32 */
33 33
34 34 #include <sys/types.h>
35 35 #include <sys/param.h>
36 36 #include <sys/user.h>
37 37 #include <sys/buf.h>
38 38 #include <sys/systm.h>
39 39 #include <sys/cred.h>
40 40 #include <sys/vm.h>
41 41 #include <sys/uio.h>
42 42 #include <sys/mman.h>
43 43 #include <sys/kmem.h>
44 44 #include <vm/seg.h>
45 45 #include <vm/page.h>
46 46 #include <sys/stat.h>
47 47 #include <sys/vmem.h>
48 48 #include <sys/memlist.h>
49 49 #include <sys/bootconf.h>
50 50
51 51 #include <vm/seg_vn.h>
52 52 #include <vm/seg_dev.h>
53 53 #include <vm/seg_kmem.h>
54 54 #include <vm/seg_kp.h>
55 55 #include <vm/seg_kpm.h>
56 56 #include <vm/hat.h>
57 57
58 58 #include <sys/conf.h>
59 59 #include <sys/mem.h>
60 60 #include <sys/types.h>
61 61 #include <sys/conf.h>
62 62 #include <sys/param.h>
63 63 #include <sys/systm.h>
64 64 #include <sys/errno.h>
65 65 #include <sys/modctl.h>
66 66 #include <sys/memlist.h>
67 67 #include <sys/ddi.h>
68 68 #include <sys/sunddi.h>
69 69 #include <sys/debug.h>
70 70 #include <sys/fm/protocol.h>
71 71
72 72 #if defined(__sparc)
73 73 extern int cpu_get_mem_name(uint64_t, uint64_t *, uint64_t, char *, int, int *);
74 74 extern int cpu_get_mem_info(uint64_t, uint64_t, uint64_t *, uint64_t *,
75 75 uint64_t *, int *, int *, int *);
76 76 extern size_t cpu_get_name_bufsize(void);
77 77 extern int cpu_get_mem_sid(char *, char *, int, int *);
78 78 extern int cpu_get_mem_addr(char *, char *, uint64_t, uint64_t *);
79 79 #elif defined(__x86)
80 80 #include <sys/cpu_module.h>
81 81 #endif /* __sparc */
82 82
83 83 /*
84 84 * Turn a byte length into a pagecount. The DDI btop takes a
85 85 * 32-bit size on 32-bit machines; this handles 64-bit sizes for
86 86 * large physical-memory 32-bit machines.
87 87 */
88 88 #define BTOP(x) ((pgcnt_t)((x) >> _pageshift))
89 89
90 90 static kmutex_t mm_lock;
91 91 static caddr_t mm_map;
92 92
93 93 static dev_info_t *mm_dip; /* private copy of devinfo pointer */
94 94
95 95 static int mm_kmem_io_access;
96 96
97 97 static int mm_kstat_update(kstat_t *ksp, int rw);
98 98 static int mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw);
99 99
100 100 static int mm_read_mem_name(intptr_t data, mem_name_t *mem_name);
101 101
102 102 #define MM_KMEMLOG_NENTRIES 64
103 103
104 104 static int mm_kmemlogent;
105 105 static mm_logentry_t mm_kmemlog[MM_KMEMLOG_NENTRIES];
106 106
107 107 /*
108 108 * On kmem/allkmem writes, we log information that might be useful in the event
109 109 * that a write is errant (that is, due to operator error) and induces a later
110 110 * problem. Note that (in particular) in the event of such operator-induced
111 111 * corruption, a search over the kernel address space for the corrupted
112 112 * address will yield the ring buffer entry that recorded the write. And
113 113 * should it seem baroque or otherwise unnecessary, yes, we need this kind of
114 114 * auditing facility and yes, we learned that the hard way: disturbingly,
115 115 * there exist recommendations for "tuning" the system that involve writing to
116 116 * kernel memory addresses via the kernel debugger, and -- as we discovered --
117 117 * these can easily be applied incorrectly or unsafely, yielding an entirely
118 118 * undebuggable "can't happen" kind of panic.
119 119 */
120 120 static void
121 121 mm_logkmem(struct uio *uio)
122 122 {
123 123 mm_logentry_t *ent;
124 124 proc_t *p = curthread->t_procp;
125 125
126 126 mutex_enter(&mm_lock);
127 127
128 128 ent = &mm_kmemlog[mm_kmemlogent++];
129 129
130 130 if (mm_kmemlogent == MM_KMEMLOG_NENTRIES)
131 131 mm_kmemlogent = 0;
132 132
133 133 ent->mle_vaddr = (uintptr_t)uio->uio_loffset;
134 134 ent->mle_len = uio->uio_resid;
135 135 gethrestime(&ent->mle_hrestime);
136 136 ent->mle_hrtime = gethrtime();
137 137 ent->mle_pid = p->p_pidp->pid_id;
138 138
139 139 (void) strncpy(ent->mle_psargs,
140 140 p->p_user.u_psargs, sizeof (ent->mle_psargs));
141 141
142 142 mutex_exit(&mm_lock);
143 143 }
144 144
145 145 /*ARGSUSED1*/
146 146 static int
147 147 mm_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
148 148 {
149 149 int i;
150 150 struct mem_minor {
151 151 char *name;
152 152 minor_t minor;
153 153 int privonly;
154 154 const char *rdpriv;
155 155 const char *wrpriv;
156 156 mode_t priv_mode;
157 157 } mm[] = {
158 158 { "mem", M_MEM, 0, NULL, "all", 0640 },
159 159 { "kmem", M_KMEM, 0, NULL, "all", 0640 },
160 160 { "allkmem", M_ALLKMEM, 0, "all", "all", 0600 },
161 161 { "null", M_NULL, PRIVONLY_DEV, NULL, NULL, 0666 },
162 162 { "zero", M_ZERO, PRIVONLY_DEV, NULL, NULL, 0666 },
163 163 };
164 164 kstat_t *ksp;
165 165
166 166 mutex_init(&mm_lock, NULL, MUTEX_DEFAULT, NULL);
167 167 mm_map = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
168 168
169 169 for (i = 0; i < (sizeof (mm) / sizeof (mm[0])); i++) {
170 170 if (ddi_create_priv_minor_node(devi, mm[i].name, S_IFCHR,
171 171 mm[i].minor, DDI_PSEUDO, mm[i].privonly,
172 172 mm[i].rdpriv, mm[i].wrpriv, mm[i].priv_mode) ==
173 173 DDI_FAILURE) {
174 174 ddi_remove_minor_node(devi, NULL);
175 175 return (DDI_FAILURE);
176 176 }
177 177 }
178 178
179 179 mm_dip = devi;
180 180
181 181 ksp = kstat_create("mm", 0, "phys_installed", "misc",
182 182 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE | KSTAT_FLAG_VIRTUAL);
183 183 if (ksp != NULL) {
184 184 ksp->ks_update = mm_kstat_update;
185 185 ksp->ks_snapshot = mm_kstat_snapshot;
186 186 ksp->ks_lock = &mm_lock; /* XXX - not really needed */
187 187 kstat_install(ksp);
188 188 }
189 189
190 190 mm_kmem_io_access = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
191 191 "kmem_io_access", 0);
192 192
193 193 return (DDI_SUCCESS);
194 194 }
195 195
196 196 /*ARGSUSED*/
197 197 static int
198 198 mm_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
199 199 {
200 200 register int error;
201 201
202 202 switch (infocmd) {
203 203 case DDI_INFO_DEVT2DEVINFO:
204 204 *result = (void *)mm_dip;
205 205 error = DDI_SUCCESS;
206 206 break;
207 207 case DDI_INFO_DEVT2INSTANCE:
208 208 *result = (void *)0;
209 209 error = DDI_SUCCESS;
210 210 break;
211 211 default:
212 212 error = DDI_FAILURE;
213 213 }
214 214 return (error);
215 215 }
216 216
217 217 /*ARGSUSED1*/
218 218 static int
219 219 mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
220 220 {
221 221 switch (getminor(*devp)) {
222 222 case M_NULL:
223 223 case M_ZERO:
224 224 case M_MEM:
225 225 case M_KMEM:
226 226 case M_ALLKMEM:
227 227 /* standard devices */
228 228 break;
229 229
230 230 default:
231 231 /* Unsupported or unknown type */
232 232 return (EINVAL);
233 233 }
234 234 /* must be character device */
235 235 if (typ != OTYP_CHR)
236 236 return (EINVAL);
237 237 return (0);
238 238 }
239 239
240 240 struct pollhead mm_pollhd;
241 241
242 242 /*ARGSUSED*/
243 243 static int
244 244 mmchpoll(dev_t dev, short events, int anyyet, short *reventsp,
245 245 struct pollhead **phpp)
246 246 {
247 247 switch (getminor(dev)) {
248 248 case M_NULL:
249 249 case M_ZERO:
250 250 case M_MEM:
251 251 case M_KMEM:
252 252 case M_ALLKMEM:
253 253 *reventsp = events & (POLLIN | POLLOUT | POLLPRI | POLLRDNORM |
254 254 POLLWRNORM | POLLRDBAND | POLLWRBAND);
255 255 /*
256 256 * A non-NULL pollhead pointer should be returned in case
257 257 * the user polls for 0 events.
258 258 */
259 259 *phpp = !anyyet && !*reventsp ?
260 260 &mm_pollhd : (struct pollhead *)NULL;
261 261 return (0);
262 262 default:
263 263 /* no other devices currently support polling */
264 264 return (ENXIO);
265 265 }
266 266 }
267 267
268 268 static int
269 269 mmpropop(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
270 270 char *name, caddr_t valuep, int *lengthp)
271 271 {
272 272 /*
273 273 * Implement zero size to reduce overhead (avoid two failing
274 274 * property lookups per stat).
275 275 */
276 276 return (ddi_prop_op_size(dev, dip, prop_op,
277 277 flags, name, valuep, lengthp, 0));
278 278 }
279 279
280 280 static int
281 281 mmio(struct uio *uio, enum uio_rw rw, pfn_t pfn, off_t pageoff, int allowio,
282 282 page_t *pp)
283 283 {
284 284 int error = 0;
285 285 int devload = 0;
286 286 int is_memory = pf_is_memory(pfn);
287 287 size_t nbytes = MIN((size_t)(PAGESIZE - pageoff),
288 288 (size_t)uio->uio_iov->iov_len);
289 289 caddr_t va = NULL;
290 290
291 291 mutex_enter(&mm_lock);
292 292
293 293 if (is_memory && kpm_enable) {
294 294 if (pp)
295 295 va = hat_kpm_mapin(pp, NULL);
296 296 else
297 297 va = hat_kpm_mapin_pfn(pfn);
298 298 }
299 299
300 300 if (va == NULL) {
301 301 hat_devload(kas.a_hat, mm_map, PAGESIZE, pfn,
302 302 (uint_t)(rw == UIO_READ ? PROT_READ : PROT_READ|PROT_WRITE),
303 303 HAT_LOAD_NOCONSIST|HAT_LOAD_LOCK);
304 304 va = mm_map;
305 305 devload = 1;
306 306 }
307 307
308 308 if (!is_memory) {
309 309 if (allowio) {
310 310 size_t c = uio->uio_iov->iov_len;
311 311
312 312 if (ddi_peekpokeio(NULL, uio, rw,
313 313 (caddr_t)(uintptr_t)uio->uio_loffset, c,
314 314 sizeof (int32_t)) != DDI_SUCCESS)
315 315 error = EFAULT;
316 316 } else
317 317 error = EIO;
318 318 } else
319 319 error = uiomove(va + pageoff, nbytes, rw, uio);
320 320
321 321 if (devload)
322 322 hat_unload(kas.a_hat, mm_map, PAGESIZE, HAT_UNLOAD_UNLOCK);
323 323 else if (pp)
324 324 hat_kpm_mapout(pp, NULL, va);
325 325 else
326 326 hat_kpm_mapout_pfn(pfn);
327 327
328 328 mutex_exit(&mm_lock);
329 329 return (error);
330 330 }
331 331
332 332 static int
333 333 mmpagelock(struct as *as, caddr_t va)
334 334 {
335 335 struct seg *seg;
336 336 int i;
337 337
338 338 AS_LOCK_ENTER(as, RW_READER);
339 339 seg = as_segat(as, va);
340 340 i = (seg != NULL)? SEGOP_CAPABLE(seg, S_CAPABILITY_NOMINFLT) : 0;
341 341 AS_LOCK_EXIT(as);
342 342
343 343 return (i);
344 344 }
345 345
346 346 #ifdef __sparc
347 347
348 348 #define NEED_LOCK_KVADDR(kva) mmpagelock(&kas, kva)
349 349
350 350 #else /* __i386, __amd64 */
351 351
352 352 #define NEED_LOCK_KVADDR(va) 0
353 353
354 354 #endif /* __sparc */
355 355
356 356 /*ARGSUSED3*/
357 357 static int
358 358 mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
359 359 {
360 360 pfn_t v;
361 361 struct iovec *iov;
362 362 int error = 0;
363 363 size_t c;
364 364 ssize_t oresid = uio->uio_resid;
365 365 minor_t minor = getminor(dev);
366 366
367 367 while (uio->uio_resid > 0 && error == 0) {
368 368 iov = uio->uio_iov;
369 369 if (iov->iov_len == 0) {
370 370 uio->uio_iov++;
371 371 uio->uio_iovcnt--;
372 372 if (uio->uio_iovcnt < 0)
373 373 panic("mmrw");
374 374 continue;
375 375 }
376 376 switch (minor) {
377 377
378 378 case M_MEM:
379 379 memlist_read_lock();
380 380 if (!address_in_memlist(phys_install,
381 381 (uint64_t)uio->uio_loffset, 1)) {
382 382 memlist_read_unlock();
383 383 error = EFAULT;
384 384 break;
385 385 }
386 386 memlist_read_unlock();
387 387
388 388 v = BTOP((u_offset_t)uio->uio_loffset);
389 389 error = mmio(uio, rw, v,
390 390 uio->uio_loffset & PAGEOFFSET, 0, NULL);
391 391 break;
392 392
393 393 case M_KMEM:
394 394 case M_ALLKMEM:
395 395 {
396 396 page_t **ppp = NULL;
397 397 caddr_t vaddr = (caddr_t)uio->uio_offset;
398 398 int try_lock = NEED_LOCK_KVADDR(vaddr);
399 399 int locked = 0;
400 400
401 401 if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
402 402 break;
403 403
404 404 if (rw == UIO_WRITE)
405 405 mm_logkmem(uio);
406 406
407 407 /*
408 408 * If vaddr does not map a valid page, as_pagelock()
409 409 * will return failure. Hence we can't check the
410 410 * return value and return EFAULT here as we'd like.
411 411 * seg_kp and seg_kpm do not properly support
412 412 * as_pagelock() for this context so we avoid it
413 413 * using the try_lock set check above. Some day when
414 414 * the kernel page locking gets redesigned all this
415 415 * muck can be cleaned up.
416 416 */
417 417 if (try_lock)
418 418 locked = (as_pagelock(&kas, &ppp, vaddr,
419 419 PAGESIZE, S_WRITE) == 0);
420 420
421 421 v = hat_getpfnum(kas.a_hat,
422 422 (caddr_t)(uintptr_t)uio->uio_loffset);
423 423 if (v == PFN_INVALID) {
424 424 if (locked)
425 425 as_pageunlock(&kas, ppp, vaddr,
426 426 PAGESIZE, S_WRITE);
427 427 error = EFAULT;
428 428 break;
429 429 }
430 430
431 431 error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
432 432 minor == M_ALLKMEM || mm_kmem_io_access,
433 433 (locked && ppp) ? *ppp : NULL);
434 434 if (locked)
435 435 as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
436 436 S_WRITE);
437 437 }
438 438
439 439 break;
440 440
441 441 case M_ZERO:
442 442 if (rw == UIO_READ) {
443 443 label_t ljb;
444 444
445 445 if (on_fault(&ljb)) {
446 446 no_fault();
447 447 error = EFAULT;
448 448 break;
449 449 }
450 450 uzero(iov->iov_base, iov->iov_len);
451 451 no_fault();
452 452 uio->uio_resid -= iov->iov_len;
453 453 uio->uio_loffset += iov->iov_len;
454 454 break;
455 455 }
456 456 /* else it's a write, fall through to NULL case */
457 457 /*FALLTHROUGH*/
458 458
459 459 case M_NULL:
460 460 if (rw == UIO_READ)
461 461 return (0);
462 462 c = iov->iov_len;
463 463 iov->iov_base += c;
464 464 iov->iov_len -= c;
465 465 uio->uio_loffset += c;
466 466 uio->uio_resid -= c;
467 467 break;
468 468
469 469 }
470 470 }
471 471 return (uio->uio_resid == oresid ? error : 0);
472 472 }
473 473
474 474 static int
475 475 mmread(dev_t dev, struct uio *uio, cred_t *cred)
476 476 {
477 477 return (mmrw(dev, uio, UIO_READ, cred));
478 478 }
479 479
480 480 static int
481 481 mmwrite(dev_t dev, struct uio *uio, cred_t *cred)
482 482 {
483 483 return (mmrw(dev, uio, UIO_WRITE, cred));
484 484 }
485 485
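As a hedged userland illustration (not part of this file): reading installed
physical memory through /dev/mem is ordinary character I/O into mmrw() above,
and an offset outside the phys_install memlist fails with EFAULT. A minimal
sketch, assuming sufficient privilege and a 4K page size:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	char buf[4096];			/* assumes PAGESIZE == 4096 */
	unsigned long long pa;
	int fd;

	if (argc != 2)
		return (1);
	pa = strtoull(argv[1], NULL, 0);	/* physical address */

	if ((fd = open("/dev/mem", O_RDONLY)) == -1) {
		perror("open /dev/mem");
		return (1);
	}
	/* EFAULT here means pa is not in installed physical memory. */
	if (pread(fd, buf, sizeof (buf), (off_t)pa) == -1) {
		perror("pread");
		return (1);
	}
	(void) write(STDOUT_FILENO, buf, sizeof (buf));
	return (0);
}
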
486 486 /*
487 487 * Private ioctl for libkvm to support kvm_physaddr().
488 488 * Given an address space and a VA, compute the PA.
489 489 */
490 490 static int
491 491 mmioctl_vtop(intptr_t data)
492 492 {
493 493 #ifdef _SYSCALL32
494 494 mem_vtop32_t vtop32;
495 495 #endif
496 496 mem_vtop_t mem_vtop;
497 497 proc_t *p;
498 498 pfn_t pfn = (pfn_t)PFN_INVALID;
499 499 pid_t pid = 0;
500 500 struct as *as;
501 501 struct seg *seg;
502 502
503 503 if (get_udatamodel() == DATAMODEL_NATIVE) {
504 504 if (copyin((void *)data, &mem_vtop, sizeof (mem_vtop_t)))
505 505 return (EFAULT);
506 506 }
507 507 #ifdef _SYSCALL32
508 508 else {
509 509 if (copyin((void *)data, &vtop32, sizeof (mem_vtop32_t)))
510 510 return (EFAULT);
511 511 mem_vtop.m_as = (struct as *)(uintptr_t)vtop32.m_as;
512 512 mem_vtop.m_va = (void *)(uintptr_t)vtop32.m_va;
513 513
514 514 if (mem_vtop.m_as != NULL)
515 515 return (EINVAL);
516 516 }
517 517 #endif
518 518
519 519 if (mem_vtop.m_as == &kas) {
520 520 pfn = hat_getpfnum(kas.a_hat, mem_vtop.m_va);
521 521 } else {
522 522 if (mem_vtop.m_as == NULL) {
523 523 /*
524 524 * Assume the calling process's address space if the
525 525 * caller didn't specify one.
526 526 */
527 527 p = curthread->t_procp;
528 528 if (p == NULL)
529 529 return (EIO);
530 530 mem_vtop.m_as = p->p_as;
531 531 }
532 532
533 533 mutex_enter(&pidlock);
534 534 for (p = practive; p != NULL; p = p->p_next) {
535 535 if (p->p_as == mem_vtop.m_as) {
536 536 pid = p->p_pid;
537 537 break;
538 538 }
539 539 }
540 540 mutex_exit(&pidlock);
541 541 if (p == NULL)
542 542 return (EIO);
543 543 p = sprlock(pid);
544 544 if (p == NULL)
545 545 return (EIO);
546 546 as = p->p_as;
547 547 if (as == mem_vtop.m_as) {
548 548 mutex_exit(&p->p_lock);
549 549 AS_LOCK_ENTER(as, RW_READER);
550 550 for (seg = AS_SEGFIRST(as); seg != NULL;
551 551 seg = AS_SEGNEXT(as, seg))
552 552 if ((uintptr_t)mem_vtop.m_va -
553 553 (uintptr_t)seg->s_base < seg->s_size)
554 554 break;
555 555 if (seg != NULL)
556 556 pfn = hat_getpfnum(as->a_hat, mem_vtop.m_va);
557 557 AS_LOCK_EXIT(as);
558 558 mutex_enter(&p->p_lock);
559 559 }
560 560 sprunlock(p);
561 561 }
562 562 mem_vtop.m_pfn = pfn;
563 563 if (pfn == PFN_INVALID)
564 564 return (EIO);
565 565
566 566 if (get_udatamodel() == DATAMODEL_NATIVE) {
567 567 if (copyout(&mem_vtop, (void *)data, sizeof (mem_vtop_t)))
568 568 return (EFAULT);
569 569 }
570 570 #ifdef _SYSCALL32
571 571 else {
572 572 vtop32.m_pfn = mem_vtop.m_pfn;
573 573 if (copyout(&vtop32, (void *)data, sizeof (mem_vtop32_t)))
574 574 return (EFAULT);
575 575 }
576 576 #endif
577 577
578 578 return (0);
579 579 }
580 580
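A hedged sketch of the caller's side (libkvm's kvm_physaddr() is the intended
consumer; this is not its actual code): leaving m_as NULL selects the calling
process's address space per the fallback above, and MEM_VTOP must be issued
on the kmem minor (see mmioctl() below):

#include <sys/types.h>
#include <sys/mem.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Illustrative only: translate a VA in the calling process to a PFN.
 */
static pfn_t
va_to_pfn(void *va)
{
	mem_vtop_t vtop;
	int fd;

	if ((fd = open("/dev/kmem", O_RDONLY)) == -1)
		return ((pfn_t)-1);		/* stands in for PFN_INVALID */

	vtop.m_as = NULL;	/* NULL: use the caller's own as */
	vtop.m_va = va;

	if (ioctl(fd, MEM_VTOP, &vtop) == -1)
		vtop.m_pfn = (pfn_t)-1;	/* EIO or EFAULT from the driver */

	(void) close(fd);
	return (vtop.m_pfn);
}
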
581 581 /*
582 582 * Given a PA, execute the given page retire command on it.
583 583 */
584 584 static int
585 585 mmioctl_page_retire(int cmd, intptr_t data)
586 586 {
587 587 extern int page_retire_test(void);
588 588 uint64_t pa;
589 589
590 590 if (copyin((void *)data, &pa, sizeof (uint64_t))) {
591 591 return (EFAULT);
592 592 }
593 593
594 594 switch (cmd) {
595 595 case MEM_PAGE_ISRETIRED:
596 596 return (page_retire_check(pa, NULL));
597 597
598 598 case MEM_PAGE_UNRETIRE:
599 599 return (page_unretire(pa));
600 600
601 601 case MEM_PAGE_RETIRE:
602 602 return (page_retire(pa, PR_FMA));
603 603
604 604 case MEM_PAGE_RETIRE_MCE:
605 605 return (page_retire(pa, PR_MCE));
606 606
607 607 case MEM_PAGE_RETIRE_UE:
608 608 return (page_retire(pa, PR_UE));
609 609
610 610 case MEM_PAGE_GETERRORS:
611 611 {
612 612 uint64_t page_errors;
613 613 int rc = page_retire_check(pa, &page_errors);
614 614 if (copyout(&page_errors, (void *)data,
615 615 sizeof (uint64_t))) {
616 616 return (EFAULT);
617 617 }
618 618 return (rc);
619 619 }
620 620
621 621 case MEM_PAGE_RETIRE_TEST:
622 622 return (page_retire_test());
623 623
624 624 }
625 625
626 626 return (EINVAL);
627 627 }
628 628
629 629 #ifdef __sparc
630 630 /*
631 631 * Given a syndrome, syndrome type, and address return the
632 632 * associated memory name in the provided data buffer.
633 633 */
634 634 static int
635 635 mmioctl_get_mem_name(intptr_t data)
636 636 {
637 637 mem_name_t mem_name;
638 638 void *buf;
639 639 size_t bufsize;
640 640 int len, err;
641 641
642 642 if ((bufsize = cpu_get_name_bufsize()) == 0)
643 643 return (ENOTSUP);
644 644
645 645 if ((err = mm_read_mem_name(data, &mem_name)) < 0)
646 646 return (err);
647 647
648 648 buf = kmem_alloc(bufsize, KM_SLEEP);
649 649
650 650 /*
651 651 * Call into cpu specific code to do the lookup.
652 652 */
653 653 if ((err = cpu_get_mem_name(mem_name.m_synd, mem_name.m_type,
654 654 mem_name.m_addr, buf, bufsize, &len)) != 0) {
655 655 kmem_free(buf, bufsize);
656 656 return (err);
657 657 }
658 658
659 659 if (len >= mem_name.m_namelen) {
660 660 kmem_free(buf, bufsize);
661 661 return (ENOSPC);
662 662 }
663 663
664 664 if (copyoutstr(buf, (char *)mem_name.m_name,
665 665 mem_name.m_namelen, NULL) != 0) {
666 666 kmem_free(buf, bufsize);
667 667 return (EFAULT);
668 668 }
669 669
670 670 kmem_free(buf, bufsize);
671 671 return (0);
672 672 }
673 673
674 674 /*
675 675 * Given a syndrome and address return information about the associated memory.
676 676 */
677 677 static int
678 678 mmioctl_get_mem_info(intptr_t data)
679 679 {
680 680 mem_info_t mem_info;
681 681 int err;
682 682
683 683 if (copyin((void *)data, &mem_info, sizeof (mem_info_t)))
684 684 return (EFAULT);
685 685
686 686 if ((err = cpu_get_mem_info(mem_info.m_synd, mem_info.m_addr,
687 687 &mem_info.m_mem_size, &mem_info.m_seg_size, &mem_info.m_bank_size,
688 688 &mem_info.m_segments, &mem_info.m_banks, &mem_info.m_mcid)) != 0)
689 689 return (err);
690 690
691 691 if (copyout(&mem_info, (void *)data, sizeof (mem_info_t)) != 0)
692 692 return (EFAULT);
693 693
694 694 return (0);
695 695 }
696 696
697 697 /*
698 698 * Given a memory name, return its associated serial id
699 699 */
700 700 static int
701 701 mmioctl_get_mem_sid(intptr_t data)
702 702 {
703 703 mem_name_t mem_name;
704 704 void *buf;
705 705 void *name;
706 706 size_t name_len;
707 707 size_t bufsize;
708 708 int len, err;
709 709
710 710 if ((bufsize = cpu_get_name_bufsize()) == 0)
711 711 return (ENOTSUP);
712 712
713 713 if ((err = mm_read_mem_name(data, &mem_name)) < 0)
714 714 return (err);
715 715
716 716 buf = kmem_alloc(bufsize, KM_SLEEP);
717 717
718 718 if (mem_name.m_namelen > 1024)
719 719 mem_name.m_namelen = 1024; /* cap at 1024 bytes */
720 720
721 721 name = kmem_alloc(mem_name.m_namelen, KM_SLEEP);
722 722
723 723 if ((err = copyinstr((char *)mem_name.m_name, (char *)name,
724 724 mem_name.m_namelen, &name_len)) != 0) {
725 725 kmem_free(buf, bufsize);
726 726 kmem_free(name, mem_name.m_namelen);
727 727 return (err);
728 728 }
729 729
730 730 /*
731 731 * Call into cpu specific code to do the lookup.
732 732 */
733 733 if ((err = cpu_get_mem_sid(name, buf, bufsize, &len)) != 0) {
734 734 kmem_free(buf, bufsize);
735 735 kmem_free(name, mem_name.m_namelen);
736 736 return (err);
737 737 }
738 738
739 739 if (len > mem_name.m_sidlen) {
740 740 kmem_free(buf, bufsize);
741 741 kmem_free(name, mem_name.m_namelen);
742 742 return (ENAMETOOLONG);
743 743 }
744 744
745 745 if (copyoutstr(buf, (char *)mem_name.m_sid,
746 746 mem_name.m_sidlen, NULL) != 0) {
747 747 kmem_free(buf, bufsize);
748 748 kmem_free(name, mem_name.m_namelen);
749 749 return (EFAULT);
750 750 }
751 751
752 752 kmem_free(buf, bufsize);
753 753 kmem_free(name, mem_name.m_namelen);
754 754 return (0);
755 755 }
756 756 #endif /* __sparc */
757 757
758 758 /*
759 759 * Private ioctls for
760 760 * libkvm to support kvm_physaddr().
761 761 * FMA support for page_retire() and memory attribute information.
762 762 */
763 763 /*ARGSUSED*/
764 764 static int
765 765 mmioctl(dev_t dev, int cmd, intptr_t data, int flag, cred_t *cred, int *rvalp)
766 766 {
767 767 if ((cmd == MEM_VTOP && getminor(dev) != M_KMEM) ||
768 768 (cmd != MEM_VTOP && getminor(dev) != M_MEM))
769 769 return (ENXIO);
770 770
771 771 switch (cmd) {
772 772 case MEM_VTOP:
773 773 return (mmioctl_vtop(data));
774 774
775 775 case MEM_PAGE_RETIRE:
776 776 case MEM_PAGE_ISRETIRED:
777 777 case MEM_PAGE_UNRETIRE:
778 778 case MEM_PAGE_RETIRE_MCE:
779 779 case MEM_PAGE_RETIRE_UE:
780 780 case MEM_PAGE_GETERRORS:
781 781 case MEM_PAGE_RETIRE_TEST:
782 782 return (mmioctl_page_retire(cmd, data));
783 783
784 784 #ifdef __sparc
785 785 case MEM_NAME:
786 786 return (mmioctl_get_mem_name(data));
787 787
788 788 case MEM_INFO:
789 789 return (mmioctl_get_mem_info(data));
790 790
791 791 case MEM_SID:
792 792 return (mmioctl_get_mem_sid(data));
793 793 #else
794 794 case MEM_NAME:
795 795 case MEM_INFO:
796 796 case MEM_SID:
797 797 return (ENOTSUP);
798 798 #endif /* __sparc */
799 799 }
800 800 return (ENXIO);
801 801 }
802 802
803 803 /*ARGSUSED2*/
804 804 static int
805 805 mmmmap(dev_t dev, off_t off, int prot)
806 806 {
807 807 pfn_t pf;
808 808 struct memlist *pmem;
809 809 minor_t minor = getminor(dev);
810 810
811 811 switch (minor) {
812 812 case M_MEM:
813 813 pf = btop(off);
814 814 memlist_read_lock();
815 815 for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
816 816 if (pf >= BTOP(pmem->ml_address) &&
817 817 pf < BTOP(pmem->ml_address + pmem->ml_size)) {
818 818 memlist_read_unlock();
819 819 return (impl_obmem_pfnum(pf));
820 820 }
821 821 }
822 822 memlist_read_unlock();
823 823 break;
824 824
825 825 case M_KMEM:
826 826 case M_ALLKMEM:
827 827 /* no longer supported with KPR */
828 828 return (-1);
829 829
830 830 case M_ZERO:
831 831 /*
832 832 * We shouldn't be mmap'ing to /dev/zero here as
833 833 * mmsegmap() should have already converted
834 834 * a mapping request for this device to a mapping
835 835 * using seg_vn for anonymous memory.
836 836 */
837 837 break;
838 838
839 839 }
840 840 return (-1);
841 841 }
842 842
843 843 /*
844 844 * This function is called when a memory device is mmap'ed.
845 845 * Set up the mapping to the correct device driver.
846 846 */
847 847 static int
848 848 mmsegmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
849 849 uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
850 850 {
851 851 struct segvn_crargs vn_a;
852 852 struct segdev_crargs dev_a;
853 853 int error;
854 854 minor_t minor;
855 855 off_t i;
856 856
857 857 minor = getminor(dev);
858 858
859 859 as_rangelock(as);
860 860 /*
861 861 * No need to worry about vac alignment on /dev/zero
862 862 * since this is a "clone" object that doesn't yet exist.
863 863 */
864 864 error = choose_addr(as, addrp, len, off,
865 865 (minor == M_MEM) || (minor == M_KMEM), flags);
866 866 if (error != 0) {
867 867 as_rangeunlock(as);
868 868 return (error);
869 869 }
870 870
871 871 switch (minor) {
872 872 case M_MEM:
873 873 /* /dev/mem cannot be mmap'ed with MAP_PRIVATE */
874 874 if ((flags & MAP_TYPE) != MAP_SHARED) {
875 875 as_rangeunlock(as);
876 876 return (EINVAL);
877 877 }
878 878
879 879 /*
880 880 * Check to ensure that the entire range is
881 881 * legal and we are not trying to map in
882 882 * more than the device will let us.
883 883 */
884 884 for (i = 0; i < len; i += PAGESIZE) {
885 885 if (mmmmap(dev, off + i, maxprot) == -1) {
886 886 as_rangeunlock(as);
887 887 return (ENXIO);
888 888 }
889 889 }
890 890
891 891 /*
892 892 * Use seg_dev segment driver for /dev/mem mapping.
893 893 */
894 894 dev_a.mapfunc = mmmmap;
895 895 dev_a.dev = dev;
896 896 dev_a.offset = off;
897 897 dev_a.type = (flags & MAP_TYPE);
898 898 dev_a.prot = (uchar_t)prot;
899 899 dev_a.maxprot = (uchar_t)maxprot;
900 900 dev_a.hat_attr = 0;
901 901
902 902 /*
903 903 * Make /dev/mem mappings non-consistent since we can't
904 904 * alias pages that don't have page structs behind them,
905 905 * such as kernel stack pages. If someone mmap()s a kernel
906 906 * stack page and if we give him a tte with cv, a line from
907 907 * that page can get into both pages of the spitfire d$.
908 908 * But a snoop from another processor will only invalidate
909 909 * the first page. This later caused the kernel (xc_attention)
910 910 * to go into an infinite loop at pil 13, and no interrupts
911 911 * could come in. See 1203630.
912 912 *
913 913 */
914 914 dev_a.hat_flags = HAT_LOAD_NOCONSIST;
915 915 dev_a.devmap_data = NULL;
916 916
917 917 error = as_map(as, *addrp, len, segdev_create, &dev_a);
918 918 break;
919 919
920 920 case M_ZERO:
921 921 /*
922 922 * Use seg_vn segment driver for /dev/zero mapping.
923 923 * Passing in a NULL amp gives us the "cloning" effect.
924 924 */
925 925 vn_a.vp = NULL;
926 926 vn_a.offset = 0;
927 927 vn_a.type = (flags & MAP_TYPE);
928 928 vn_a.prot = prot;
929 929 vn_a.maxprot = maxprot;
930 930 vn_a.flags = flags & ~MAP_TYPE;
931 931 vn_a.cred = cred;
932 932 vn_a.amp = NULL;
933 933 vn_a.szc = 0;
934 934 vn_a.lgrp_mem_policy_flags = 0;
935 935 error = as_map(as, *addrp, len, segvn_create, &vn_a);
936 936 break;
937 937
938 938 case M_KMEM:
939 939 case M_ALLKMEM:
940 940 /* No longer supported with KPR. */
941 941 error = ENXIO;
942 942 break;
943 943
944 944 case M_NULL:
945 945 /*
946 946 * Use seg_dev segment driver for /dev/null mapping.
947 947 */
948 948 dev_a.mapfunc = mmmmap;
949 949 dev_a.dev = dev;
950 950 dev_a.offset = off;
951 951 dev_a.type = 0; /* neither PRIVATE nor SHARED */
952 952 dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
953 953 dev_a.hat_attr = 0;
954 954 dev_a.hat_flags = 0;
955 955 error = as_map(as, *addrp, len, segdev_create, &dev_a);
956 956 break;
957 957
958 958 default:
959 959 error = ENXIO;
960 960 }
961 961
962 962 as_rangeunlock(as);
963 963 return (error);
964 964 }
965 965
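For illustration (not from this file): a MAP_PRIVATE mapping of /dev/zero
takes the M_ZERO arm above and, because the amp is NULL, yields zero-filled
anonymous memory, the traditional pre-MAP_ANON idiom:

#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Classic /dev/zero trick: a private mapping is backed by anonymous
 * memory (segvn_create() with a NULL amp, per mmsegmap() above).
 */
void *
zalloc_pages(size_t len)
{
	int fd = open("/dev/zero", O_RDWR);
	void *p;

	if (fd == -1)
		return (NULL);
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	(void) close(fd);		/* the mapping survives the close */
	return (p == MAP_FAILED ? NULL : p);
}
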
966 966 static struct cb_ops mm_cb_ops = {
967 967 mmopen, /* open */
968 968 nulldev, /* close */
969 969 nodev, /* strategy */
970 970 nodev, /* print */
971 971 nodev, /* dump */
972 972 mmread, /* read */
973 973 mmwrite, /* write */
974 974 mmioctl, /* ioctl */
975 975 nodev, /* devmap */
976 976 mmmmap, /* mmap */
977 977 mmsegmap, /* segmap */
978 978 mmchpoll, /* poll */
979 979 mmpropop, /* prop_op */
980 980 0, /* streamtab */
981 981 D_NEW | D_MP | D_64BIT | D_U64BIT
982 982 };
983 983
984 984 static struct dev_ops mm_ops = {
985 985 DEVO_REV, /* devo_rev, */
986 986 0, /* refcnt */
987 987 mm_info, /* get_dev_info */
988 988 nulldev, /* identify */
989 989 nulldev, /* probe */
990 990 mm_attach, /* attach */
991 991 nodev, /* detach */
992 992 nodev, /* reset */
993 993 &mm_cb_ops, /* driver operations */
994 994 (struct bus_ops *)0, /* bus operations */
995 995 NULL, /* power */
996 996 ddi_quiesce_not_needed, /* quiesce */
997 997 };
998 998
999 999 static struct modldrv modldrv = {
1000 1000 &mod_driverops, "memory driver", &mm_ops,
1001 1001 };
1002 1002
1003 1003 static struct modlinkage modlinkage = {
1004 - MODREV_1, &modldrv, NULL
1004 + MODREV_1, { &modldrv, NULL }
1005 1005 };
1006 1006
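The one change in this file: struct modlinkage ends in an array of linkage
pointers, so the old flat initializer relied on brace elision and tripped
GCC's -Wmissing-braces; the added inner braces make the array initialization
explicit without changing the generated object. Roughly the shape involved
(the real declaration lives in <sys/modctl.h>, and the array length differs
between data models):

struct modlinkage {
	int	ml_rev;			/* loadable modules revision */
	void	*ml_linkage[7];		/* null-terminated linkage list */
};
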
1007 1007 int
1008 1008 _init(void)
1009 1009 {
1010 1010 return (mod_install(&modlinkage));
1011 1011 }
1012 1012
1013 1013 int
1014 1014 _info(struct modinfo *modinfop)
1015 1015 {
1016 1016 return (mod_info(&modlinkage, modinfop));
1017 1017 }
1018 1018
1019 1019 int
1020 1020 _fini(void)
1021 1021 {
1022 1022 return (mod_remove(&modlinkage));
1023 1023 }
1024 1024
1025 1025 static int
1026 1026 mm_kstat_update(kstat_t *ksp, int rw)
1027 1027 {
1028 1028 struct memlist *pmem;
1029 1029 uint_t count;
1030 1030
1031 1031 if (rw == KSTAT_WRITE)
1032 1032 return (EACCES);
1033 1033
1034 1034 count = 0;
1035 1035 memlist_read_lock();
1036 1036 for (pmem = phys_install; pmem != NULL; pmem = pmem->ml_next) {
1037 1037 count++;
1038 1038 }
1039 1039 memlist_read_unlock();
1040 1040
1041 1041 ksp->ks_ndata = count;
1042 1042 ksp->ks_data_size = count * 2 * sizeof (uint64_t);
1043 1043
1044 1044 return (0);
1045 1045 }
1046 1046
1047 1047 static int
1048 1048 mm_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
1049 1049 {
1050 1050 struct memlist *pmem;
1051 1051 struct memunit {
1052 1052 uint64_t address;
1053 1053 uint64_t size;
1054 1054 } *kspmem;
1055 1055
1056 1056 if (rw == KSTAT_WRITE)
1057 1057 return (EACCES);
1058 1058
1059 1059 ksp->ks_snaptime = gethrtime();
1060 1060
1061 1061 kspmem = (struct memunit *)buf;
1062 1062 memlist_read_lock();
1063 1063 for (pmem = phys_install; pmem != NULL;
1064 1064 pmem = pmem->ml_next, kspmem++) {
1065 1065 if ((caddr_t)kspmem >= (caddr_t)buf + ksp->ks_data_size)
1066 1066 break;
1067 1067 kspmem->address = pmem->ml_address;
1068 1068 kspmem->size = pmem->ml_size;
1069 1069 }
1070 1070 memlist_read_unlock();
1071 1071
1072 1072 return (0);
1073 1073 }
1074 1074
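An illustrative libkstat consumer (not part of this change): the raw
mm:0:phys_installed kstat snapshots phys_install as {address, size} pairs,
ks_ndata entries of two uint64_t each, matching the two routines above:

#include <kstat.h>
#include <stdio.h>
#include <inttypes.h>

int
main(void)
{
	kstat_ctl_t *kc = kstat_open();
	kstat_t *ksp;
	uint64_t *d;
	uint_t i;

	if (kc == NULL ||
	    (ksp = kstat_lookup(kc, "mm", 0, "phys_installed")) == NULL ||
	    kstat_read(kc, ksp, NULL) == -1)
		return (1);

	d = ksp->ks_data;	/* raw data: address, size, address, size... */
	for (i = 0; i < ksp->ks_ndata; i++)
		(void) printf("%016" PRIx64 " %" PRIu64 "\n",
		    d[2 * i], d[2 * i + 1]);

	(void) kstat_close(kc);
	return (0);
}
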
1075 1075 /*
1076 1076 * Read a mem_name_t from user-space and store it in the mem_name_t
1077 1077 * pointed to by the mem_name argument.
1078 1078 */
1079 1079 static int
1080 1080 mm_read_mem_name(intptr_t data, mem_name_t *mem_name)
1081 1081 {
1082 1082 if (get_udatamodel() == DATAMODEL_NATIVE) {
1083 1083 if (copyin((void *)data, mem_name, sizeof (mem_name_t)))
1084 1084 return (EFAULT);
1085 1085 }
1086 1086 #ifdef _SYSCALL32
1087 1087 else {
1088 1088 mem_name32_t mem_name32;
1089 1089
1090 1090 if (copyin((void *)data, &mem_name32, sizeof (mem_name32_t)))
1091 1091 return (EFAULT);
1092 1092 mem_name->m_addr = mem_name32.m_addr;
1093 1093 mem_name->m_synd = mem_name32.m_synd;
1094 1094 mem_name->m_type[0] = mem_name32.m_type[0];
1095 1095 mem_name->m_type[1] = mem_name32.m_type[1];
1096 1096 mem_name->m_name = (caddr_t)(uintptr_t)mem_name32.m_name;
1097 1097 mem_name->m_namelen = (size_t)mem_name32.m_namelen;
1098 1098 mem_name->m_sid = (caddr_t)(uintptr_t)mem_name32.m_sid;
1099 1099 mem_name->m_sidlen = (size_t)mem_name32.m_sidlen;
1100 1100 }
1101 1101 #endif /* _SYSCALL32 */
1102 1102
1103 1103 return (0);
1104 1104 }