Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
*** 19,28 ****
--- 19,30 ----
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2018 Joyent, Inc.
*/
/*
* This part of the file contains the mdb support for dcmds:
* ::memseg_list
*** 44,53 ****
--- 46,58 ----
#include <mdb/mdb_target.h>
#include <vm/page.h>
#include <vm/hat_i86.h>
+ #define VA_SIGN_BIT (1UL << 47)
+ #define VA_SIGN_EXTEND(va) (((va) ^ VA_SIGN_BIT) - VA_SIGN_BIT)
+
struct pfn2pp {
pfn_t pfn;
page_t *pp;
};
*** 396,428 ****
else
mfn = mmu_btop(pte & PT_PADDR);
return (mfn);
}
- /*
- * Print a PTE in more human friendly way. The PTE is assumed to be in
- * a level 0 page table, unless -l specifies another level.
- *
- * The PTE value can be specified as the -p option, since on a 32 bit kernel
- * with PAE running it's larger than a uintptr_t.
- */
static int
do_pte_dcmd(int level, uint64_t pte)
{
static char *attr[] = {
"wrback", "wrthru", "uncached", "uncached",
"wrback", "wrthru", "wrcombine", "uncached"};
int pat_index = 0;
pfn_t mfn;
! mdb_printf("pte=%llr: ", pte);
! if (PTE_GET(pte, mmu.pt_nx))
! mdb_printf("noexec ");
mfn = pte2mfn(pte, level);
mdb_printf("%s=0x%lr ", is_xpv ? "mfn" : "pfn", mfn);
if (PTE_GET(pte, PT_NOCONSIST))
mdb_printf("noconsist ");
if (PTE_GET(pte, PT_NOSYNC))
mdb_printf("nosync ");
--- 401,427 ----
else
mfn = mmu_btop(pte & PT_PADDR);
return (mfn);
}
static int
do_pte_dcmd(int level, uint64_t pte)
{
static char *attr[] = {
"wrback", "wrthru", "uncached", "uncached",
"wrback", "wrthru", "wrcombine", "uncached"};
int pat_index = 0;
pfn_t mfn;
! mdb_printf("pte=0x%llr: ", pte);
mfn = pte2mfn(pte, level);
mdb_printf("%s=0x%lr ", is_xpv ? "mfn" : "pfn", mfn);
+ if (PTE_GET(pte, mmu.pt_nx))
+ mdb_printf("noexec ");
+
if (PTE_GET(pte, PT_NOCONSIST))
mdb_printf("noconsist ");
if (PTE_GET(pte, PT_NOSYNC))
mdb_printf("nosync ");
*** 474,529 ****
}
/*
* Print a PTE in a more human-friendly way. The PTE is assumed to be in
* a level 0 page table, unless -l specifies another level.
- *
- * The PTE value can be specified as the -p option, since on a 32 bit kernel
- * with PAE running it's larger than a uintptr_t.
*/
/*ARGSUSED*/
int
pte_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
! int level = 0;
! uint64_t pte = 0;
! char *level_str = NULL;
! char *pte_str = NULL;
init_mmu();
if (mmu.num_level == 0)
return (DCMD_ERR);
! if (mdb_getopts(argc, argv,
! 'p', MDB_OPT_STR, &pte_str,
! 'l', MDB_OPT_STR, &level_str) != argc)
return (DCMD_USAGE);
! /*
! * parse the PTE to decode, if it's 0, we don't do anything
! */
! if (pte_str != NULL) {
! pte = mdb_strtoull(pte_str);
! } else {
! if ((flags & DCMD_ADDRSPEC) == 0)
return (DCMD_USAGE);
- pte = addr;
- }
- if (pte == 0)
- return (DCMD_OK);
! /*
! * parse the level if supplied
! */
! if (level_str != NULL) {
! level = mdb_strtoull(level_str);
! if (level < 0 || level > mmu.max_level)
return (DCMD_ERR);
}
! return (do_pte_dcmd(level, pte));
}
static size_t
va2entry(htable_t *htable, uintptr_t addr)
{
--- 473,510 ----
}
/*
* Print a PTE in a more human-friendly way. The PTE is assumed to be in
* a level 0 page table, unless -l specifies another level.
*/
/*ARGSUSED*/
int
pte_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
/*
 * ::pte dcmd entry point.  The dcmd address (addr) is the PTE value to
 * decode; -l selects the page table level (default 0).  Returns
 * DCMD_USAGE when no address is given or the options do not parse,
 * DCMD_ERR when the MMU description is unavailable or the level is out
 * of range, and otherwise the result of do_pte_dcmd().
 */
! uint64_t level = 0;
init_mmu();
if (mmu.num_level == 0)
return (DCMD_ERR);
! if ((flags & DCMD_ADDRSPEC) == 0)
return (DCMD_USAGE);
/*
 * The mdb_getopts() option list is variadic and must be NULL-terminated;
 * without the terminator the parser reads past the supplied arguments.
 */
! if (mdb_getopts(argc, argv,
! 'l', MDB_OPT_UINT64, &level, NULL) != argc)
return (DCMD_USAGE);
! if (level > mmu.max_level) {
! mdb_warn("invalid level %lu\n", level);
return (DCMD_ERR);
}
/* A PTE of 0 has nothing to decode; succeed silently. */
! if (addr == 0)
! return (DCMD_OK);
!
! return (do_pte_dcmd((int)level, addr));
}
static size_t
va2entry(htable_t *htable, uintptr_t addr)
{
*** 535,563 ****
static x86pte_t
get_pte(hat_t *hat, htable_t *htable, uintptr_t addr)
{
x86pte_t buf;
- x86pte32_t *pte32 = (x86pte32_t *)&buf;
- size_t len;
! if (htable->ht_flags & HTABLE_VLP) {
! uintptr_t ptr = (uintptr_t)hat->hat_vlp_ptes;
ptr += va2entry(htable, addr) << mmu.pte_size_shift;
! len = mdb_vread(&buf, mmu.pte_size, ptr);
! } else {
paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn);
paddr += va2entry(htable, addr) << mmu.pte_size_shift;
- len = mdb_pread(&buf, mmu.pte_size, paddr);
- }
! if (len != mmu.pte_size)
! return (0);
!
! if (mmu.pte_size == sizeof (x86pte_t))
return (buf);
! return (*pte32);
}
static int
do_va2pa(uintptr_t addr, struct as *asp, int print_level, physaddr_t *pap,
pfn_t *mfnp)
--- 516,539 ----
/*
 * Return the page table entry that covers addr within the given htable.
 *
 * If the htable has HTABLE_COPIED set, the entry is taken directly from
 * hat->hat_copied_ptes through a plain pointer dereference.
 * NOTE(review): this presumes hat points at a debugger-local copy of the
 * hat structure rather than a target address -- confirm against callers.
 *
 * Otherwise mmu.pte_size bytes are read from the physical page named by
 * htable->ht_pfn using mdb_pread().  A short or failed read yields 0.
 */
static x86pte_t
get_pte(hat_t *hat, htable_t *htable, uintptr_t addr)
{
x86pte_t buf;
! if (htable->ht_flags & HTABLE_COPIED) {
! uintptr_t ptr = (uintptr_t)hat->hat_copied_ptes;
/* Index of the entry times the PTE size gives the byte offset. */
ptr += va2entry(htable, addr) << mmu.pte_size_shift;
! return (*(x86pte_t *)ptr);
! }
!
/* Physical address of the page table, then of the entry inside it. */
paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn);
paddr += va2entry(htable, addr) << mmu.pte_size_shift;
/*
 * NOTE(review): buf is not pre-initialized, so this assumes
 * mmu.pte_size == sizeof (x86pte_t) -- confirm.
 */
! if ((mdb_pread(&buf, mmu.pte_size, paddr)) == mmu.pte_size)
return (buf);
!
! return (0);
}
static int
do_va2pa(uintptr_t addr, struct as *asp, int print_level, physaddr_t *pap,
pfn_t *mfnp)
*** 619,630 ****
continue;
pte = get_pte(&hat, &htable, addr);
if (print_level) {
! mdb_printf("\tlevel=%d htable=%p "
! "pte=%llr\n", level, ht, pte);
}
if (!PTE_ISVALID(pte)) {
mdb_printf("Address %p is unmapped.\n",
addr);
--- 595,606 ----
continue;
pte = get_pte(&hat, &htable, addr);
if (print_level) {
! mdb_printf("\tlevel=%d htable=0x%p "
! "pte=0x%llr\n", level, ht, pte);
}
if (!PTE_ISVALID(pte)) {
mdb_printf("Address %p is unmapped.\n",
addr);
*** 723,734 ****
uintptr_t base;
int h;
int level;
int entry;
x86pte_t pte;
- x86pte_t buf;
- x86pte32_t *pte32 = (x86pte32_t *)&buf;
physaddr_t paddr;
size_t len;
/*
* The hats are kept in a list with khat at the head.
--- 699,708 ----
*** 794,811 ****
*/
if (hatp != khat &&
base >= kernelbase)
continue;
! len = mdb_pread(&buf, mmu.pte_size,
paddr + entry * mmu.pte_size);
if (len != mmu.pte_size)
return (DCMD_ERR);
- if (mmu.pte_size == sizeof (x86pte_t))
- pte = buf;
- else
- pte = *pte32;
if ((pte & PT_VALID) == 0)
continue;
if (level == 0 || !(pte & PT_PAGESIZE))
pte &= PT_PADDR;
--- 768,781 ----
*/
if (hatp != khat &&
base >= kernelbase)
continue;
! len = mdb_pread(&pte, mmu.pte_size,
paddr + entry * mmu.pte_size);
if (len != mmu.pte_size)
return (DCMD_ERR);
if ((pte & PT_VALID) == 0)
continue;
if (level == 0 || !(pte & PT_PAGESIZE))
pte &= PT_PADDR;
*** 852,875 ****
return (do_report_maps(pfn));
}
static int
! do_ptable_dcmd(pfn_t pfn)
{
struct hat *hatp;
struct hat hat;
htable_t *ht;
htable_t htable;
uintptr_t base;
int h;
- int level;
int entry;
uintptr_t pagesize;
x86pte_t pte;
x86pte_t buf;
- x86pte32_t *pte32 = (x86pte32_t *)&buf;
physaddr_t paddr;
size_t len;
/*
* The hats are kept in a list with khat at the head.
--- 822,843 ----
return (do_report_maps(pfn));
}
static int
! do_ptable_dcmd(pfn_t pfn, uint64_t level)
{
struct hat *hatp;
struct hat hat;
htable_t *ht;
htable_t htable;
uintptr_t base;
int h;
int entry;
uintptr_t pagesize;
x86pte_t pte;
x86pte_t buf;
physaddr_t paddr;
size_t len;
/*
* The hats are kept in a list with khat at the head.
*** 910,944 ****
}
found_it:
if (htable.ht_pfn == pfn) {
mdb_printf("htable=%p\n", ht);
level = htable.ht_level;
base = htable.ht_vaddr;
pagesize = mmu.level_size[level];
} else {
! mdb_printf("Unknown pagetable - assuming level/addr 0");
! level = 0; /* assume level == 0 for PFN */
base = 0;
! pagesize = MMU_PAGESIZE;
}
paddr = mmu_ptob((physaddr_t)pfn);
for (entry = 0; entry < mmu.ptes_per_table; ++entry) {
len = mdb_pread(&buf, mmu.pte_size,
paddr + entry * mmu.pte_size);
if (len != mmu.pte_size)
return (DCMD_ERR);
- if (mmu.pte_size == sizeof (x86pte_t))
pte = buf;
- else
- pte = *pte32;
if (pte == 0)
continue;
! mdb_printf("[%3d] va=%p ", entry, base + entry * pagesize);
do_pte_dcmd(level, pte);
}
done:
return (DCMD_OK);
--- 878,917 ----
}
found_it:
if (htable.ht_pfn == pfn) {
mdb_printf("htable=%p\n", ht);
+ if (level == (uint64_t)-1) {
level = htable.ht_level;
+ } else if (htable.ht_level != level) {
+ mdb_warn("htable has level %d but forcing level %lu\n",
+ htable.ht_level, level);
+ }
base = htable.ht_vaddr;
pagesize = mmu.level_size[level];
} else {
! if (level == (uint64_t)-1)
! level = 0;
! mdb_warn("couldn't find matching htable, using level=%lu, "
! "base address=0x0\n", level);
base = 0;
! pagesize = mmu.level_size[level];
}
paddr = mmu_ptob((physaddr_t)pfn);
for (entry = 0; entry < mmu.ptes_per_table; ++entry) {
len = mdb_pread(&buf, mmu.pte_size,
paddr + entry * mmu.pte_size);
if (len != mmu.pte_size)
return (DCMD_ERR);
pte = buf;
if (pte == 0)
continue;
! mdb_printf("[%3d] va=0x%p ", entry,
! VA_SIGN_EXTEND(base + entry * pagesize));
do_pte_dcmd(level, pte);
}
done:
return (DCMD_OK);
*** 951,960 ****
--- 924,934 ----
int
ptable_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
pfn_t pfn;
uint_t mflag = 0;
+ uint64_t level = (uint64_t)-1;
init_mmu();
if (mmu.num_level == 0)
return (DCMD_ERR);
*** 961,978 ****
if ((flags & DCMD_ADDRSPEC) == 0)
return (DCMD_USAGE);
if (mdb_getopts(argc, argv,
! 'm', MDB_OPT_SETBITS, TRUE, &mflag, NULL) != argc)
return (DCMD_USAGE);
pfn = (pfn_t)addr;
if (mflag)
pfn = mdb_mfn_to_pfn(pfn);
! return (do_ptable_dcmd(pfn));
}
static int
do_htables_dcmd(hat_t *hatp)
{
--- 935,958 ----
if ((flags & DCMD_ADDRSPEC) == 0)
return (DCMD_USAGE);
if (mdb_getopts(argc, argv,
! 'm', MDB_OPT_SETBITS, TRUE, &mflag,
! 'l', MDB_OPT_UINT64, &level, NULL) != argc)
return (DCMD_USAGE);
+ if (level != (uint64_t)-1 && level > mmu.max_level) {
+ mdb_warn("invalid level %lu\n", level);
+ return (DCMD_ERR);
+ }
+
pfn = (pfn_t)addr;
if (mflag)
pfn = mdb_mfn_to_pfn(pfn);
! return (do_ptable_dcmd(pfn, level));
}
static int
do_htables_dcmd(hat_t *hatp)
{
*** 1028,1033 ****
--- 1008,1122 ----
return (DCMD_USAGE);
hat = (hat_t *)addr;
return (do_htables_dcmd(hat));
+ }
+
/*
 * Build a virtual address from an array of per-level page table indices.
 * For every level from mmu.max_level down to 0, entries[l] is shifted left
 * by mmu.level_shift[l] and added in; the sum is then passed through
 * VA_SIGN_EXTEND() before being returned.
 */
+ static uintptr_t
+ entry2va(size_t *entries)
+ {
+ uintptr_t va = 0;
+
/* NOTE(review): termination relies on level_t being signed -- confirm. */
+ for (level_t l = mmu.max_level; l >= 0; l--)
+ va += entries[l] << mmu.level_shift[l];
+
+ return (VA_SIGN_EXTEND(va));
+ }
+
/*
 * Print one line describing a contiguous mapped range.
 *
 * The range starts at start and ends one byte before the virtual address
 * formed from the current entries[] indices (via entry2va()).  user and
 * writable choose the "user"/"kernel" and "writable"/"read-only" labels.
 * When wflag is set and start is at or above kernelbase, the "whatis"
 * dcmd is additionally invoked on start; its return value is ignored.
 */
+ static void
+ ptmap_report(size_t *entries, uintptr_t start,
+ boolean_t user, boolean_t writable, boolean_t wflag)
+ {
/* First address past the range being reported. */
+ uint64_t curva = entry2va(entries);
+
+ mdb_printf("mapped %s,%s range of %lu bytes: %a-%a\n",
+ user ? "user" : "kernel", writable ? "writable" : "read-only",
+ curva - start, start, curva - 1);
+ if (wflag && start >= kernelbase)
+ (void) mdb_call_dcmd("whatis", start, DCMD_ADDRSPEC, 0, NULL);
+ }
+
/*
 * ::ptmap dcmd entry point: walk a page table hierarchy and report each
 * contiguous run of leaf mappings that share the same PT_USER and
 * PT_WRITABLE settings.
 *
 * addr is masked with MMU_PAGEMASK and used as the physical address of the
 * top-level (mmu.max_level) table.  -w sets wflag, which ptmap_report()
 * uses to decide whether to also run "whatis" on kernel ranges.
 *
 * Returns DCMD_USAGE if no address was given or the options do not parse,
 * DCMD_ERR if the MMU description is unavailable or a PTE cannot be read,
 * and DCMD_OK once the whole top-level table has been traversed.
 */
+ int
+ ptmap_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+ {
/* Per-level physical address of the table currently being scanned. */
+ physaddr_t paddrs[MAX_NUM_LEVEL] = { 0, };
/* Per-level index of the entry currently being examined. */
+ size_t entry[MAX_NUM_LEVEL] = { 0, };
/* Start VA of the run in progress; (uintptr_t)-1 means "no run open". */
+ uintptr_t start = (uintptr_t)-1;
+ boolean_t writable = B_FALSE;
+ boolean_t user = B_FALSE;
+ boolean_t wflag = B_FALSE;
+ level_t curlevel;
+
+ if ((flags & DCMD_ADDRSPEC) == 0)
+ return (DCMD_USAGE);
+
+ if (mdb_getopts(argc, argv,
+ 'w', MDB_OPT_SETBITS, TRUE, &wflag, NULL) != argc)
+ return (DCMD_USAGE);
+
+ init_mmu();
+
+ if (mmu.num_level == 0)
+ return (DCMD_ERR);
+
+ curlevel = mmu.max_level;
+
+ paddrs[curlevel] = addr & MMU_PAGEMASK;
+
+ for (;;) {
+ physaddr_t pte_addr;
+ x86pte_t pte;
+
+ pte_addr = paddrs[curlevel] +
+ (entry[curlevel] << mmu.pte_size_shift);
+
+ if (mdb_pread(&pte, sizeof (pte), pte_addr) != sizeof (pte)) {
+ mdb_warn("couldn't read pte at %p", pte_addr);
+ return (DCMD_ERR);
+ }
+
+ if (PTE_GET(pte, PT_VALID) == 0) {
/* An invalid entry ends any run that is currently open. */
+ if (start != (uintptr_t)-1) {
+ ptmap_report(entry, start,
+ user, writable, wflag);
+ start = (uintptr_t)-1;
+ }
+ } else if (curlevel == 0 || PTE_GET(pte, PT_PAGESIZE)) {
/*
 * Leaf mapping: open a new run, or close and reopen one when the
 * user/writable bits differ from the run in progress.  user and
 * writable hold the raw PTE_GET() results, and are compared against
 * the same expression below.
 */
+ if (start == (uintptr_t)-1) {
+ start = entry2va(entry);
+ user = PTE_GET(pte, PT_USER);
+ writable = PTE_GET(pte, PT_WRITABLE);
+ } else if (user != PTE_GET(pte, PT_USER) ||
+ writable != PTE_GET(pte, PT_WRITABLE)) {
+ ptmap_report(entry, start,
+ user, writable, wflag);
+ start = entry2va(entry);
+ user = PTE_GET(pte, PT_USER);
+ writable = PTE_GET(pte, PT_WRITABLE);
+ }
+ } else {
+ /* Descend a level. */
+ physaddr_t pa = mmu_ptob(pte2mfn(pte, curlevel));
+ paddrs[--curlevel] = pa;
+ entry[curlevel] = 0;
/* Skip the index advance below; entry 0 of the new table is next. */
+ continue;
+ }
+
/*
 * Advance to the next entry.  Each time a table is exhausted its index
 * is reset to 0 and the parent level's index is advanced instead.
 */
+ while (++entry[curlevel] == mmu.ptes_per_table) {
+ /* Ascend back up. */
+ entry[curlevel] = 0;
+ if (curlevel == mmu.max_level) {
/* Top-level table exhausted: flush any open run and finish. */
+ if (start != (uintptr_t)-1) {
+ ptmap_report(entry, start,
+ user, writable, wflag);
+ }
+ goto out;
+ }
+
+ curlevel++;
+ }
+ }
+
+ out:
+ return (DCMD_OK);
}