Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
@@ -19,10 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2018 Joyent, Inc.
*/
/*
* This part of the file contains the mdb support for dcmds:
* ::memseg_list
@@ -44,10 +46,13 @@
#include <mdb/mdb_target.h>
#include <vm/page.h>
#include <vm/hat_i86.h>
+#define VA_SIGN_BIT (1UL << 47) /* bit 47, used as the sign bit of a virtual address */
+#define VA_SIGN_EXTEND(va) (((va) ^ VA_SIGN_BIT) - VA_SIGN_BIT) /* copies bit 47 of va into all higher bits */
+
struct pfn2pp {
pfn_t pfn;
page_t *pp;
};
@@ -396,33 +401,27 @@
else
mfn = mmu_btop(pte & PT_PADDR);
return (mfn);
}
-/*
- * Print a PTE in more human friendly way. The PTE is assumed to be in
- * a level 0 page table, unless -l specifies another level.
- *
- * The PTE value can be specified as the -p option, since on a 32 bit kernel
- * with PAE running it's larger than a uintptr_t.
- */
static int
do_pte_dcmd(int level, uint64_t pte)
{
static char *attr[] = {
"wrback", "wrthru", "uncached", "uncached",
"wrback", "wrthru", "wrcombine", "uncached"};
int pat_index = 0;
pfn_t mfn;
- mdb_printf("pte=%llr: ", pte);
- if (PTE_GET(pte, mmu.pt_nx))
- mdb_printf("noexec ");
+ mdb_printf("pte=0x%llr: ", pte);
mfn = pte2mfn(pte, level);
mdb_printf("%s=0x%lr ", is_xpv ? "mfn" : "pfn", mfn);
+ if (PTE_GET(pte, mmu.pt_nx))
+ mdb_printf("noexec ");
+
if (PTE_GET(pte, PT_NOCONSIST))
mdb_printf("noconsist ");
if (PTE_GET(pte, PT_NOSYNC))
mdb_printf("nosync ");
@@ -474,56 +473,38 @@
}
/*
* Print a PTE in more human friendly way. The PTE is assumed to be in
* a level 0 page table, unless -l specifies another level.
- *
- * The PTE value can be specified as the -p option, since on a 32 bit kernel
- * with PAE running it's larger than a uintptr_t.
*/
/*ARGSUSED*/
int
pte_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
- int level = 0;
- uint64_t pte = 0;
- char *level_str = NULL;
- char *pte_str = NULL;
+ uint64_t level = 0; /* page table level of the PTE; -l overrides the default of 0 */
init_mmu();
if (mmu.num_level == 0)
return (DCMD_ERR);
- if (mdb_getopts(argc, argv,
- 'p', MDB_OPT_STR, &pte_str,
- 'l', MDB_OPT_STR, &level_str) != argc)
+ if ((flags & DCMD_ADDRSPEC) == 0) /* the PTE value is taken from addr, so an address is required */
return (DCMD_USAGE);
- /*
- * parse the PTE to decode, if it's 0, we don't do anything
- */
- if (pte_str != NULL) {
- pte = mdb_strtoull(pte_str);
- } else {
- if ((flags & DCMD_ADDRSPEC) == 0)
+ if (mdb_getopts(argc, argv, /* the variadic option list must end with NULL */
+ 'l', MDB_OPT_UINT64, &level, NULL) != argc)
return (DCMD_USAGE);
- pte = addr;
- }
- if (pte == 0)
- return (DCMD_OK);
- /*
- * parse the level if supplied
- */
- if (level_str != NULL) {
- level = mdb_strtoull(level_str);
- if (level < 0 || level > mmu.max_level)
+ if (level > mmu.max_level) {
+ mdb_warn("invalid level %lu\n", level);
return (DCMD_ERR);
}
- return (do_pte_dcmd(level, pte));
+ if (addr == 0) /* a PTE of 0 has nothing to decode */
+ return (DCMD_OK);
+
+ return (do_pte_dcmd((int)level, addr));
}
static size_t
va2entry(htable_t *htable, uintptr_t addr)
{
@@ -535,29 +516,24 @@
static x86pte_t
get_pte(hat_t *hat, htable_t *htable, uintptr_t addr)
{
x86pte_t buf;
- x86pte32_t *pte32 = (x86pte32_t *)&buf;
- size_t len;
- if (htable->ht_flags & HTABLE_VLP) {
- uintptr_t ptr = (uintptr_t)hat->hat_vlp_ptes;
+ if (htable->ht_flags & HTABLE_COPIED) { /* entry is taken from hat_copied_ptes instead of the ht_pfn page */
+ uintptr_t ptr = (uintptr_t)hat->hat_copied_ptes;
ptr += va2entry(htable, addr) << mmu.pte_size_shift;
- len = mdb_vread(&buf, mmu.pte_size, ptr);
- } else {
+ return (*(x86pte_t *)ptr); /* NOTE(review): plain load, no mdb_vread(); presumably *hat is a debugger-local copy -- confirm */
+ }
+ /* Otherwise read the entry from the physical page at ht_pfn. */
paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn);
paddr += va2entry(htable, addr) << mmu.pte_size_shift;
- len = mdb_pread(&buf, mmu.pte_size, paddr);
- }
- if (len != mmu.pte_size)
- return (0);
-
- if (mmu.pte_size == sizeof (x86pte_t))
+ if ((mdb_pread(&buf, mmu.pte_size, paddr)) == mmu.pte_size)
return (buf);
- return (*pte32);
+ /* A short or failed read yields a PTE of 0. */
+ return (0);
}
static int
do_va2pa(uintptr_t addr, struct as *asp, int print_level, physaddr_t *pap,
pfn_t *mfnp)
@@ -619,12 +595,12 @@
continue;
pte = get_pte(&hat, &htable, addr);
if (print_level) {
- mdb_printf("\tlevel=%d htable=%p "
- "pte=%llr\n", level, ht, pte);
+ mdb_printf("\tlevel=%d htable=0x%p "
+ "pte=0x%llr\n", level, ht, pte);
}
if (!PTE_ISVALID(pte)) {
mdb_printf("Address %p is unmapped.\n",
addr);
@@ -723,12 +699,10 @@
uintptr_t base;
int h;
int level;
int entry;
x86pte_t pte;
- x86pte_t buf;
- x86pte32_t *pte32 = (x86pte32_t *)&buf;
physaddr_t paddr;
size_t len;
/*
* The hats are kept in a list with khat at the head.
@@ -794,18 +768,14 @@
*/
if (hatp != khat &&
base >= kernelbase)
continue;
- len = mdb_pread(&buf, mmu.pte_size,
+ len = mdb_pread(&pte, mmu.pte_size,
paddr + entry * mmu.pte_size);
if (len != mmu.pte_size)
return (DCMD_ERR);
- if (mmu.pte_size == sizeof (x86pte_t))
- pte = buf;
- else
- pte = *pte32;
if ((pte & PT_VALID) == 0)
continue;
if (level == 0 || !(pte & PT_PAGESIZE))
pte &= PT_PADDR;
@@ -852,24 +822,22 @@
return (do_report_maps(pfn));
}
static int
-do_ptable_dcmd(pfn_t pfn)
+do_ptable_dcmd(pfn_t pfn, uint64_t level)
{
struct hat *hatp;
struct hat hat;
htable_t *ht;
htable_t htable;
uintptr_t base;
int h;
- int level;
int entry;
uintptr_t pagesize;
x86pte_t pte;
x86pte_t buf;
- x86pte32_t *pte32 = (x86pte32_t *)&buf;
physaddr_t paddr;
size_t len;
/*
* The hats are kept in a list with khat at the head.
@@ -910,35 +878,40 @@
}
found_it:
if (htable.ht_pfn == pfn) {
mdb_printf("htable=%p\n", ht);
+ if (level == (uint64_t)-1) {
level = htable.ht_level;
+ } else if (htable.ht_level != level) {
+ mdb_warn("htable has level %d but forcing level %lu\n",
+ htable.ht_level, level);
+ }
base = htable.ht_vaddr;
pagesize = mmu.level_size[level];
} else {
- mdb_printf("Unknown pagetable - assuming level/addr 0");
- level = 0; /* assume level == 0 for PFN */
+ if (level == (uint64_t)-1)
+ level = 0;
+ mdb_warn("couldn't find matching htable, using level=%lu, "
+ "base address=0x0\n", level);
base = 0;
- pagesize = MMU_PAGESIZE;
+ pagesize = mmu.level_size[level];
}
paddr = mmu_ptob((physaddr_t)pfn);
for (entry = 0; entry < mmu.ptes_per_table; ++entry) {
len = mdb_pread(&buf, mmu.pte_size,
paddr + entry * mmu.pte_size);
if (len != mmu.pte_size)
return (DCMD_ERR);
- if (mmu.pte_size == sizeof (x86pte_t))
pte = buf;
- else
- pte = *pte32;
if (pte == 0)
continue;
- mdb_printf("[%3d] va=%p ", entry, base + entry * pagesize);
+ mdb_printf("[%3d] va=0x%p ", entry,
+ VA_SIGN_EXTEND(base + entry * pagesize));
do_pte_dcmd(level, pte);
}
done:
return (DCMD_OK);
@@ -951,10 +924,11 @@
int
ptable_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
pfn_t pfn;
uint_t mflag = 0;
+ uint64_t level = (uint64_t)-1; /* (uint64_t)-1 means no -l was given */
init_mmu();
if (mmu.num_level == 0)
return (DCMD_ERR);
@@ -961,18 +935,24 @@
if ((flags & DCMD_ADDRSPEC) == 0)
return (DCMD_USAGE);
if (mdb_getopts(argc, argv,
- 'm', MDB_OPT_SETBITS, TRUE, &mflag, NULL) != argc)
+ 'm', MDB_OPT_SETBITS, TRUE, &mflag,
+ 'l', MDB_OPT_UINT64, &level, NULL) != argc)
return (DCMD_USAGE);
+ if (level != (uint64_t)-1 && level > mmu.max_level) { /* range-check only an explicitly supplied -l */
+ mdb_warn("invalid level %lu\n", level);
+ return (DCMD_ERR);
+ }
+ /* addr is used as the page table's frame number; -m converts it with mdb_mfn_to_pfn(). */
pfn = (pfn_t)addr;
if (mflag)
pfn = mdb_mfn_to_pfn(pfn);
- return (do_ptable_dcmd(pfn));
+ return (do_ptable_dcmd(pfn, level)); /* level is passed through, possibly still (uint64_t)-1 */
}
static int
do_htables_dcmd(hat_t *hatp)
{
@@ -1028,6 +1008,115 @@
return (DCMD_USAGE);
hat = (hat_t *)addr;
return (do_htables_dcmd(hat));
+}
+/* Build the virtual address selected by one table index per level. */
+static uintptr_t
+entry2va(size_t *entries)
+{
+ uintptr_t va = 0;
+
+ for (level_t l = mmu.max_level; l >= 0; l--)
+ va += entries[l] << mmu.level_shift[l]; /* index for level l, moved to that level's shift */
+
+ return (VA_SIGN_EXTEND(va)); /* copy bit 47 into the higher bits */
+}
+/* Print the mapped range that begins at start and ends just below entry2va(entries). */
+static void
+ptmap_report(size_t *entries, uintptr_t start,
+ boolean_t user, boolean_t writable, boolean_t wflag)
+{
+ uint64_t curva = entry2va(entries); /* first address past the range */
+
+ mdb_printf("mapped %s,%s range of %lu bytes: %a-%a\n",
+ user ? "user" : "kernel", writable ? "writable" : "read-only",
+ curva - start, start, curva - 1);
+ if (wflag && start >= kernelbase) /* ::whatis is run only for a start at or above kernelbase */
+ (void) mdb_call_dcmd("whatis", start, DCMD_ADDRSPEC, 0, NULL);
+}
+/* Walk the page tables rooted at physical address addr and report mapped ranges. */
+int
+ptmap_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ physaddr_t paddrs[MAX_NUM_LEVEL] = { 0, }; /* physical address of the table being walked, per level */
+ size_t entry[MAX_NUM_LEVEL] = { 0, }; /* current index into that table, per level */
+ uintptr_t start = (uintptr_t)-1; /* start VA of the open range; (uintptr_t)-1 means none */
+ boolean_t writable = B_FALSE;
+ boolean_t user = B_FALSE;
+ boolean_t wflag = B_FALSE; /* set by -w; forwarded to ptmap_report() */
+ level_t curlevel;
+
+ if ((flags & DCMD_ADDRSPEC) == 0)
+ return (DCMD_USAGE);
+
+ if (mdb_getopts(argc, argv,
+ 'w', MDB_OPT_SETBITS, TRUE, &wflag, NULL) != argc)
+ return (DCMD_USAGE);
+
+ init_mmu();
+
+ if (mmu.num_level == 0)
+ return (DCMD_ERR);
+
+ curlevel = mmu.max_level; /* the walk starts at the top-level table */
+
+ paddrs[curlevel] = addr & MMU_PAGEMASK; /* page-align the supplied address */
+
+ for (;;) {
+ physaddr_t pte_addr;
+ x86pte_t pte;
+
+ pte_addr = paddrs[curlevel] +
+ (entry[curlevel] << mmu.pte_size_shift);
+ /* NOTE(review): reads sizeof (pte) bytes where other readers in this file use mmu.pte_size -- confirm they match */
+ if (mdb_pread(&pte, sizeof (pte), pte_addr) != sizeof (pte)) {
+ mdb_warn("couldn't read pte at %p", pte_addr);
+ return (DCMD_ERR);
+ }
+
+ if (PTE_GET(pte, PT_VALID) == 0) { /* invalid entry: close any open range */
+ if (start != (uintptr_t)-1) {
+ ptmap_report(entry, start,
+ user, writable, wflag);
+ start = (uintptr_t)-1;
+ }
+ } else if (curlevel == 0 || PTE_GET(pte, PT_PAGESIZE)) { /* entry maps a page rather than a lower table */
+ if (start == (uintptr_t)-1) { /* open a new range here */
+ start = entry2va(entry);
+ user = PTE_GET(pte, PT_USER);
+ writable = PTE_GET(pte, PT_WRITABLE);
+ } else if (user != PTE_GET(pte, PT_USER) ||
+ writable != PTE_GET(pte, PT_WRITABLE)) { /* attributes changed: report and start a new range */
+ ptmap_report(entry, start,
+ user, writable, wflag);
+ start = entry2va(entry);
+ user = PTE_GET(pte, PT_USER);
+ writable = PTE_GET(pte, PT_WRITABLE);
+ }
+ } else {
+ /* Descend a level. */
+ physaddr_t pa = mmu_ptob(pte2mfn(pte, curlevel));
+ paddrs[--curlevel] = pa;
+ entry[curlevel] = 0;
+ continue; /* skip the index advance below; entry 0 of the lower table is read next */
+ }
+
+ while (++entry[curlevel] == mmu.ptes_per_table) { /* advance; true when this table is exhausted */
+ /* Ascend back up. */
+ entry[curlevel] = 0;
+ if (curlevel == mmu.max_level) { /* top-level table exhausted: the walk is complete */
+ if (start != (uintptr_t)-1) {
+ ptmap_report(entry, start,
+ user, writable, wflag);
+ }
+ goto out;
+ }
+
+ curlevel++; /* the loop condition then advances the parent's index */
+ }
+ }
+
+out:
+ return (DCMD_OK);
}