Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

@@ -19,10 +19,12 @@
  * CDDL HEADER END
  */
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright 2018 Joyent, Inc.
  */
 
 /*
  * This part of the file contains the mdb support for dcmds:
  *      ::memseg_list

@@ -44,10 +46,13 @@
 #include <mdb/mdb_target.h>
 
 #include <vm/page.h>
 #include <vm/hat_i86.h>
 
+/*
+ * VA_SIGN_BIT is bit 47 of an address.  For a va below (1UL << 48),
+ * VA_SIGN_EXTEND() yields va with bit 47 copied into every higher bit:
+ * the value is unchanged when bit 47 is clear, and has all of the upper
+ * bits set when bit 47 is set.
+ */
+#define VA_SIGN_BIT (1UL << 47)
+#define VA_SIGN_EXTEND(va) (((va) ^ VA_SIGN_BIT) - VA_SIGN_BIT)
+
 struct pfn2pp {
         pfn_t pfn;
         page_t *pp;
 };
 

@@ -396,33 +401,27 @@
         else
                 mfn = mmu_btop(pte & PT_PADDR);
         return (mfn);
 }
 
-/*
- * Print a PTE in more human friendly way. The PTE is assumed to be in
- * a level 0 page table, unless -l specifies another level.
- *
- * The PTE value can be specified as the -p option, since on a 32 bit kernel
- * with PAE running it's larger than a uintptr_t.
- */
 static int
 do_pte_dcmd(int level, uint64_t pte)
 {
         static char *attr[] = {
             "wrback", "wrthru", "uncached", "uncached",
             "wrback", "wrthru", "wrcombine", "uncached"};
         int pat_index = 0;
         pfn_t mfn;
 
-        mdb_printf("pte=%llr: ", pte);
-        if (PTE_GET(pte, mmu.pt_nx))
-                mdb_printf("noexec ");
+        mdb_printf("pte=0x%llr: ", pte);
 
         mfn = pte2mfn(pte, level);
         mdb_printf("%s=0x%lr ", is_xpv ? "mfn" : "pfn", mfn);
 
+        if (PTE_GET(pte, mmu.pt_nx))
+                mdb_printf("noexec ");
+
         if (PTE_GET(pte, PT_NOCONSIST))
                 mdb_printf("noconsist ");
 
         if (PTE_GET(pte, PT_NOSYNC))
                 mdb_printf("nosync ");

@@ -474,56 +473,38 @@
 }
 
 /*
  * Print a PTE in more human friendly way. The PTE is assumed to be in
  * a level 0 page table, unless -l specifies another level.
- *
- * The PTE value can be specified as the -p option, since on a 32 bit kernel
- * with PAE running it's larger than a uintptr_t.
+ *
+ * The PTE value to decode is the dcmd address: without an address the dcmd
+ * returns DCMD_USAGE, and a PTE value of 0 prints nothing and returns
+ * DCMD_OK. A -l level above mmu.max_level is rejected with DCMD_ERR.
  */
 /*ARGSUSED*/
 int
 pte_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 {
-        int level = 0;
-        uint64_t pte = 0;
-        char *level_str = NULL;
-        char *pte_str = NULL;
+        uint64_t level = 0;
 
         init_mmu();
 
         if (mmu.num_level == 0)
                 return (DCMD_ERR);
 
-        if (mdb_getopts(argc, argv,
-            'p', MDB_OPT_STR, &pte_str,
-            'l', MDB_OPT_STR, &level_str) != argc)
+        if ((flags & DCMD_ADDRSPEC) == 0)
                 return (DCMD_USAGE);
 
-        /*
-         * parse the PTE to decode, if it's 0, we don't do anything
-         */
-        if (pte_str != NULL) {
-                pte = mdb_strtoull(pte_str);
-        } else {
-                if ((flags & DCMD_ADDRSPEC) == 0)
+        /* The mdb_getopts() option list must be terminated with NULL. */
+        if (mdb_getopts(argc, argv,
+            'l', MDB_OPT_UINT64, &level, NULL) != argc)
                         return (DCMD_USAGE);
-                pte = addr;
-        }
-        if (pte == 0)
-                return (DCMD_OK);
 
-        /*
-         * parse the level if supplied
-         */
-        if (level_str != NULL) {
-                level = mdb_strtoull(level_str);
-                if (level < 0 || level > mmu.max_level)
+        if (level > mmu.max_level) {
+                mdb_warn("invalid level %lu\n", level);
                         return (DCMD_ERR);
         }
 
-        return (do_pte_dcmd(level, pte));
+        if (addr == 0)
+                return (DCMD_OK);
+
+        return (do_pte_dcmd((int)level, addr));
 }
 
 static size_t
 va2entry(htable_t *htable, uintptr_t addr)
 {

@@ -535,29 +516,24 @@
 
+/*
+ * Return the PTE for addr from the given htable.
+ *
+ * When the htable has HTABLE_COPIED set, the entry is taken from
+ * hat->hat_copied_ptes by a direct dereference, i.e. from memory in the
+ * debugger's own address space (NOTE(review): presumably callers pass a
+ * local copy of the hat -- confirm).  Otherwise the entry is read with
+ * mdb_pread() from the page at htable->ht_pfn.
+ *
+ * Returns 0 if that read does not yield mmu.pte_size bytes.
+ */
 static x86pte_t
 get_pte(hat_t *hat, htable_t *htable, uintptr_t addr)
 {
         x86pte_t buf;
-        x86pte32_t *pte32 = (x86pte32_t *)&buf;
-        size_t len;
 
-        if (htable->ht_flags & HTABLE_VLP) {
-                uintptr_t ptr = (uintptr_t)hat->hat_vlp_ptes;
+        if (htable->ht_flags & HTABLE_COPIED) {
+                uintptr_t ptr = (uintptr_t)hat->hat_copied_ptes;
                 ptr += va2entry(htable, addr) << mmu.pte_size_shift;
-                len = mdb_vread(&buf, mmu.pte_size, ptr);
-        } else {
+                return (*(x86pte_t *)ptr);
+        }
+
                 paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn);
                 paddr += va2entry(htable, addr) << mmu.pte_size_shift;
-                len = mdb_pread(&buf, mmu.pte_size, paddr);
-        }
 
-        if (len != mmu.pte_size)
-                return (0);
-
-        if (mmu.pte_size == sizeof (x86pte_t))
+        if ((mdb_pread(&buf, mmu.pte_size, paddr)) == mmu.pte_size)
                 return (buf);
-        return (*pte32);
+
+        return (0);
 }
 
 static int
 do_va2pa(uintptr_t addr, struct as *asp, int print_level, physaddr_t *pap,
     pfn_t *mfnp)

@@ -619,12 +595,12 @@
                                         continue;
 
                                 pte = get_pte(&hat, &htable, addr);
 
                                 if (print_level) {
-                                        mdb_printf("\tlevel=%d htable=%p "
-                                            "pte=%llr\n", level, ht, pte);
+                                        mdb_printf("\tlevel=%d htable=0x%p "
+                                            "pte=0x%llr\n", level, ht, pte);
                                 }
 
                                 if (!PTE_ISVALID(pte)) {
                                         mdb_printf("Address %p is unmapped.\n",
                                             addr);

@@ -723,12 +699,10 @@
         uintptr_t base;
         int h;
         int level;
         int entry;
         x86pte_t pte;
-        x86pte_t buf;
-        x86pte32_t *pte32 = (x86pte32_t *)&buf;
         physaddr_t paddr;
         size_t len;
 
         /*
          * The hats are kept in a list with khat at the head.

@@ -794,18 +768,14 @@
                                          */
                                         if (hatp != khat &&
                                             base >= kernelbase)
                                                 continue;
 
-                                        len = mdb_pread(&buf, mmu.pte_size,
+                                        len = mdb_pread(&pte, mmu.pte_size,
                                             paddr + entry * mmu.pte_size);
                                         if (len != mmu.pte_size)
                                                 return (DCMD_ERR);
-                                        if (mmu.pte_size == sizeof (x86pte_t))
-                                                pte = buf;
-                                        else
-                                                pte = *pte32;
 
                                         if ((pte & PT_VALID) == 0)
                                                 continue;
                                         if (level == 0 || !(pte & PT_PAGESIZE))
                                                 pte &= PT_PADDR;

@@ -852,24 +822,22 @@
 
         return (do_report_maps(pfn));
 }
 
 static int
-do_ptable_dcmd(pfn_t pfn)
+do_ptable_dcmd(pfn_t pfn, uint64_t level)
 {
         struct hat *hatp;
         struct hat hat;
         htable_t *ht;
         htable_t htable;
         uintptr_t base;
         int h;
-        int level;
         int entry;
         uintptr_t pagesize;
         x86pte_t pte;
         x86pte_t buf;
-        x86pte32_t *pte32 = (x86pte32_t *)&buf;
         physaddr_t paddr;
         size_t len;
 
         /*
          * The hats are kept in a list with khat at the head.

@@ -910,35 +878,40 @@
         }
 
 found_it:
         if (htable.ht_pfn == pfn) {
                 mdb_printf("htable=%p\n", ht);
+                if (level == (uint64_t)-1) {
                 level = htable.ht_level;
+                } else if (htable.ht_level != level) {
+                        mdb_warn("htable has level %d but forcing level %lu\n",
+                            htable.ht_level, level);
+                }
                 base = htable.ht_vaddr;
                 pagesize = mmu.level_size[level];
         } else {
-                mdb_printf("Unknown pagetable - assuming level/addr 0");
-                level = 0;      /* assume level == 0 for PFN */
+                if (level == (uint64_t)-1)
+                        level = 0;
+                mdb_warn("couldn't find matching htable, using level=%lu, "
+                    "base address=0x0\n", level);
                 base = 0;
-                pagesize = MMU_PAGESIZE;
+                pagesize = mmu.level_size[level];
         }
 
         paddr = mmu_ptob((physaddr_t)pfn);
         for (entry = 0; entry < mmu.ptes_per_table; ++entry) {
                 len = mdb_pread(&buf, mmu.pte_size,
                     paddr + entry * mmu.pte_size);
                 if (len != mmu.pte_size)
                         return (DCMD_ERR);
-                if (mmu.pte_size == sizeof (x86pte_t))
                         pte = buf;
-                else
-                        pte = *pte32;
 
                 if (pte == 0)
                         continue;
 
-                mdb_printf("[%3d] va=%p ", entry, base + entry * pagesize);
+                mdb_printf("[%3d] va=0x%p ", entry,
+                    VA_SIGN_EXTEND(base + entry * pagesize));
                 do_pte_dcmd(level, pte);
         }
 
 done:
         return (DCMD_OK);

@@ -951,10 +924,11 @@
 int
 ptable_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 {
         pfn_t pfn;
         uint_t mflag = 0;
+        /* (uint64_t)-1 means that no -l option was supplied. */
+        uint64_t level = (uint64_t)-1;
 
         init_mmu();
 
         if (mmu.num_level == 0)
                 return (DCMD_ERR);

@@ -961,18 +935,24 @@
 
         if ((flags & DCMD_ADDRSPEC) == 0)
                 return (DCMD_USAGE);
 
         if (mdb_getopts(argc, argv,
-            'm', MDB_OPT_SETBITS, TRUE, &mflag, NULL) != argc)
+            'm', MDB_OPT_SETBITS, TRUE, &mflag,
+            'l', MDB_OPT_UINT64, &level, NULL) != argc)
                 return (DCMD_USAGE);
 
+        /* Only an explicitly supplied level is range-checked. */
+        if (level != (uint64_t)-1 && level > mmu.max_level) {
+                mdb_warn("invalid level %lu\n", level);
+                return (DCMD_ERR);
+        }
+
+        /* The dcmd address is a PFN, or with -m an MFN to be translated. */
         pfn = (pfn_t)addr;
         if (mflag)
                 pfn = mdb_mfn_to_pfn(pfn);
 
-        return (do_ptable_dcmd(pfn));
+        return (do_ptable_dcmd(pfn, level));
 }
 
 static int
 do_htables_dcmd(hat_t *hatp)
 {

@@ -1028,6 +1008,115 @@
                 return (DCMD_USAGE);
 
         hat = (hat_t *)addr;
 
         return (do_htables_dcmd(hat));
+}
+
+/*
+ * Build a virtual address from a set of per-level page table indices.
+ * Each entries[l], for l from mmu.max_level down to 0, is shifted left by
+ * mmu.level_shift[l]; the shifted values are summed and the total is passed
+ * through VA_SIGN_EXTEND().
+ */
+static uintptr_t
+entry2va(size_t *entries)
+{
+        uintptr_t addr = 0;
+        level_t lvl = mmu.max_level;
+
+        while (lvl >= 0) {
+                addr += entries[lvl] << mmu.level_shift[lvl];
+                lvl--;
+        }
+
+        return (VA_SIGN_EXTEND(addr));
+}
+
+/*
+ * Print one mapped range.  The range begins at start and ends just before
+ * the address that entry2va() computes from entries; user and writable
+ * select the labels printed for it.  When wflag is set and start is at or
+ * above kernelbase, the "whatis" dcmd is also invoked on start.
+ */
+static void
+ptmap_report(size_t *entries, uintptr_t start,
+    boolean_t user, boolean_t writable, boolean_t wflag)
+{
+        const char *who = user ? "user" : "kernel";
+        const char *prot = writable ? "writable" : "read-only";
+        uint64_t end = entry2va(entries);
+
+        mdb_printf("mapped %s,%s range of %lu bytes: %a-%a\n",
+            who, prot, end - start, start, end - 1);
+
+        if (!wflag || start < kernelbase)
+                return;
+
+        (void) mdb_call_dcmd("whatis", start, DCMD_ADDRSPEC, 0, NULL);
+}
+
+/*
+ * dcmd that walks a page table hierarchy and prints the ranges it maps.
+ *
+ * The dcmd address, masked with MMU_PAGEMASK, is used as the physical
+ * address of the top-level (mmu.max_level) table.  Each contiguous run of
+ * valid leaf entries sharing the same PT_USER and PT_WRITABLE bits is
+ * reported through ptmap_report(); the -w option sets wflag, which is
+ * passed along to it.
+ *
+ * Returns DCMD_USAGE when no address is given or option parsing fails,
+ * DCMD_ERR when mmu.num_level is 0 or a PTE cannot be read, and DCMD_OK
+ * once the whole top-level table has been walked.
+ */
+int
+ptmap_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+        physaddr_t paddrs[MAX_NUM_LEVEL] = { 0, };
+        size_t entry[MAX_NUM_LEVEL] = { 0, };
+        uintptr_t start = (uintptr_t)-1;
+        boolean_t writable = B_FALSE;
+        boolean_t user = B_FALSE;
+        boolean_t wflag = B_FALSE;
+        level_t curlevel;
+
+        if ((flags & DCMD_ADDRSPEC) == 0)
+                return (DCMD_USAGE);
+
+        if (mdb_getopts(argc, argv,
+            'w', MDB_OPT_SETBITS, TRUE, &wflag, NULL) != argc)
+                return (DCMD_USAGE);
+
+        init_mmu();
+
+        if (mmu.num_level == 0)
+                return (DCMD_ERR);
+
+        curlevel = mmu.max_level;
+
+        paddrs[curlevel] = addr & MMU_PAGEMASK;
+
+        /*
+         * Iterative walk.  For each level l, paddrs[l] is the physical
+         * address of the table being scanned and entry[l] the current index
+         * into it.  start is the VA at which the currently open range began,
+         * or (uintptr_t)-1 when no range is open.
+         */
+        for (;;) {
+                physaddr_t pte_addr;
+                x86pte_t pte;
+
+                pte_addr = paddrs[curlevel] +
+                    (entry[curlevel] << mmu.pte_size_shift);
+
+                if (mdb_pread(&pte, sizeof (pte), pte_addr) != sizeof (pte)) {
+                        mdb_warn("couldn't read pte at %p", pte_addr);
+                        return (DCMD_ERR);
+                }
+
+                if (PTE_GET(pte, PT_VALID) == 0) {
+                        /* An invalid entry closes any open range. */
+                        if (start != (uintptr_t)-1) {
+                                ptmap_report(entry, start,
+                                    user, writable, wflag);
+                                start = (uintptr_t)-1;
+                        }
+                } else if (curlevel == 0 || PTE_GET(pte, PT_PAGESIZE)) {
+                        /*
+                         * Leaf mapping (level 0, or PT_PAGESIZE set): open a
+                         * range if none is open; otherwise report and reopen
+                         * when the user/writable bits differ from the range.
+                         */
+                        if (start == (uintptr_t)-1) {
+                                start = entry2va(entry);
+                                user = PTE_GET(pte, PT_USER);
+                                writable = PTE_GET(pte, PT_WRITABLE);
+                        } else if (user != PTE_GET(pte, PT_USER) ||
+                            writable != PTE_GET(pte, PT_WRITABLE)) {
+                                ptmap_report(entry, start,
+                                    user, writable, wflag);
+                                start = entry2va(entry);
+                                user = PTE_GET(pte, PT_USER);
+                                writable = PTE_GET(pte, PT_WRITABLE);
+                        }
+                } else {
+                        /* Descend a level. */
+                        physaddr_t pa = mmu_ptob(pte2mfn(pte, curlevel));
+                        paddrs[--curlevel] = pa;
+                        entry[curlevel] = 0;
+                        continue;
+                }
+
+                /*
+                 * Advance to the next entry.  On running off the end of a
+                 * table, reset that level's index and advance the parent's
+                 * index instead; running off the end of the top-level table
+                 * ends the walk after reporting any range still open.
+                 */
+                while (++entry[curlevel] == mmu.ptes_per_table) {
+                        /* Ascend back up. */
+                        entry[curlevel] = 0;
+                        if (curlevel == mmu.max_level) {
+                                if (start != (uintptr_t)-1) {
+                                        ptmap_report(entry, start,
+                                            user, writable, wflag);
+                                }
+                                goto out;
+                        }
+
+                        curlevel++;
+                }
+        }
+
+out:
+        return (DCMD_OK);
 }