Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

*** 19,28 **** --- 19,30 ---- * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * + * Copyright 2018 Joyent, Inc. */ /* * This part of the file contains the mdb support for dcmds: * ::memseg_list
*** 44,53 **** --- 46,58 ---- #include <mdb/mdb_target.h> #include <vm/page.h> #include <vm/hat_i86.h> + #define VA_SIGN_BIT (1UL << 47) + #define VA_SIGN_EXTEND(va) (((va) ^ VA_SIGN_BIT) - VA_SIGN_BIT) + struct pfn2pp { pfn_t pfn; page_t *pp; };
*** 396,428 **** else mfn = mmu_btop(pte & PT_PADDR); return (mfn); } - /* - * Print a PTE in more human friendly way. The PTE is assumed to be in - * a level 0 page table, unless -l specifies another level. - * - * The PTE value can be specified as the -p option, since on a 32 bit kernel - * with PAE running it's larger than a uintptr_t. - */ static int do_pte_dcmd(int level, uint64_t pte) { static char *attr[] = { "wrback", "wrthru", "uncached", "uncached", "wrback", "wrthru", "wrcombine", "uncached"}; int pat_index = 0; pfn_t mfn; ! mdb_printf("pte=%llr: ", pte); ! if (PTE_GET(pte, mmu.pt_nx)) ! mdb_printf("noexec "); mfn = pte2mfn(pte, level); mdb_printf("%s=0x%lr ", is_xpv ? "mfn" : "pfn", mfn); if (PTE_GET(pte, PT_NOCONSIST)) mdb_printf("noconsist "); if (PTE_GET(pte, PT_NOSYNC)) mdb_printf("nosync "); --- 401,427 ---- else mfn = mmu_btop(pte & PT_PADDR); return (mfn); } static int do_pte_dcmd(int level, uint64_t pte) { static char *attr[] = { "wrback", "wrthru", "uncached", "uncached", "wrback", "wrthru", "wrcombine", "uncached"}; int pat_index = 0; pfn_t mfn; ! mdb_printf("pte=0x%llr: ", pte); mfn = pte2mfn(pte, level); mdb_printf("%s=0x%lr ", is_xpv ? "mfn" : "pfn", mfn); + if (PTE_GET(pte, mmu.pt_nx)) + mdb_printf("noexec "); + if (PTE_GET(pte, PT_NOCONSIST)) mdb_printf("noconsist "); if (PTE_GET(pte, PT_NOSYNC)) mdb_printf("nosync ");
*** 474,529 **** } /* * Print a PTE in more human friendly way. The PTE is assumed to be in * a level 0 page table, unless -l specifies another level. - * - * The PTE value can be specified as the -p option, since on a 32 bit kernel - * with PAE running it's larger than a uintptr_t. */ /*ARGSUSED*/ int pte_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { ! int level = 0; ! uint64_t pte = 0; ! char *level_str = NULL; ! char *pte_str = NULL; init_mmu(); if (mmu.num_level == 0) return (DCMD_ERR); ! if (mdb_getopts(argc, argv, ! 'p', MDB_OPT_STR, &pte_str, ! 'l', MDB_OPT_STR, &level_str) != argc) return (DCMD_USAGE); ! /* ! * parse the PTE to decode, if it's 0, we don't do anything ! */ ! if (pte_str != NULL) { ! pte = mdb_strtoull(pte_str); ! } else { ! if ((flags & DCMD_ADDRSPEC) == 0) return (DCMD_USAGE); - pte = addr; - } - if (pte == 0) - return (DCMD_OK); ! /* ! * parse the level if supplied ! */ ! if (level_str != NULL) { ! level = mdb_strtoull(level_str); ! if (level < 0 || level > mmu.max_level) return (DCMD_ERR); } ! return (do_pte_dcmd(level, pte)); } static size_t va2entry(htable_t *htable, uintptr_t addr) { --- 473,510 ---- } /* * Print a PTE in more human friendly way. The PTE is assumed to be in * a level 0 page table, unless -l specifies another level. */ /*ARGSUSED*/ int pte_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { ! uint64_t level = 0; init_mmu(); if (mmu.num_level == 0) return (DCMD_ERR); ! if ((flags & DCMD_ADDRSPEC) == 0) return (DCMD_USAGE); ! if (mdb_getopts(argc, argv, ! 'l', MDB_OPT_UINT64, &level) != argc) return (DCMD_USAGE); ! if (level > mmu.max_level) { ! mdb_warn("invalid level %lu\n", level); return (DCMD_ERR); } ! if (addr == 0) ! return (DCMD_OK); ! ! return (do_pte_dcmd((int)level, addr)); } static size_t va2entry(htable_t *htable, uintptr_t addr) {
*** 535,563 **** static x86pte_t get_pte(hat_t *hat, htable_t *htable, uintptr_t addr) { x86pte_t buf; - x86pte32_t *pte32 = (x86pte32_t *)&buf; - size_t len; ! if (htable->ht_flags & HTABLE_VLP) { ! uintptr_t ptr = (uintptr_t)hat->hat_vlp_ptes; ptr += va2entry(htable, addr) << mmu.pte_size_shift; ! len = mdb_vread(&buf, mmu.pte_size, ptr); ! } else { paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn); paddr += va2entry(htable, addr) << mmu.pte_size_shift; - len = mdb_pread(&buf, mmu.pte_size, paddr); - } ! if (len != mmu.pte_size) ! return (0); ! ! if (mmu.pte_size == sizeof (x86pte_t)) return (buf); ! return (*pte32); } static int do_va2pa(uintptr_t addr, struct as *asp, int print_level, physaddr_t *pap, pfn_t *mfnp) --- 516,539 ---- static x86pte_t get_pte(hat_t *hat, htable_t *htable, uintptr_t addr) { x86pte_t buf; ! if (htable->ht_flags & HTABLE_COPIED) { ! uintptr_t ptr = (uintptr_t)hat->hat_copied_ptes; ptr += va2entry(htable, addr) << mmu.pte_size_shift; ! return (*(x86pte_t *)ptr); ! } ! paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn); paddr += va2entry(htable, addr) << mmu.pte_size_shift; ! if ((mdb_pread(&buf, mmu.pte_size, paddr)) == mmu.pte_size) return (buf); ! ! return (0); } static int do_va2pa(uintptr_t addr, struct as *asp, int print_level, physaddr_t *pap, pfn_t *mfnp)
*** 619,630 **** continue; pte = get_pte(&hat, &htable, addr); if (print_level) { ! mdb_printf("\tlevel=%d htable=%p " ! "pte=%llr\n", level, ht, pte); } if (!PTE_ISVALID(pte)) { mdb_printf("Address %p is unmapped.\n", addr); --- 595,606 ---- continue; pte = get_pte(&hat, &htable, addr); if (print_level) { ! mdb_printf("\tlevel=%d htable=0x%p " ! "pte=0x%llr\n", level, ht, pte); } if (!PTE_ISVALID(pte)) { mdb_printf("Address %p is unmapped.\n", addr);
*** 723,734 **** uintptr_t base; int h; int level; int entry; x86pte_t pte; - x86pte_t buf; - x86pte32_t *pte32 = (x86pte32_t *)&buf; physaddr_t paddr; size_t len; /* * The hats are kept in a list with khat at the head. --- 699,708 ----
*** 794,811 **** */ if (hatp != khat && base >= kernelbase) continue; ! len = mdb_pread(&buf, mmu.pte_size, paddr + entry * mmu.pte_size); if (len != mmu.pte_size) return (DCMD_ERR); - if (mmu.pte_size == sizeof (x86pte_t)) - pte = buf; - else - pte = *pte32; if ((pte & PT_VALID) == 0) continue; if (level == 0 || !(pte & PT_PAGESIZE)) pte &= PT_PADDR; --- 768,781 ---- */ if (hatp != khat && base >= kernelbase) continue; ! len = mdb_pread(&pte, mmu.pte_size, paddr + entry * mmu.pte_size); if (len != mmu.pte_size) return (DCMD_ERR); if ((pte & PT_VALID) == 0) continue; if (level == 0 || !(pte & PT_PAGESIZE)) pte &= PT_PADDR;
*** 852,875 **** return (do_report_maps(pfn)); } static int ! do_ptable_dcmd(pfn_t pfn) { struct hat *hatp; struct hat hat; htable_t *ht; htable_t htable; uintptr_t base; int h; - int level; int entry; uintptr_t pagesize; x86pte_t pte; x86pte_t buf; - x86pte32_t *pte32 = (x86pte32_t *)&buf; physaddr_t paddr; size_t len; /* * The hats are kept in a list with khat at the head. --- 822,843 ---- return (do_report_maps(pfn)); } static int ! do_ptable_dcmd(pfn_t pfn, uint64_t level) { struct hat *hatp; struct hat hat; htable_t *ht; htable_t htable; uintptr_t base; int h; int entry; uintptr_t pagesize; x86pte_t pte; x86pte_t buf; physaddr_t paddr; size_t len; /* * The hats are kept in a list with khat at the head.
*** 910,944 **** } found_it: if (htable.ht_pfn == pfn) { mdb_printf("htable=%p\n", ht); level = htable.ht_level; base = htable.ht_vaddr; pagesize = mmu.level_size[level]; } else { ! mdb_printf("Unknown pagetable - assuming level/addr 0"); ! level = 0; /* assume level == 0 for PFN */ base = 0; ! pagesize = MMU_PAGESIZE; } paddr = mmu_ptob((physaddr_t)pfn); for (entry = 0; entry < mmu.ptes_per_table; ++entry) { len = mdb_pread(&buf, mmu.pte_size, paddr + entry * mmu.pte_size); if (len != mmu.pte_size) return (DCMD_ERR); - if (mmu.pte_size == sizeof (x86pte_t)) pte = buf; - else - pte = *pte32; if (pte == 0) continue; ! mdb_printf("[%3d] va=%p ", entry, base + entry * pagesize); do_pte_dcmd(level, pte); } done: return (DCMD_OK); --- 878,917 ---- } found_it: if (htable.ht_pfn == pfn) { mdb_printf("htable=%p\n", ht); + if (level == (uint64_t)-1) { level = htable.ht_level; + } else if (htable.ht_level != level) { + mdb_warn("htable has level %d but forcing level %lu\n", + htable.ht_level, level); + } base = htable.ht_vaddr; pagesize = mmu.level_size[level]; } else { ! if (level == (uint64_t)-1) ! level = 0; ! mdb_warn("couldn't find matching htable, using level=%lu, " ! "base address=0x0\n", level); base = 0; ! pagesize = mmu.level_size[level]; } paddr = mmu_ptob((physaddr_t)pfn); for (entry = 0; entry < mmu.ptes_per_table; ++entry) { len = mdb_pread(&buf, mmu.pte_size, paddr + entry * mmu.pte_size); if (len != mmu.pte_size) return (DCMD_ERR); pte = buf; if (pte == 0) continue; ! mdb_printf("[%3d] va=0x%p ", entry, ! VA_SIGN_EXTEND(base + entry * pagesize)); do_pte_dcmd(level, pte); } done: return (DCMD_OK);
*** 951,960 **** --- 924,934 ---- int ptable_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { pfn_t pfn; uint_t mflag = 0; + uint64_t level = (uint64_t)-1; init_mmu(); if (mmu.num_level == 0) return (DCMD_ERR);
*** 961,978 **** if ((flags & DCMD_ADDRSPEC) == 0) return (DCMD_USAGE); if (mdb_getopts(argc, argv, ! 'm', MDB_OPT_SETBITS, TRUE, &mflag, NULL) != argc) return (DCMD_USAGE); pfn = (pfn_t)addr; if (mflag) pfn = mdb_mfn_to_pfn(pfn); ! return (do_ptable_dcmd(pfn)); } static int do_htables_dcmd(hat_t *hatp) { --- 935,958 ---- if ((flags & DCMD_ADDRSPEC) == 0) return (DCMD_USAGE); if (mdb_getopts(argc, argv, ! 'm', MDB_OPT_SETBITS, TRUE, &mflag, ! 'l', MDB_OPT_UINT64, &level, NULL) != argc) return (DCMD_USAGE); + if (level != (uint64_t)-1 && level > mmu.max_level) { + mdb_warn("invalid level %lu\n", level); + return (DCMD_ERR); + } + pfn = (pfn_t)addr; if (mflag) pfn = mdb_mfn_to_pfn(pfn); ! return (do_ptable_dcmd(pfn, level)); } static int do_htables_dcmd(hat_t *hatp) {
*** 1028,1033 **** --- 1008,1122 ---- return (DCMD_USAGE); hat = (hat_t *)addr; return (do_htables_dcmd(hat)); + } + + static uintptr_t + entry2va(size_t *entries) + { + uintptr_t va = 0; + + for (level_t l = mmu.max_level; l >= 0; l--) + va += entries[l] << mmu.level_shift[l]; + + return (VA_SIGN_EXTEND(va)); + } + + static void + ptmap_report(size_t *entries, uintptr_t start, + boolean_t user, boolean_t writable, boolean_t wflag) + { + uint64_t curva = entry2va(entries); + + mdb_printf("mapped %s,%s range of %lu bytes: %a-%a\n", + user ? "user" : "kernel", writable ? "writable" : "read-only", + curva - start, start, curva - 1); + if (wflag && start >= kernelbase) + (void) mdb_call_dcmd("whatis", start, DCMD_ADDRSPEC, 0, NULL); + } + + int + ptmap_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) + { + physaddr_t paddrs[MAX_NUM_LEVEL] = { 0, }; + size_t entry[MAX_NUM_LEVEL] = { 0, }; + uintptr_t start = (uintptr_t)-1; + boolean_t writable = B_FALSE; + boolean_t user = B_FALSE; + boolean_t wflag = B_FALSE; + level_t curlevel; + + if ((flags & DCMD_ADDRSPEC) == 0) + return (DCMD_USAGE); + + if (mdb_getopts(argc, argv, + 'w', MDB_OPT_SETBITS, TRUE, &wflag, NULL) != argc) + return (DCMD_USAGE); + + init_mmu(); + + if (mmu.num_level == 0) + return (DCMD_ERR); + + curlevel = mmu.max_level; + + paddrs[curlevel] = addr & MMU_PAGEMASK; + + for (;;) { + physaddr_t pte_addr; + x86pte_t pte; + + pte_addr = paddrs[curlevel] + + (entry[curlevel] << mmu.pte_size_shift); + + if (mdb_pread(&pte, sizeof (pte), pte_addr) != sizeof (pte)) { + mdb_warn("couldn't read pte at %p", pte_addr); + return (DCMD_ERR); + } + + if (PTE_GET(pte, PT_VALID) == 0) { + if (start != (uintptr_t)-1) { + ptmap_report(entry, start, + user, writable, wflag); + start = (uintptr_t)-1; + } + } else if (curlevel == 0 || PTE_GET(pte, PT_PAGESIZE)) { + if (start == (uintptr_t)-1) { + start = entry2va(entry); + user = PTE_GET(pte, PT_USER); + writable = PTE_GET(pte, PT_WRITABLE); + } else if (user != PTE_GET(pte, PT_USER) || + writable != PTE_GET(pte, PT_WRITABLE)) { + ptmap_report(entry, start, + user, writable, wflag); + start = entry2va(entry); + user = PTE_GET(pte, PT_USER); + writable = PTE_GET(pte, PT_WRITABLE); + } + } else { + /* Descend a level. */ + physaddr_t pa = mmu_ptob(pte2mfn(pte, curlevel)); + paddrs[--curlevel] = pa; + entry[curlevel] = 0; + continue; + } + + while (++entry[curlevel] == mmu.ptes_per_table) { + /* Ascend back up. */ + entry[curlevel] = 0; + if (curlevel == mmu.max_level) { + if (start != (uintptr_t)-1) { + ptmap_report(entry, start, + user, writable, wflag); + } + goto out; + } + + curlevel++; + } + } + + out: + return (DCMD_OK); }