8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
*** 20,30 ****
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 by Delphix. All rights reserved.
! * Copyright 2015 Joyent, Inc.
*/
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
--- 20,30 ----
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 by Delphix. All rights reserved.
! * Copyright 2018 Joyent, Inc.
*/
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
*** 135,145 ****
{
struct mmuext_op t;
uint_t count;
if (IN_XPV_PANIC()) {
! mmu_tlbflush_entry((caddr_t)va);
} else {
t.cmd = MMUEXT_INVLPG_LOCAL;
t.arg1.linear_addr = (uintptr_t)va;
if (HYPERVISOR_mmuext_op(&t, 1, &count, DOMID_SELF) < 0)
panic("HYPERVISOR_mmuext_op() failed");
--- 135,145 ----
{
struct mmuext_op t;
uint_t count;
if (IN_XPV_PANIC()) {
! mmu_flush_tlb_page((uintptr_t)va);
} else {
t.cmd = MMUEXT_INVLPG_LOCAL;
t.arg1.linear_addr = (uintptr_t)va;
if (HYPERVISOR_mmuext_op(&t, 1, &count, DOMID_SELF) < 0)
panic("HYPERVISOR_mmuext_op() failed");
*** 152,162 ****
{
struct mmuext_op t;
uint_t count;
if (IN_XPV_PANIC()) {
! mmu_tlbflush_entry((caddr_t)va);
return;
}
t.cmd = MMUEXT_INVLPG_MULTI;
t.arg1.linear_addr = (uintptr_t)va;
--- 152,162 ----
{
struct mmuext_op t;
uint_t count;
if (IN_XPV_PANIC()) {
! mmu_flush_tlb_page((uintptr_t)va);
return;
}
t.cmd = MMUEXT_INVLPG_MULTI;
t.arg1.linear_addr = (uintptr_t)va;
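The two hunks above switch the Xen panic paths from the old single-page flush, mmu_tlbflush_entry(caddr_t), to mmu_flush_tlb_page(uintptr_t). The sketch below is illustrative only and not part of this change; it assumes an INVLPG-style body for the renamed routine, whose real definition lives in the i86pc mmu code and may differ (e.g. PCID handling).

    /*
     * Illustrative sketch (not from this webrev): a single-page TLB
     * flush keyed by uintptr_t, as the new callers above expect.
     */
    static inline void
    sketch_flush_tlb_page(uintptr_t va)
    {
        __asm__ __volatile__("invlpg (%0)" : : "r" (va) : "memory");
    }

    /* An old-style caller holding a caddr_t simply casts: */
    void
    sketch_flush(caddr_t va)
    {
        sketch_flush_tlb_page((uintptr_t)va);
    }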
*** 619,633 ****
* We also skip if HAT_FREEING because hat_pte_unmap()
* won't zero out the PTE's. That would lead to hitting
* stale PTEs either here or under hat_unload() when we
* steal and unload the same page table in competing
* threads.
*/
! while (hat != NULL &&
! (hat->hat_flags &
! (HAT_VICTIM | HAT_SHARED | HAT_FREEING)) != 0)
hat = hat->hat_next;
if (hat == NULL)
break;
/*
--- 619,637 ----
* We also skip if HAT_FREEING because hat_pte_unmap()
* won't zero out the PTE's. That would lead to hitting
* stale PTEs either here or under hat_unload() when we
* steal and unload the same page table in competing
* threads.
+ *
+ * We skip HATs that belong to CPUs, to make our lives
+ * simpler.
*/
! while (hat != NULL && (hat->hat_flags &
! (HAT_VICTIM | HAT_SHARED | HAT_FREEING |
! HAT_PCP)) != 0) {
hat = hat->hat_next;
+ }
if (hat == NULL)
break;
/*
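The steal loop above gains HAT_PCP in its skip mask: the per-CPU HATs introduced for KPTI are never victimized, alongside HAT_VICTIM, HAT_SHARED and HAT_FREEING. An illustrative restatement of the predicate, not part of this change; the flag values below are stand-ins, not the real definitions from the i86pc hat headers.

    /*
     * Illustrative sketch only: when htable_steal() passes over a hat.
     * Flag values are stand-ins, not the real header definitions.
     */
    #define SKETCH_HAT_FREEING  0x0001
    #define SKETCH_HAT_VICTIM   0x0002
    #define SKETCH_HAT_SHARED   0x0004
    #define SKETCH_HAT_PCP      0x0008  /* per-CPU hat (KPTI) */

    static int
    sketch_hat_is_stealable(uint_t hat_flags)
    {
        /* Never steal from victim, shared, freeing, or per-CPU HATs. */
        return ((hat_flags & (SKETCH_HAT_VICTIM | SKETCH_HAT_SHARED |
            SKETCH_HAT_FREEING | SKETCH_HAT_PCP)) == 0);
    }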
*** 666,677 ****
for (ht = list; (ht) && (reap); ht = ht->ht_next) {
if (ht->ht_hat == NULL)
continue;
ASSERT(ht->ht_hat == hat);
#if defined(__xpv) && defined(__amd64)
! if (!(ht->ht_flags & HTABLE_VLP) &&
! ht->ht_level == mmu.max_level) {
ptable_free(hat->hat_user_ptable);
hat->hat_user_ptable = PFN_INVALID;
}
#endif
/*
--- 670,681 ----
for (ht = list; (ht) && (reap); ht = ht->ht_next) {
if (ht->ht_hat == NULL)
continue;
ASSERT(ht->ht_hat == hat);
#if defined(__xpv) && defined(__amd64)
! ASSERT(!(ht->ht_flags & HTABLE_COPIED));
! if (ht->ht_level == mmu.max_level) {
ptable_free(hat->hat_user_ptable);
hat->hat_user_ptable = PFN_INVALID;
}
#endif
/*
*** 777,796 ****
uintptr_t vaddr,
level_t level,
htable_t *shared)
{
htable_t *ht = NULL;
! uint_t is_vlp;
uint_t is_bare = 0;
uint_t need_to_zero = 1;
int kmflags = (can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP);
if (level < 0 || level > TOP_LEVEL(hat))
panic("htable_alloc(): level %d out of range\n", level);
! is_vlp = (hat->hat_flags & HAT_VLP) && level == VLP_LEVEL;
! if (is_vlp || shared != NULL)
is_bare = 1;
/*
* First reuse a cached htable from the hat_ht_cached field, this
* avoids unnecessary trips through kmem/page allocators.
--- 781,801 ----
uintptr_t vaddr,
level_t level,
htable_t *shared)
{
htable_t *ht = NULL;
! uint_t is_copied;
uint_t is_bare = 0;
uint_t need_to_zero = 1;
int kmflags = (can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP);
if (level < 0 || level > TOP_LEVEL(hat))
panic("htable_alloc(): level %d out of range\n", level);
! is_copied = (hat->hat_flags & HAT_COPIED) &&
! level == hat->hat_max_level;
! if (is_copied || shared != NULL)
is_bare = 1;
/*
* First reuse a cached htable from the hat_ht_cached field, this
* avoids unnecessary trips through kmem/page allocators.
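In htable_alloc() the old is_vlp test (HAT_VLP hat and level == VLP_LEVEL) becomes is_copied: a table is treated as "copied" when its hat carries HAT_COPIED and the level is that hat's own top level, hat_max_level; copied and shared tables are then "bare", i.e. allocated without a backing pagetable page. An illustrative restatement of just that decision, not part of this change; the types and flags are assumed from the hat/htable headers.

    /*
     * Illustrative sketch only: classifying a new htable.
     */
    static void
    sketch_classify_htable(const hat_t *hat, level_t level,
        const htable_t *shared, uint_t *is_copied, uint_t *is_bare)
    {
        *is_copied = (hat->hat_flags & HAT_COPIED) &&
            level == hat->hat_max_level;
        /* Copied and shared tables get no pagetable page of their own. */
        *is_bare = (*is_copied || shared != NULL);
    }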
*** 928,941 ****
ht->ht_lock_cnt = 0;
ht->ht_valid_cnt = 0;
}
/*
! * setup flags, etc. for VLP htables
*/
! if (is_vlp) {
! ht->ht_flags |= HTABLE_VLP;
ASSERT(ht->ht_pfn == PFN_INVALID);
need_to_zero = 0;
}
/*
--- 933,946 ----
ht->ht_lock_cnt = 0;
ht->ht_valid_cnt = 0;
}
/*
! * setup flags, etc. for copied page tables.
*/
! if (is_copied) {
! ht->ht_flags |= HTABLE_COPIED;
ASSERT(ht->ht_pfn == PFN_INVALID);
need_to_zero = 0;
}
/*
*** 982,992 ****
*/
if (hat != NULL &&
!(ht->ht_flags & HTABLE_SHARED_PFN) &&
(use_boot_reserve ||
(!(hat->hat_flags & HAT_FREEING) && !htable_dont_cache))) {
! ASSERT((ht->ht_flags & HTABLE_VLP) == 0);
ASSERT(ht->ht_pfn != PFN_INVALID);
hat_enter(hat);
ht->ht_next = hat->hat_ht_cached;
hat->hat_ht_cached = ht;
hat_exit(hat);
--- 987,997 ----
*/
if (hat != NULL &&
!(ht->ht_flags & HTABLE_SHARED_PFN) &&
(use_boot_reserve ||
(!(hat->hat_flags & HAT_FREEING) && !htable_dont_cache))) {
! ASSERT((ht->ht_flags & HTABLE_COPIED) == 0);
ASSERT(ht->ht_pfn != PFN_INVALID);
hat_enter(hat);
ht->ht_next = hat->hat_ht_cached;
hat->hat_ht_cached = ht;
hat_exit(hat);
*** 997,1007 ****
* If we have a hardware page table, free it.
* We don't free page tables that are accessed by sharing.
*/
if (ht->ht_flags & HTABLE_SHARED_PFN) {
ASSERT(ht->ht_pfn != PFN_INVALID);
! } else if (!(ht->ht_flags & HTABLE_VLP)) {
ptable_free(ht->ht_pfn);
#if defined(__amd64) && defined(__xpv)
if (ht->ht_level == mmu.max_level && hat != NULL) {
ptable_free(hat->hat_user_ptable);
hat->hat_user_ptable = PFN_INVALID;
--- 1002,1012 ----
* If we have a hardware page table, free it.
* We don't free page tables that are accessed by sharing.
*/
if (ht->ht_flags & HTABLE_SHARED_PFN) {
ASSERT(ht->ht_pfn != PFN_INVALID);
! } else if (!(ht->ht_flags & HTABLE_COPIED)) {
ptable_free(ht->ht_pfn);
#if defined(__amd64) && defined(__xpv)
if (ht->ht_level == mmu.max_level && hat != NULL) {
ptable_free(hat->hat_user_ptable);
hat->hat_user_ptable = PFN_INVALID;
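The free path keeps the same shape under the rename: shared pagetables are never freed here, copied pagetables have no hardware pagetable page of their own to free, and everything else returns its page through ptable_free(). An illustrative restatement, not part of this change; the ASSERTs mirror the ones visible in the hunks above.

    /*
     * Illustrative sketch only: which htables release a pagetable pfn
     * when freed.
     */
    static void
    sketch_htable_release_pfn(htable_t *ht)
    {
        if (ht->ht_flags & HTABLE_SHARED_PFN) {
            /* shared: the pfn belongs to the source hat, keep it */
            ASSERT(ht->ht_pfn != PFN_INVALID);
        } else if (!(ht->ht_flags & HTABLE_COPIED)) {
            /* normal table: hand the physical page back */
            ptable_free(ht->ht_pfn);
        } else {
            /* copied table: its PTEs live inside the hat_t, no pfn */
            ASSERT(ht->ht_pfn == PFN_INVALID);
        }
    }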
*** 1109,1127 ****
#endif
panic("Bad PTP found=" FMT_PTE ", expected=" FMT_PTE,
found, expect);
/*
! * When a top level VLP page table entry changes, we must issue
! * a reload of cr3 on all processors.
*
! * If we don't need do do that, then we still have to INVLPG against
! * an address covered by the inner page table, as the latest processors
* have TLB-like caches for non-leaf page table entries.
*/
if (!(hat->hat_flags & HAT_FREEING)) {
! hat_tlb_inval(hat, (higher->ht_flags & HTABLE_VLP) ?
DEMAP_ALL_ADDR : old->ht_vaddr);
}
HTABLE_DEC(higher->ht_valid_cnt);
}
--- 1114,1132 ----
#endif
panic("Bad PTP found=" FMT_PTE ", expected=" FMT_PTE,
found, expect);
/*
! * When a top level PTE changes for a copied htable, we must trigger a
! * hat_pcp_update() on all HAT CPUs.
*
! * If we don't need to do that, then we still have to INVLPG against an
! * address covered by the inner page table, as the latest processors
* have TLB-like caches for non-leaf page table entries.
*/
if (!(hat->hat_flags & HAT_FREEING)) {
! hat_tlb_inval(hat, (higher->ht_flags & HTABLE_COPIED) ?
DEMAP_ALL_ADDR : old->ht_vaddr);
}
HTABLE_DEC(higher->ht_valid_cnt);
}
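When a parent-level PTE is torn down here (and installed, in the next hunk), the invalidation rule is: a change to a copied top-level table needs a full demap, so that every CPU refreshes its per-CPU copy via hat_pcp_update(), while an ordinary table only needs an INVLPG of an address the inner table covers, because recent processors cache non-leaf entries. An illustrative restatement of that choice, not part of this change.

    /*
     * Illustrative sketch only: invalidation scope after a parent
     * (PTP) entry changes.
     */
    static void
    sketch_ptp_inval(hat_t *hat, htable_t *higher, uintptr_t covered_va)
    {
        if (hat->hat_flags & HAT_FREEING)
            return;     /* the whole hat is being torn down anyway */

        if (higher->ht_flags & HTABLE_COPIED) {
            /* force hat_pcp_update() on all CPUs using this hat */
            hat_tlb_inval(hat, DEMAP_ALL_ADDR);
        } else {
            /* flush cached non-leaf entries covering this subtree */
            hat_tlb_inval(hat, covered_va);
        }
    }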
*** 1146,1164 ****
found = x86pte_cas(higher, entry, 0, newptp);
if ((found & ~PT_REF) != 0)
panic("HAT: ptp not 0, found=" FMT_PTE, found);
/*
! * When any top level VLP page table entry changes, we must issue
! * a reload of cr3 on all processors using it.
* We also need to do this for the kernel hat on PAE 32 bit kernel.
*/
if (
#ifdef __i386
! (higher->ht_hat == kas.a_hat && higher->ht_level == VLP_LEVEL) ||
#endif
! (higher->ht_flags & HTABLE_VLP))
hat_tlb_inval(higher->ht_hat, DEMAP_ALL_ADDR);
}
/*
* Release of hold on an htable. If this is the last use and the pagetable
--- 1151,1171 ----
found = x86pte_cas(higher, entry, 0, newptp);
if ((found & ~PT_REF) != 0)
panic("HAT: ptp not 0, found=" FMT_PTE, found);
/*
! * When a top level PTE changes for a copied htable, we must trigger a
! * hat_pcp_update() on all HAT CPUs.
! *
* We also need to do this for the kernel hat on PAE 32 bit kernel.
*/
if (
#ifdef __i386
! (higher->ht_hat == kas.a_hat &&
! higher->ht_level == higher->ht_hat->hat_max_level) ||
#endif
! (higher->ht_flags & HTABLE_COPIED))
hat_tlb_inval(higher->ht_hat, DEMAP_ALL_ADDR);
}
/*
* Release of hold on an htable. If this is the last use and the pagetable
*** 1293,1303 ****
#if defined(__amd64)
/*
* 32 bit address spaces on 64 bit kernels need to check
* for overflow of the 32 bit address space
*/
! if ((hat->hat_flags & HAT_VLP) && vaddr >= ((uint64_t)1 << 32))
return (NULL);
#endif
base = 0;
} else {
base = vaddr & LEVEL_MASK(level + 1);
--- 1300,1311 ----
#if defined(__amd64)
/*
* 32 bit address spaces on 64 bit kernels need to check
* for overflow of the 32 bit address space
*/
! if ((hat->hat_flags & HAT_COPIED_32) &&
! vaddr >= ((uint64_t)1 << 32))
return (NULL);
#endif
base = 0;
} else {
base = vaddr & LEVEL_MASK(level + 1);
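The lookup bound check changes from HAT_VLP to HAT_COPIED_32: on a 64-bit kernel, a 32-bit process's copied top level only spans the low 4 GB, so any virtual address at or above 1 << 32 cannot be mapped and the walk can stop early. An illustrative restatement, not part of this change.

    /*
     * Illustrative sketch only: early-out for 32-bit address spaces
     * on a 64-bit kernel.
     */
    static int
    sketch_vaddr_in_range(const hat_t *hat, uint64_t vaddr)
    {
        if ((hat->hat_flags & HAT_COPIED_32) &&
            vaddr >= ((uint64_t)1 << 32))
            return (0); /* cannot be mapped by this hat */
        return (1);
    }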
*** 1941,1954 ****
*/
static x86pte_t *
x86pte_access_pagetable(htable_t *ht, uint_t index)
{
/*
! * VLP pagetables are contained in the hat_t
*/
! if (ht->ht_flags & HTABLE_VLP)
! return (PT_INDEX_PTR(ht->ht_hat->hat_vlp_ptes, index));
return (x86pte_mapin(ht->ht_pfn, index, ht));
}
/*
* map the given pfn into the page table window.
--- 1949,1964 ----
*/
static x86pte_t *
x86pte_access_pagetable(htable_t *ht, uint_t index)
{
/*
! * HTABLE_COPIED pagetables are contained in the hat_t
*/
! if (ht->ht_flags & HTABLE_COPIED) {
! ASSERT3U(index, <, ht->ht_hat->hat_num_copied);
! return (PT_INDEX_PTR(ht->ht_hat->hat_copied_ptes, index));
! }
return (x86pte_mapin(ht->ht_pfn, index, ht));
}
/*
* map the given pfn into the page table window.
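Copied pagetables are not separate physical pages: their PTEs live in an array embedded in the hat_t (hat_copied_ptes, with hat_num_copied valid entries, per the ASSERT3U above), so reaching an entry is plain pointer arithmetic rather than a mapin of ht_pfn. The layout assumed by that hunk looks roughly like the sketch below; the real hat_t has many more fields, and the field types and array bound here are stand-ins, not taken from this webrev.

    /*
     * Illustrative sketch only: the hat_t fields the copied-pagetable
     * accessor relies on.
     */
    struct sketch_hat {
        uint_t      hat_flags;            /* HAT_COPIED, HAT_COPIED_32, ... */
        uint_t      hat_num_copied;       /* valid entries in the array */
        x86pte_t    hat_copied_ptes[512]; /* copied top-level PTEs (size assumed) */
    };

With a layout like that, the accessor is simply PT_INDEX_PTR(hat->hat_copied_ptes, index) when HTABLE_COPIED is set, and the usual x86pte_mapin() of ht_pfn otherwise.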
*** 1977,1987 ****
--- 1987,2000 ----
/*
* Disable preemption and grab the CPU's hci_mutex
*/
kpreempt_disable();
+
ASSERT(CPU->cpu_hat_info != NULL);
+ ASSERT(!(getcr4() & CR4_PCIDE));
+
mutex_enter(&CPU->cpu_hat_info->hci_mutex);
x = PWIN_TABLE(CPU->cpu_id);
pteptr = (x86pte_t *)PWIN_PTE_VA(x);
#ifndef __xpv
if (mmu.pae_hat)
*** 2012,2022 ****
if (mmu.pae_hat)
*pteptr = newpte;
else
*(x86pte32_t *)pteptr = newpte;
XPV_DISALLOW_PAGETABLE_UPDATES();
! mmu_tlbflush_entry((caddr_t)(PWIN_VA(x)));
}
}
return (PT_INDEX_PTR(PWIN_VA(x), index));
}
--- 2025,2035 ----
if (mmu.pae_hat)
*pteptr = newpte;
else
*(x86pte32_t *)pteptr = newpte;
XPV_DISALLOW_PAGETABLE_UPDATES();
! mmu_flush_tlb_kpage((uintptr_t)PWIN_VA(x));
}
}
return (PT_INDEX_PTR(PWIN_VA(x), index));
}
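Two things change in the pagetable-window (PWIN) mapin path above: it now asserts that CR4.PCIDE is clear, since the code relies on a single page-granular flush of the window VA being sufficient, and that flush becomes the kernel-page variant mmu_flush_tlb_kpage(). An illustrative sketch of the window update, simplified to the PAE/64-bit case and not part of this change.

    /*
     * Illustrative sketch only: remapping a CPU's pagetable window and
     * returning a pointer to the requested entry.
     */
    static x86pte_t *
    sketch_pwin_map(x86pte_t newpte, x86pte_t *pteptr, caddr_t win_va,
        uint_t index)
    {
        /* the single-page flush below is only relied on with PCID off */
        ASSERT(!(getcr4() & CR4_PCIDE));

        /* point the window PTE at the target pagetable page... */
        *pteptr = newpte;
        /* ...and flush the stale translation for the window VA */
        mmu_flush_tlb_kpage((uintptr_t)win_va);

        return (PT_INDEX_PTR(win_va, index));
    }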
*** 2024,2037 ****
* Release access to a page table.
*/
static void
x86pte_release_pagetable(htable_t *ht)
{
! /*
! * nothing to do for VLP htables
! */
! if (ht->ht_flags & HTABLE_VLP)
return;
x86pte_mapout();
}
--- 2037,2047 ----
* Release access to a page table.
*/
static void
x86pte_release_pagetable(htable_t *ht)
{
! if (ht->ht_flags & HTABLE_COPIED)
return;
x86pte_mapout();
}
*** 2128,2138 ****
#ifdef __xpv
if (!IN_XPV_PANIC())
xen_flush_va((caddr_t)addr);
else
#endif
! mmu_tlbflush_entry((caddr_t)addr);
goto done;
}
/*
* Detect if we have a collision of installing a large
--- 2138,2148 ----
#ifdef __xpv
if (!IN_XPV_PANIC())
xen_flush_va((caddr_t)addr);
else
#endif
! mmu_flush_tlb_page(addr);
goto done;
}
/*
* Detect if we have a collision of installing a large
*** 2187,2197 ****
int cnt = 1;
int count;
maddr_t ma;
if (!IN_XPV_PANIC()) {
! ASSERT(!(ht->ht_flags & HTABLE_VLP)); /* no VLP yet */
ma = pa_to_ma(PT_INDEX_PHYSADDR(pfn_to_pa(ht->ht_pfn), entry));
t[0].ptr = ma | MMU_NORMAL_PT_UPDATE;
t[0].val = new;
#if defined(__amd64)
--- 2197,2207 ----
int cnt = 1;
int count;
maddr_t ma;
if (!IN_XPV_PANIC()) {
! ASSERT(!(ht->ht_flags & HTABLE_COPIED));
ma = pa_to_ma(PT_INDEX_PHYSADDR(pfn_to_pa(ht->ht_pfn), entry));
t[0].ptr = ma | MMU_NORMAL_PT_UPDATE;
t[0].val = new;
#if defined(__amd64)
*** 2344,2354 ****
#ifndef __xpv
/*
* Copy page tables - this is just a little more complicated than the
* previous routines. Note that it's also not atomic! It also is never
! * used for VLP pagetables.
*/
void
x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, uint_t count)
{
caddr_t src_va;
--- 2354,2364 ----
#ifndef __xpv
/*
* Copy page tables - this is just a little more complicated than the
* previous routines. Note that it's also not atomic! It also is never
! * used for HTABLE_COPIED pagetables.
*/
void
x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, uint_t count)
{
caddr_t src_va;
*** 2356,2367 ****
size_t size;
x86pte_t *pteptr;
x86pte_t pte;
ASSERT(khat_running);
! ASSERT(!(dest->ht_flags & HTABLE_VLP));
! ASSERT(!(src->ht_flags & HTABLE_VLP));
ASSERT(!(src->ht_flags & HTABLE_SHARED_PFN));
ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
/*
* Acquire access to the CPU pagetable windows for the dest and source.
--- 2366,2377 ----
size_t size;
x86pte_t *pteptr;
x86pte_t pte;
ASSERT(khat_running);
! ASSERT(!(dest->ht_flags & HTABLE_COPIED));
! ASSERT(!(src->ht_flags & HTABLE_COPIED));
ASSERT(!(src->ht_flags & HTABLE_SHARED_PFN));
ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
/*
* Acquire access to the CPU pagetable windows for the dest and source.
*** 2371,2380 ****
--- 2381,2392 ----
src_va = (caddr_t)
PT_INDEX_PTR(hat_kpm_pfn2va(src->ht_pfn), entry);
} else {
uint_t x = PWIN_SRC(CPU->cpu_id);
+ ASSERT(!(getcr4() & CR4_PCIDE));
+
/*
* Finish defining the src pagetable mapping
*/
src_va = (caddr_t)PT_INDEX_PTR(PWIN_VA(x), entry);
pte = MAKEPTE(src->ht_pfn, 0) | mmu.pt_global | mmu.pt_nx;
*** 2381,2391 ****
pteptr = (x86pte_t *)PWIN_PTE_VA(x);
if (mmu.pae_hat)
*pteptr = pte;
else
*(x86pte32_t *)pteptr = pte;
! mmu_tlbflush_entry((caddr_t)(PWIN_VA(x)));
}
/*
* now do the copy
*/
--- 2393,2403 ----
pteptr = (x86pte_t *)PWIN_PTE_VA(x);
if (mmu.pae_hat)
*pteptr = pte;
else
*(x86pte32_t *)pteptr = pte;
! mmu_flush_tlb_kpage((uintptr_t)PWIN_VA(x));
}
/*
* now do the copy
*/
*** 2448,2458 ****
/*
* Map in the page table to be zeroed.
*/
ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
! ASSERT(!(dest->ht_flags & HTABLE_VLP));
/*
* On the hypervisor we don't use x86pte_access_pagetable() since
* in this case the page is not pinned yet.
*/
--- 2460,2470 ----
/*
* Map in the page table to be zeroed.
*/
ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
! ASSERT(!(dest->ht_flags & HTABLE_COPIED));
/*
* On the hypervisor we don't use x86pte_access_pagetable() since
* in this case the page is not pinned yet.
*/
*** 2502,2512 ****
* Dump all page tables
*/
for (hat = kas.a_hat; hat != NULL; hat = hat->hat_next) {
for (h = 0; h < hat->hat_num_hash; ++h) {
for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) {
! if ((ht->ht_flags & HTABLE_VLP) == 0)
dump_page(ht->ht_pfn);
}
}
}
}
--- 2514,2524 ----
* Dump all page tables
*/
for (hat = kas.a_hat; hat != NULL; hat = hat->hat_next) {
for (h = 0; h < hat->hat_num_hash; ++h) {
for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) {
! if ((ht->ht_flags & HTABLE_COPIED) == 0)
dump_page(ht->ht_pfn);
}
}
}
}
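Finally, the crash-dump walk over every hat's htable hash now skips HTABLE_COPIED tables for the same reason as the free path: they have no pagetable pfn of their own to dump. An illustrative restatement of the per-htable test, not part of this change.

    /*
     * Illustrative sketch only: which htables contribute a page to the
     * crash dump.
     */
    static void
    sketch_dump_htable(const htable_t *ht)
    {
        /* copied tables live inside the hat_t and have no pfn */
        if ((ht->ht_flags & HTABLE_COPIED) == 0)
            dump_page(ht->ht_pfn);
    }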