8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
@@ -20,11 +20,11 @@
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 by Delphix. All rights reserved.
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
*/
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
@@ -135,11 +135,11 @@
{
struct mmuext_op t;
uint_t count;
if (IN_XPV_PANIC()) {
- mmu_tlbflush_entry((caddr_t)va);
+ mmu_flush_tlb_page((uintptr_t)va);
} else {
t.cmd = MMUEXT_INVLPG_LOCAL;
t.arg1.linear_addr = (uintptr_t)va;
if (HYPERVISOR_mmuext_op(&t, 1, &count, DOMID_SELF) < 0)
panic("HYPERVISOR_mmuext_op() failed");
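
As context for the rename in this hunk and the next: the page-granular flush that replaces mmu_tlbflush_entry() now takes a plain uintptr_t. The sketch below is a generic x86 illustration of what such a single-page local flush looks like; the function name is invented here and this is not the illumos mmu_flush_tlb_page() implementation (INVLPG is privileged, so it compiles but only runs in ring 0).

    #include <stdint.h>

    /*
     * Illustrative only: invalidate the TLB entry covering one linear
     * address on the current CPU.
     */
    static inline void
    demo_flush_tlb_page(uintptr_t va)
    {
            __asm__ __volatile__("invlpg (%0)" : : "r" (va) : "memory");
    }
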
@@ -152,11 +152,11 @@
{
struct mmuext_op t;
uint_t count;
if (IN_XPV_PANIC()) {
- mmu_tlbflush_entry((caddr_t)va);
+ mmu_flush_tlb_page((uintptr_t)va);
return;
}
t.cmd = MMUEXT_INVLPG_MULTI;
t.arg1.linear_addr = (uintptr_t)va;
@@ -619,15 +619,19 @@
* We also skip if HAT_FREEING because hat_pte_unmap()
* won't zero out the PTE's. That would lead to hitting
* stale PTEs either here or under hat_unload() when we
* steal and unload the same page table in competing
* threads.
+ *
+ * We skip HATs that belong to CPUs, to make our lives
+ * simpler.
*/
- while (hat != NULL &&
- (hat->hat_flags &
- (HAT_VICTIM | HAT_SHARED | HAT_FREEING)) != 0)
+ while (hat != NULL && (hat->hat_flags &
+ (HAT_VICTIM | HAT_SHARED | HAT_FREEING |
+ HAT_PCP)) != 0) {
hat = hat->hat_next;
+ }
if (hat == NULL)
break;
/*
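
The filter in this hunk gains one more flag: hats marked HAT_PCP (per-CPU hats) are never chosen as victims for page-table stealing. The standalone model below shows only that filtering pattern over a linked list; the struct and flag values are made up for the example.

    #include <stddef.h>
    #include <stdio.h>

    /* Toy stand-ins for the hat flags tested above; values are arbitrary. */
    #define X_VICTIM        0x01
    #define X_SHARED        0x02
    #define X_FREEING       0x04
    #define X_PCP           0x08    /* per-CPU hat: never steal from these */

    struct toy_hat {
            unsigned        flags;
            struct toy_hat  *next;
    };

    /* Return the first hat that is safe to steal page tables from, or NULL. */
    static struct toy_hat *
    first_stealable(struct toy_hat *hat)
    {
            while (hat != NULL &&
                (hat->flags & (X_VICTIM | X_SHARED | X_FREEING | X_PCP)) != 0)
                    hat = hat->next;
            return (hat);
    }

    int
    main(void)
    {
            struct toy_hat c = { 0, NULL };         /* stealable */
            struct toy_hat b = { X_PCP, &c };       /* skipped: per-CPU hat */
            struct toy_hat a = { X_FREEING, &b };   /* skipped: being freed */

            printf("first stealable hat has flags %#x\n",
                first_stealable(&a)->flags);
            return (0);
    }
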
@@ -666,12 +670,12 @@
for (ht = list; (ht) && (reap); ht = ht->ht_next) {
if (ht->ht_hat == NULL)
continue;
ASSERT(ht->ht_hat == hat);
#if defined(__xpv) && defined(__amd64)
- if (!(ht->ht_flags & HTABLE_VLP) &&
- ht->ht_level == mmu.max_level) {
+ ASSERT(!(ht->ht_flags & HTABLE_COPIED));
+ if (ht->ht_level == mmu.max_level) {
ptable_free(hat->hat_user_ptable);
hat->hat_user_ptable = PFN_INVALID;
}
#endif
/*
@@ -777,20 +781,21 @@
uintptr_t vaddr,
level_t level,
htable_t *shared)
{
htable_t *ht = NULL;
- uint_t is_vlp;
+ uint_t is_copied;
uint_t is_bare = 0;
uint_t need_to_zero = 1;
int kmflags = (can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP);
if (level < 0 || level > TOP_LEVEL(hat))
panic("htable_alloc(): level %d out of range\n", level);
- is_vlp = (hat->hat_flags & HAT_VLP) && level == VLP_LEVEL;
- if (is_vlp || shared != NULL)
+ is_copied = (hat->hat_flags & HAT_COPIED) &&
+ level == hat->hat_max_level;
+ if (is_copied || shared != NULL)
is_bare = 1;
/*
* First reuse a cached htable from the hat_ht_cached field, this
* avoids unnecessary trips through kmem/page allocators.
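
The key change in this hunk is how the "copied" case is detected: instead of comparing against the fixed VLP_LEVEL constant, the level is compared with the hat's own hat_max_level. A minimal model of that decision, using simplified stand-in types rather than the real hat_t:

    #include <stdbool.h>
    #include <stdio.h>

    #define F_COPIED        0x1     /* toy stand-in for HAT_COPIED */

    struct toy_hat {
            unsigned        flags;
            int             max_level;      /* stand-in for hat_max_level */
    };

    /* Is a table at 'level' the top-level table kept inside the hat itself? */
    static bool
    is_copied_table(const struct toy_hat *hat, int level)
    {
            return ((hat->flags & F_COPIED) != 0 && level == hat->max_level);
    }

    int
    main(void)
    {
            struct toy_hat hat = { F_COPIED, 3 };

            printf("level 3: %d\n", is_copied_table(&hat, 3));      /* 1 */
            printf("level 2: %d\n", is_copied_table(&hat, 2));      /* 0 */
            return (0);
    }
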
@@ -928,14 +933,14 @@
ht->ht_lock_cnt = 0;
ht->ht_valid_cnt = 0;
}
/*
- * setup flags, etc. for VLP htables
+ * setup flags, etc. for copied page tables.
*/
- if (is_vlp) {
- ht->ht_flags |= HTABLE_VLP;
+ if (is_copied) {
+ ht->ht_flags |= HTABLE_COPIED;
ASSERT(ht->ht_pfn == PFN_INVALID);
need_to_zero = 0;
}
/*
@@ -982,11 +987,11 @@
*/
if (hat != NULL &&
!(ht->ht_flags & HTABLE_SHARED_PFN) &&
(use_boot_reserve ||
(!(hat->hat_flags & HAT_FREEING) && !htable_dont_cache))) {
- ASSERT((ht->ht_flags & HTABLE_VLP) == 0);
+ ASSERT((ht->ht_flags & HTABLE_COPIED) == 0);
ASSERT(ht->ht_pfn != PFN_INVALID);
hat_enter(hat);
ht->ht_next = hat->hat_ht_cached;
hat->hat_ht_cached = ht;
hat_exit(hat);
@@ -997,11 +1002,11 @@
* If we have a hardware page table, free it.
* We don't free page tables that are accessed by sharing.
*/
if (ht->ht_flags & HTABLE_SHARED_PFN) {
ASSERT(ht->ht_pfn != PFN_INVALID);
- } else if (!(ht->ht_flags & HTABLE_VLP)) {
+ } else if (!(ht->ht_flags & HTABLE_COPIED)) {
ptable_free(ht->ht_pfn);
#if defined(__amd64) && defined(__xpv)
if (ht->ht_level == mmu.max_level && hat != NULL) {
ptable_free(hat->hat_user_ptable);
hat->hat_user_ptable = PFN_INVALID;
@@ -1109,19 +1114,19 @@
#endif
panic("Bad PTP found=" FMT_PTE ", expected=" FMT_PTE,
found, expect);
/*
- * When a top level VLP page table entry changes, we must issue
- * a reload of cr3 on all processors.
+ * When a top level PTE changes for a copied htable, we must trigger a
+ * hat_pcp_update() on all HAT CPUs.
*
- * If we don't need do do that, then we still have to INVLPG against
- * an address covered by the inner page table, as the latest processors
+ * If we don't need to do that, then we still have to INVLPG against an
+ * address covered by the inner page table, as the latest processors
* have TLB-like caches for non-leaf page table entries.
*/
if (!(hat->hat_flags & HAT_FREEING)) {
- hat_tlb_inval(hat, (higher->ht_flags & HTABLE_VLP) ?
+ hat_tlb_inval(hat, (higher->ht_flags & HTABLE_COPIED) ?
DEMAP_ALL_ADDR : old->ht_vaddr);
}
HTABLE_DEC(higher->ht_valid_cnt);
}
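
The flush decision this hunk rewires can be read as: if the parent entry lives in a copied top-level table, every CPU using the hat must refresh its private copy, so the whole address space is demapped; otherwise a targeted invalidation of an address inside the child table's range suffices. The sketch below models only that branch; DEMO_ALL_ADDR and the reporting callback are invented for the example.

    #include <stdint.h>
    #include <stdio.h>

    #define T_COPIED        0x1                     /* toy HTABLE_COPIED */
    #define DEMO_ALL_ADDR   (~(uintptr_t)0)         /* toy DEMAP_ALL_ADDR */

    struct toy_htable {
            unsigned        flags;
            uintptr_t       vaddr;  /* base address the table maps */
    };

    /* Stand-in for hat_tlb_inval(): just report what would be invalidated. */
    static void
    toy_tlb_inval(uintptr_t va)
    {
            if (va == DEMO_ALL_ADDR)
                    printf("demap everything (per-CPU copies must refresh)\n");
            else
                    printf("invalidate one address: %#lx\n", (unsigned long)va);
    }

    /* Mirrors the choice made after a parent PTE pointing at 'old' is cleared. */
    static void
    after_unlink(const struct toy_htable *higher, const struct toy_htable *old)
    {
            toy_tlb_inval((higher->flags & T_COPIED) ?
                DEMO_ALL_ADDR : old->vaddr);
    }

    int
    main(void)
    {
            struct toy_htable top = { T_COPIED, 0 };
            struct toy_htable mid = { 0, 0x40000000UL };

            after_unlink(&top, &mid);       /* copied parent: demap all */
            after_unlink(&mid, &mid);       /* ordinary parent: one address */
            return (0);
    }
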
@@ -1146,19 +1151,21 @@
found = x86pte_cas(higher, entry, 0, newptp);
if ((found & ~PT_REF) != 0)
panic("HAT: ptp not 0, found=" FMT_PTE, found);
/*
- * When any top level VLP page table entry changes, we must issue
- * a reload of cr3 on all processors using it.
+ * When a top level PTE changes for a copied htable, we must trigger a
+ * hat_pcp_update() on all HAT CPUs.
+ *
* We also need to do this for the kernel hat on PAE 32 bit kernel.
*/
if (
#ifdef __i386
- (higher->ht_hat == kas.a_hat && higher->ht_level == VLP_LEVEL) ||
+ (higher->ht_hat == kas.a_hat &&
+ higher->ht_level == higher->ht_hat->hat_max_level) ||
#endif
- (higher->ht_flags & HTABLE_VLP))
+ (higher->ht_flags & HTABLE_COPIED))
hat_tlb_inval(higher->ht_hat, DEMAP_ALL_ADDR);
}
/*
* Release of hold on an htable. If this is the last use and the pagetable
@@ -1293,11 +1300,12 @@
#if defined(__amd64)
/*
* 32 bit address spaces on 64 bit kernels need to check
* for overflow of the 32 bit address space
*/
- if ((hat->hat_flags & HAT_VLP) && vaddr >= ((uint64_t)1 << 32))
+ if ((hat->hat_flags & HAT_COPIED_32) &&
+ vaddr >= ((uint64_t)1 << 32))
return (NULL);
#endif
base = 0;
} else {
base = vaddr & LEVEL_MASK(level + 1);
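
The guard this hunk renames (HAT_VLP to HAT_COPIED_32) is the 4 GiB cutoff for a 32-bit address space hosted on a 64-bit kernel: any lookup at or above 1ULL << 32 can simply return NULL. A tiny model with invented names:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define F_COPIED_32     0x2     /* toy stand-in for HAT_COPIED_32 */

    /* Can a 32-bit address space possibly map 'vaddr'? */
    static bool
    vaddr_in_range(unsigned hat_flags, uint64_t vaddr)
    {
            if ((hat_flags & F_COPIED_32) != 0 && vaddr >= ((uint64_t)1 << 32))
                    return (false);
            return (true);
    }

    int
    main(void)
    {
            printf("%d\n", vaddr_in_range(F_COPIED_32, 0xfffff000ULL));  /* 1 */
            printf("%d\n", vaddr_in_range(F_COPIED_32, 0x100000000ULL)); /* 0 */
            return (0);
    }
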
@@ -1941,14 +1949,16 @@
*/
static x86pte_t *
x86pte_access_pagetable(htable_t *ht, uint_t index)
{
/*
- * VLP pagetables are contained in the hat_t
+ * HTABLE_COPIED pagetables are contained in the hat_t
*/
- if (ht->ht_flags & HTABLE_VLP)
- return (PT_INDEX_PTR(ht->ht_hat->hat_vlp_ptes, index));
+ if (ht->ht_flags & HTABLE_COPIED) {
+ ASSERT3U(index, <, ht->ht_hat->hat_num_copied);
+ return (PT_INDEX_PTR(ht->ht_hat->hat_copied_ptes, index));
+ }
return (x86pte_mapin(ht->ht_pfn, index, ht));
}
/*
* map the given pfn into the page table window.
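
The lookup above now has two clearly separated cases: a copied table's PTEs live in an array embedded in the hat (hat_copied_ptes, bounds-checked against hat_num_copied), while any other table has to be mapped in through a per-CPU window. A simplified model of that split, with invented types and a stubbed mapping path standing in for x86pte_mapin():

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define T_COPIED        0x1     /* toy HTABLE_COPIED */
    #define N_COPIED_PTES   4       /* toy hat_num_copied */

    typedef uint64_t toy_pte_t;

    struct toy_hat {
            toy_pte_t       copied_ptes[N_COPIED_PTES];     /* embedded copy */
            unsigned        num_copied;
    };

    struct toy_htable {
            struct toy_hat  *hat;
            unsigned        flags;
    };

    /* Stand-in for the mapping-window path; returns a dummy slot. */
    static toy_pte_t *
    toy_mapin(struct toy_htable *ht, unsigned index)
    {
            static toy_pte_t fake_slot;

            (void) ht;
            (void) index;
            return (&fake_slot);
    }

    static toy_pte_t *
    toy_access_pagetable(struct toy_htable *ht, unsigned index)
    {
            if (ht->flags & T_COPIED) {
                    /* The bounds check mirrors the new ASSERT3U() above. */
                    assert(index < ht->hat->num_copied);
                    return (&ht->hat->copied_ptes[index]);
            }
            return (toy_mapin(ht, index));
    }

    int
    main(void)
    {
            struct toy_hat hat = { { 0 }, N_COPIED_PTES };
            struct toy_htable ht = { &hat, T_COPIED };

            *toy_access_pagetable(&ht, 1) = 0x123;
            printf("pte[1] = %#llx\n", (unsigned long long)hat.copied_ptes[1]);
            return (0);
    }
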
@@ -1977,11 +1987,14 @@
/*
* Disable preemption and grab the CPU's hci_mutex
*/
kpreempt_disable();
+
ASSERT(CPU->cpu_hat_info != NULL);
+ ASSERT(!(getcr4() & CR4_PCIDE));
+
mutex_enter(&CPU->cpu_hat_info->hci_mutex);
x = PWIN_TABLE(CPU->cpu_id);
pteptr = (x86pte_t *)PWIN_PTE_VA(x);
#ifndef __xpv
if (mmu.pae_hat)
@@ -2012,11 +2025,11 @@
if (mmu.pae_hat)
*pteptr = newpte;
else
*(x86pte32_t *)pteptr = newpte;
XPV_DISALLOW_PAGETABLE_UPDATES();
- mmu_tlbflush_entry((caddr_t)(PWIN_VA(x)));
+ mmu_flush_tlb_kpage((uintptr_t)PWIN_VA(x));
}
}
return (PT_INDEX_PTR(PWIN_VA(x), index));
}
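
Two things happen in this hunk: the window refresh now uses the renamed kernel-page flush, and a new assertion demands that CR4.PCIDE be clear. The diff does not state why; a plausible reason, offered here only as an inference, is that INVLPG invalidates translations tagged with the current PCID (plus global entries), so the single-flush window trick is only obviously sufficient with process-context IDs disabled. The sketch below is a hypothetical model of such a per-CPU mapping window, not the illumos PWIN implementation; the names are invented and the INVLPG is privileged.

    #include <stdint.h>

    typedef uint64_t toy_pte_t;

    /*
     * One per-CPU mapping window: a reserved kernel virtual page whose PTE
     * is rewritten to point at whichever page-table page needs touching.
     */
    struct toy_pwin {
            volatile toy_pte_t      *slot_pte;      /* PTE mapping the window */
            uintptr_t               window_va;      /* the window's address */
    };

    /* Illustrative only; privileged instruction. */
    static inline void
    demo_invlpg(uintptr_t va)
    {
            __asm__ __volatile__("invlpg (%0)" : : "r" (va) : "memory");
    }

    /* Retarget the window, then flush its stale translation on this CPU. */
    static void *
    demo_map_through_window(struct toy_pwin *w, toy_pte_t new_pte)
    {
            *w->slot_pte = new_pte;
            demo_invlpg(w->window_va);
            return ((void *)w->window_va);
    }
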
@@ -2024,14 +2037,11 @@
* Release access to a page table.
*/
static void
x86pte_release_pagetable(htable_t *ht)
{
- /*
- * nothing to do for VLP htables
- */
- if (ht->ht_flags & HTABLE_VLP)
+ if (ht->ht_flags & HTABLE_COPIED)
return;
x86pte_mapout();
}
@@ -2128,11 +2138,11 @@
#ifdef __xpv
if (!IN_XPV_PANIC())
xen_flush_va((caddr_t)addr);
else
#endif
- mmu_tlbflush_entry((caddr_t)addr);
+ mmu_flush_tlb_page(addr);
goto done;
}
/*
* Detect if we have a collision of installing a large
@@ -2187,11 +2197,11 @@
int cnt = 1;
int count;
maddr_t ma;
if (!IN_XPV_PANIC()) {
- ASSERT(!(ht->ht_flags & HTABLE_VLP)); /* no VLP yet */
+ ASSERT(!(ht->ht_flags & HTABLE_COPIED));
ma = pa_to_ma(PT_INDEX_PHYSADDR(pfn_to_pa(ht->ht_pfn), entry));
t[0].ptr = ma | MMU_NORMAL_PT_UPDATE;
t[0].val = new;
#if defined(__amd64)
@@ -2344,11 +2354,11 @@
#ifndef __xpv
/*
* Copy page tables - this is just a little more complicated than the
* previous routines. Note that it's also not atomic! It also is never
- * used for VLP pagetables.
+ * used for HTABLE_COPIED pagetables.
*/
void
x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, uint_t count)
{
caddr_t src_va;
@@ -2356,12 +2366,12 @@
size_t size;
x86pte_t *pteptr;
x86pte_t pte;
ASSERT(khat_running);
- ASSERT(!(dest->ht_flags & HTABLE_VLP));
- ASSERT(!(src->ht_flags & HTABLE_VLP));
+ ASSERT(!(dest->ht_flags & HTABLE_COPIED));
+ ASSERT(!(src->ht_flags & HTABLE_COPIED));
ASSERT(!(src->ht_flags & HTABLE_SHARED_PFN));
ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
/*
* Acquire access to the CPU pagetable windows for the dest and source.
@@ -2371,10 +2381,12 @@
src_va = (caddr_t)
PT_INDEX_PTR(hat_kpm_pfn2va(src->ht_pfn), entry);
} else {
uint_t x = PWIN_SRC(CPU->cpu_id);
+ ASSERT(!(getcr4() & CR4_PCIDE));
+
/*
* Finish defining the src pagetable mapping
*/
src_va = (caddr_t)PT_INDEX_PTR(PWIN_VA(x), entry);
pte = MAKEPTE(src->ht_pfn, 0) | mmu.pt_global | mmu.pt_nx;
@@ -2381,11 +2393,11 @@
pteptr = (x86pte_t *)PWIN_PTE_VA(x);
if (mmu.pae_hat)
*pteptr = pte;
else
*(x86pte32_t *)pteptr = pte;
- mmu_tlbflush_entry((caddr_t)(PWIN_VA(x)));
+ mmu_flush_tlb_kpage((uintptr_t)PWIN_VA(x));
}
/*
* now do the copy
*/
@@ -2448,11 +2460,11 @@
/*
* Map in the page table to be zeroed.
*/
ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN));
- ASSERT(!(dest->ht_flags & HTABLE_VLP));
+ ASSERT(!(dest->ht_flags & HTABLE_COPIED));
/*
* On the hypervisor we don't use x86pte_access_pagetable() since
* in this case the page is not pinned yet.
*/
@@ -2502,11 +2514,11 @@
* Dump all page tables
*/
for (hat = kas.a_hat; hat != NULL; hat = hat->hat_next) {
for (h = 0; h < hat->hat_num_hash; ++h) {
for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) {
- if ((ht->ht_flags & HTABLE_VLP) == 0)
+ if ((ht->ht_flags & HTABLE_COPIED) == 0)
dump_page(ht->ht_pfn);
}
}
}
}
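
The final hunk keeps the crash-dump walk from trying to dump copied tables, which have no standalone page frame of their own. The nested walk itself is simple; the toy below reproduces just the skip, with invented types and a print standing in for dump_page():

    #include <stdio.h>

    #define T_COPIED        0x1     /* toy HTABLE_COPIED */
    #define NO_PFN          (-1L)   /* toy PFN_INVALID */

    struct toy_htable {
            unsigned                flags;
            long                    pfn;    /* backing page, if any */
            struct toy_htable       *next;
    };

    /* Stand-in for dump_page(): report the pfn we would include in the dump. */
    static void
    toy_dump_page(long pfn)
    {
            printf("dump pfn %ld\n", pfn);
    }

    /* Dump every table in one hash bucket that has its own backing page. */
    static void
    toy_dump_bucket(struct toy_htable *bucket)
    {
            struct toy_htable *ht;

            for (ht = bucket; ht != NULL; ht = ht->next) {
                    if ((ht->flags & T_COPIED) == 0)
                            toy_dump_page(ht->pfn);
            }
    }

    int
    main(void)
    {
            struct toy_htable top = { T_COPIED, NO_PFN, NULL };
            struct toy_htable leaf = { 0, 42, &top };

            toy_dump_bucket(&leaf);         /* dumps pfn 42 only */
            return (0);
    }
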