8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
*** 22,31 ****
--- 22,32 ----
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright 2018 Joyent, Inc.
*/
#ifndef _VM_HAT_I86_H
#define _VM_HAT_I86_H
*** 60,83 ****
*
* hment_t - Links together multiple PTEs to a single page.
*/
/*
! * VLP processes have a 32 bit address range, so their top level is 2, with
! * only 4 PTEs in that table.
*/
! #define VLP_LEVEL (2)
! #define VLP_NUM_PTES (4)
! #define VLP_SIZE (VLP_NUM_PTES * sizeof (x86pte_t))
! #define TOP_LEVEL(h) (((h)->hat_flags & HAT_VLP) ? VLP_LEVEL : mmu.max_level)
! #define VLP_COPY(fromptep, toptep) { \
! toptep[0] = fromptep[0]; \
! toptep[1] = fromptep[1]; \
! toptep[2] = fromptep[2]; \
! toptep[3] = fromptep[3]; \
! }
/*
* The hat struct exists for each address space.
*/
struct hat {
kmutex_t hat_mutex;
--- 61,90 ----
*
* hment_t - Links together multiple PTEs to a single page.
*/
/*
! * Maximum number of per-CPU pagetable entries that we'll need to cache in the
! * HAT. See the big theory statement in uts/i86pc/vm/hat_i86.c for more
! * information.
*/
! #if defined(__xpv)
! /*
! * The Xen hypervisor does not use per-CPU pagetables (PCP). Define a single
! * entry anyway, so that the struct member need not be conditionally
! * compiled.
! */
! #define MAX_COPIED_PTES 1
! #else
! /*
! * The 64-bit kernel may have up to 512 PTEs present in it for a given process.
! */
! #define MAX_COPIED_PTES 512
! #endif /* __xpv */
+ #define TOP_LEVEL(h) (((h)->hat_max_level))
+
/*
* The hat struct exists for each address space.
*/
struct hat {
kmutex_t hat_mutex;
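
The sizing above falls out of the x86 pagetable geometry: a top-level table
holds 512 entries, so a 64-bit process may have up to 512 top-level PTEs that
must be mirrored into the per-CPU table, while an ILP32 process needs only a
handful. A minimal sketch of the sizing logic, where the helper name and the
ILP32 count of 4 (the old VLP_NUM_PTES value) are assumptions, not part of
this change:

    /*
     * Sketch only: num_copied_ptes() is a hypothetical helper; the ILP32
     * count of 4 mirrors the old VLP_NUM_PTES.
     */
    static uint_t
    num_copied_ptes(boolean_t is_ilp32)
    {
            /* a 32-bit VA space needs only 4 top-level slots */
            return (is_ilp32 ? 4 : MAX_COPIED_PTES);
    }
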
*** 85,101 ****
uint_t hat_stats;
pgcnt_t hat_pages_mapped[MAX_PAGE_LEVEL + 1];
pgcnt_t hat_ism_pgcnt;
cpuset_t hat_cpus;
uint16_t hat_flags;
htable_t *hat_htable; /* top level htable */
struct hat *hat_next;
struct hat *hat_prev;
uint_t hat_num_hash; /* number of htable hash buckets */
htable_t **hat_ht_hash; /* htable hash buckets */
htable_t *hat_ht_cached; /* cached free htables */
! x86pte_t hat_vlp_ptes[VLP_NUM_PTES];
#if defined(__amd64) && defined(__xpv)
pfn_t hat_user_ptable; /* alt top ptable for user mode */
#endif
};
typedef struct hat hat_t;
--- 92,110 ----
uint_t hat_stats;
pgcnt_t hat_pages_mapped[MAX_PAGE_LEVEL + 1];
pgcnt_t hat_ism_pgcnt;
cpuset_t hat_cpus;
uint16_t hat_flags;
+ uint8_t hat_max_level; /* top level of this HAT */
+ uint_t hat_num_copied; /* Actual num of hat_copied_ptes[] */
htable_t *hat_htable; /* top level htable */
struct hat *hat_next;
struct hat *hat_prev;
uint_t hat_num_hash; /* number of htable hash buckets */
htable_t **hat_ht_hash; /* htable hash buckets */
htable_t *hat_ht_cached; /* cached free htables */
! x86pte_t hat_copied_ptes[MAX_COPIED_PTES];
#if defined(__amd64) && defined(__xpv)
pfn_t hat_user_ptable; /* alt top ptable for user mode */
#endif
};
typedef struct hat hat_t;
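
With hat_copied_ptes[] sized by hat_num_copied, the old fixed four-entry
VLP_COPY macro generalizes to a bounded loop. A minimal sketch of copying a
HAT's cached entries into a per-CPU top-level table, where pcp_ptes is a
hypothetical pointer to that table's entries:

    static void
    pcp_copy_ptes(const hat_t *hat, volatile x86pte_t *pcp_ptes)
    {
            uint_t i;

            ASSERT(hat->hat_num_copied <= MAX_COPIED_PTES);
            for (i = 0; i < hat->hat_num_copied; i++)
                    pcp_ptes[i] = hat->hat_copied_ptes[i];
    }
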
*** 104,134 ****
atomic_inc_ulong(&(hat)->hat_pages_mapped[level]);
#define PGCNT_DEC(hat, level) \
atomic_dec_ulong(&(hat)->hat_pages_mapped[level]);
/*
! * Flags for the hat_flags field
*
* HAT_FREEING - set when HAT is being destroyed - mostly used to detect that
* demap()s can be avoided.
*
! * HAT_VLP - indicates a 32 bit process has a virtual address range less than
! * the hardware's physical address range. (VLP->Virtual Less-than Physical)
! * Note - never used on the hypervisor.
*
* HAT_VICTIM - This is set while a hat is being examined for page table
* stealing and prevents it from being freed.
*
* HAT_SHARED - The hat has exported its page tables via hat_share()
*
* HAT_PINNED - On the hypervisor, indicates the top page table has been pinned.
*/
#define HAT_FREEING (0x0001)
! #define HAT_VLP (0x0002)
! #define HAT_VICTIM (0x0004)
! #define HAT_SHARED (0x0008)
! #define HAT_PINNED (0x0010)
/*
* Additional platform attribute for hat_devload() to force no caching.
*/
#define HAT_PLAT_NOCACHE (0x100000)
--- 113,150 ----
atomic_inc_ulong(&(hat)->hat_pages_mapped[level]);
#define PGCNT_DEC(hat, level) \
atomic_dec_ulong(&(hat)->hat_pages_mapped[level]);
/*
! * Flags for the hat_flags field. For more information, please see the big
! * theory statement on the HAT design in uts/i86pc/vm/hat_i86.c.
*
* HAT_FREEING - set when HAT is being destroyed - mostly used to detect that
* demap()s can be avoided.
*
! * HAT_COPIED - Indicates this HAT is a source for per-cpu page tables: see the
! * big comment in hat_i86.c for a description.
*
+ * HAT_COPIED_32 - HAT_COPIED, but for an ILP32 process.
+ *
* HAT_VICTIM - This is set while a hat is being examined for page table
* stealing and prevents it from being freed.
*
* HAT_SHARED - The hat has exported its page tables via hat_share()
*
* HAT_PINNED - On the hypervisor, indicates the top page table has been pinned.
+ *
+ * HAT_PCP - Used for the per-cpu user page table (i.e. associated with a CPU,
+ * not a process).
*/
#define HAT_FREEING (0x0001)
! #define HAT_VICTIM (0x0002)
! #define HAT_SHARED (0x0004)
! #define HAT_PINNED (0x0008)
! #define HAT_COPIED (0x0010)
! #define HAT_COPIED_32 (0x0020)
! #define HAT_PCP (0x0040)
/*
* Additional platform attribute for hat_devload() to force no caching.
*/
#define HAT_PLAT_NOCACHE (0x100000)
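
Since hat_flags is a bitmask, the new flags compose with ordinary bit tests.
A sketch of hypothetical predicates (these macros are illustrative, not part
of this change):

    /* sketch only: illustrative predicates over the flag bits above */
    #define HAT_IS_COPIED(h)       (((h)->hat_flags & HAT_COPIED) != 0)
    #define HAT_IS_COPIED_32(h)    (((h)->hat_flags & HAT_COPIED_32) != 0)
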
*** 153,162 ****
--- 169,181 ----
ulong_t hs_hm_put_reserve;
ulong_t hs_hm_get_reserve;
ulong_t hs_hm_steals;
ulong_t hs_hm_steal_exam;
ulong_t hs_tlb_inval_delayed;
+ ulong_t hs_hat_copied64;
+ ulong_t hs_hat_copied32;
+ ulong_t hs_hat_normal64;
};
extern struct hatstats hatstat;
#ifdef DEBUG
#define HATSTAT_INC(x) (++hatstat.x)
#else
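
The three new counters track which pagetable mode each HAT lands in. A
minimal sketch of where they might be bumped at HAT creation time; the
function name, its parameters, and the placement are assumptions:

    /* sketch only: hypothetical accounting at HAT-creation time */
    static void
    hat_mode_stat(boolean_t use_copied, boolean_t is_ilp32)
    {
            if (!use_copied)
                    HATSTAT_INC(hs_hat_normal64);
            else if (is_ilp32)
                    HATSTAT_INC(hs_hat_copied32);
            else
                    HATSTAT_INC(hs_hat_copied64);
    }
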
*** 226,236 ****
* x86 specific routines for use only in setup or i86pc/vm files
*/
extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
caddr_t ekernelheap);
extern void hat_kern_setup(void);
- extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
extern void hat_init_finish(void);
extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
extern pfn_t hat_kpm_va2pfn(caddr_t);
--- 245,254 ----
*** 238,280 ****
extern uintptr_t hat_kernelbase(uintptr_t);
extern void hat_kmap_init(uintptr_t base, size_t len);
extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry);
! #if !defined(__xpv)
! /*
! * routines to deal with delayed TLB invalidations for idle CPUs
! */
! extern void tlb_going_idle(void);
! extern void tlb_service(void);
! #endif
/*
! * Hat switch function invoked to load a new context into %cr3
*/
! extern void hat_switch(struct hat *hat);
! #ifdef __xpv
/*
* Interfaces to use around code that maps/unmaps grant table references.
*/
extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *);
extern void hat_release_mapping(hat_t *, caddr_t);
- #define XPV_DISALLOW_MIGRATE() xen_block_migrate()
- #define XPV_ALLOW_MIGRATE() xen_allow_migrate()
-
#else
#define XPV_DISALLOW_MIGRATE() /* nothing */
#define XPV_ALLOW_MIGRATE() /* nothing */
#define pfn_is_foreign(pfn) __lintzero
! #endif
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
--- 256,321 ----
extern uintptr_t hat_kernelbase(uintptr_t);
extern void hat_kmap_init(uintptr_t base, size_t len);
extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry);
! extern void mmu_calc_user_slots(void);
! extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
! extern void hat_switch(struct hat *hat);
+ #define TLB_RANGE_LEN(r) ((r)->tr_cnt << LEVEL_SHIFT((r)->tr_level))
+
/*
! * A range of virtual pages for purposes of demapping.
*/
! typedef struct tlb_range {
! uintptr_t tr_va; /* address of page */
! ulong_t tr_cnt; /* number of pages in range */
! int8_t tr_level; /* page table level */
! } tlb_range_t;
! #if defined(__xpv)
!
! #define XPV_DISALLOW_MIGRATE() xen_block_migrate()
! #define XPV_ALLOW_MIGRATE() xen_allow_migrate()
!
! #define mmu_flush_tlb_page(va) mmu_invlpg((caddr_t)va)
! #define mmu_flush_tlb_kpage(va) mmu_invlpg((caddr_t)va)
!
/*
* Interfaces to use around code that maps/unmaps grant table references.
*/
extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *);
extern void hat_release_mapping(hat_t *, caddr_t);
#else
#define XPV_DISALLOW_MIGRATE() /* nothing */
#define XPV_ALLOW_MIGRATE() /* nothing */
#define pfn_is_foreign(pfn) __lintzero
! typedef enum flush_tlb_type {
! FLUSH_TLB_ALL = 1,
! FLUSH_TLB_NONGLOBAL = 2,
! FLUSH_TLB_RANGE = 3,
! } flush_tlb_type_t;
+ extern void mmu_flush_tlb(flush_tlb_type_t, tlb_range_t *);
+ extern void mmu_flush_tlb_kpage(uintptr_t);
+ extern void mmu_flush_tlb_page(uintptr_t);
+ extern void hati_cpu_punchin(cpu_t *cpu, uintptr_t va, uint_t attrs);
+
+ /*
+ * routines to deal with delayed TLB invalidations for idle CPUs
+ */
+ extern void tlb_going_idle(void);
+ extern void tlb_service(void);
+
+ #endif /* !__xpv */
+
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
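
TLB_RANGE_LEN converts a page count at a given pagetable level into a byte
length: tr_cnt << LEVEL_SHIFT(tr_level). A minimal sketch of driving a
ranged demap through the new interface, assuming LEVEL_SHIFT(0) == 12 for
4K base pages (the VA passed in is arbitrary):

    static void
    demap_three_pages(uintptr_t va)
    {
            tlb_range_t r;

            r.tr_va = va;           /* first page in the range */
            r.tr_cnt = 3;           /* three pages */
            r.tr_level = 0;         /* 4K base pages */

            /* TLB_RANGE_LEN(&r) == 3 << LEVEL_SHIFT(0) == 0x3000 bytes */
            mmu_flush_tlb(FLUSH_TLB_RANGE, &r);
    }
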