8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
@@ -25,11 +25,11 @@
/*
* Copyright (c) 2010, Intel Corporation.
* All rights reserved.
*/
/*
- * Copyright 2016 Joyent, Inc.
+ * Copyright 2018 Joyent, Inc.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/types.h>
#include <sys/thread.h>
@@ -78,14 +78,14 @@
#include <sys/hypervisor.h>
#endif
#include <sys/cpu_module.h>
#include <sys/ontrap.h>
-struct cpu cpus[1]; /* CPU data */
-struct cpu *cpu[NCPU] = {&cpus[0]}; /* pointers to all CPUs */
-struct cpu *cpu_free_list; /* list for released CPUs */
-cpu_core_t cpu_core[NCPU]; /* cpu_core structures */
+struct cpu cpus[1] __aligned(MMU_PAGESIZE);
+struct cpu *cpu[NCPU] = {&cpus[0]};
+struct cpu *cpu_free_list;
+cpu_core_t cpu_core[NCPU];
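Page-aligning cpus[0] is what the new __aligned(MMU_PAGESIZE) annotation buys: under KPTI, per-CPU state touched by the trampolines must be mappable into the user copy of the page tables at page granularity. A minimal sketch of such a macro, assuming a GCC-compatible compiler (illumos carries its own definition, so this is only an illustration):

/* Force the object's start onto an MMU_PAGESIZE (4 KiB) boundary. */
#define __aligned(x) __attribute__((__aligned__(x)))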
#define cpu_next_free cpu_prev
/*
* Useful for disabling MP bring-up on an MP-capable system.
@@ -166,25 +166,25 @@
void
init_cpu_syscall(struct cpu *cp)
{
kpreempt_disable();
-#if defined(__amd64)
if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
uint64_t flags;
-#if !defined(__lint)
+#if !defined(__xpv)
/*
* The syscall instruction imposes a certain ordering on
* segment selectors, so we double-check that ordering
* here.
*/
- ASSERT(KDS_SEL == KCS_SEL + 8);
- ASSERT(UDS_SEL == U32CS_SEL + 8);
- ASSERT(UCS_SEL == U32CS_SEL + 16);
+ CTASSERT(KDS_SEL == KCS_SEL + 8);
+ CTASSERT(UDS_SEL == U32CS_SEL + 8);
+ CTASSERT(UCS_SEL == U32CS_SEL + 16);
#endif
+
/*
* Turn syscall/sysret extensions on.
*/
cpu_asysc_enable();
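Switching these checks from ASSERT to CTASSERT moves them from run time to compile time: the selector layout is fixed by the GDT definitions, so a violation fails the build instead of panicking a DEBUG kernel. A sketch of the technique (the real illumos macro, in sys/debug.h, is essentially this shape):

/*
 * A negative array size rejects the build whenever the condition is
 * false; pasting in __LINE__ keeps each typedef name unique.
 */
#define __CTASSERT(x, y) typedef char __ctassert_ ## y[(x) ? 1 : -1]
#define _CTASSERT(x, y) __CTASSERT(x, y)
#define CTASSERT(x) _CTASSERT(x, __LINE__)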
@@ -191,12 +191,21 @@
/*
* Program the magic registers ..
*/
wrmsr(MSR_AMD_STAR,
((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32);
- wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall);
- wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32);
+ if (kpti_enable == 1) {
+ wrmsr(MSR_AMD_LSTAR,
+ (uint64_t)(uintptr_t)tr_sys_syscall);
+ wrmsr(MSR_AMD_CSTAR,
+ (uint64_t)(uintptr_t)tr_sys_syscall32);
+ } else {
+ wrmsr(MSR_AMD_LSTAR,
+ (uint64_t)(uintptr_t)sys_syscall);
+ wrmsr(MSR_AMD_CSTAR,
+ (uint64_t)(uintptr_t)sys_syscall32);
+ }
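When kpti_enable is set, MSR_AMD_LSTAR and MSR_AMD_CSTAR point at the trampoline entry points tr_sys_syscall and tr_sys_syscall32, which live in pages mapped into both the user and kernel page tables and switch %cr3 before falling through to the normal handlers. The selector ordering the CTASSERTs pin down follows from how the CPU derives CS and SS from MSR_AMD_STAR (per the AMD64 Architecture Programmer's Manual):

/*
 * SYSCALL:  CS = STAR[47:32]        SS = STAR[47:32] + 8
 * SYSRET64: CS = STAR[63:48] + 16   SS = STAR[63:48] + 8
 *
 * With STAR = ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32 this
 * yields KCS_SEL/KDS_SEL on entry and UCS_SEL/UDS_SEL on return.
 */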
/*
* This list of flags is masked off the incoming
* %rfl when we enter the kernel.
*/
@@ -203,48 +212,48 @@
flags = PS_IE | PS_T;
if (is_x86_feature(x86_featureset, X86FSET_SMAP) == B_TRUE)
flags |= PS_ACHK;
wrmsr(MSR_AMD_SFMASK, flags);
}
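Any bit set in MSR_AMD_SFMASK is cleared from %rflags by the SYSCALL instruction itself, before the first kernel instruction runs; the user's flags are preserved in %r11 for SYSRET. In effect:

/*
 * SFMASK = PS_IE | PS_T (plus PS_ACHK on SMAP hardware), so on entry:
 *   %r11    = user %rflags           (saved by the CPU)
 *   %rflags = user %rflags & ~SFMASK (IF, TF, and AC cleared)
 */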
-#endif
/*
- * On 32-bit kernels, we use sysenter/sysexit because it's too
- * hard to use syscall/sysret, and it is more portable anyway.
- *
* On 64-bit kernels on Nocona machines, the 32-bit syscall
* variant isn't available to 32-bit applications, but sysenter is.
*/
if (is_x86_feature(x86_featureset, X86FSET_MSR) &&
is_x86_feature(x86_featureset, X86FSET_SEP)) {
-#if !defined(__lint)
+#if !defined(__xpv)
/*
* The sysenter instruction imposes a certain ordering on
* segment selectors, so we double-check that ordering
* here. See "sysenter" in Intel document 245471-012, "IA-32
* Intel Architecture Software Developer's Manual Volume 2:
* Instruction Set Reference"
*/
- ASSERT(KDS_SEL == KCS_SEL + 8);
- ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3));
- ASSERT32(UDS_SEL == UCS_SEL + 8);
-
- ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3));
- ASSERT64(UDS_SEL == U32CS_SEL + 8);
+ CTASSERT(KDS_SEL == KCS_SEL + 8);
+ CTASSERT(U32CS_SEL == ((KCS_SEL + 16) | 3));
+ CTASSERT(UDS_SEL == U32CS_SEL + 8);
#endif
cpu_sep_enable();
/*
* resume() sets this value to the base of the thread's stack
* via a context handler.
*/
wrmsr(MSR_INTC_SEP_ESP, 0);
- wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter);
+
+ if (kpti_enable == 1) {
+ wrmsr(MSR_INTC_SEP_EIP,
+ (uint64_t)(uintptr_t)tr_sys_sysenter);
+ } else {
+ wrmsr(MSR_INTC_SEP_EIP,
+ (uint64_t)(uintptr_t)sys_sysenter);
+ }
}
kpreempt_enable();
}
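The sysenter path gets the same treatment: with kpti_enable set, %rip comes from the tr_sys_sysenter trampoline. The selector constraints checked above are dictated by the instruction itself (Intel SDM Vol. 2):

/*
 * SYSENTER: CS   = IA32_SYSENTER_CS      SS   = CS + 8
 *           %rip = IA32_SYSENTER_EIP     %rsp = IA32_SYSENTER_ESP
 * SYSEXIT (32-bit): CS = (IA32_SYSENTER_CS + 16) | 3,  SS = CS + 8
 */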
#if !defined(__xpv)
@@ -416,24 +425,24 @@
set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA,
SEL_KPL, 0, 1);
#endif
/*
- * If we have more than one node, each cpu gets a copy of IDT
- * local to its node. If this is a Pentium box, we use cpu 0's
- * IDT. cpu 0's IDT has been made read-only to workaround the
- * cmpxchgl register bug
+ * Allocate pages for the CPU LDT.
*/
- if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) {
+ cp->cpu_m.mcpu_ldt = kmem_zalloc(LDT_CPU_SIZE, KM_SLEEP);
+ cp->cpu_m.mcpu_ldt_len = 0;
+
+ /*
+ * Allocate a per-CPU IDT and initialize it from the IDT of the
+ * currently running CPU.
+ */
#if !defined(__lint)
ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE);
#endif
- cp->cpu_idt = kmem_zalloc(PAGESIZE, KM_SLEEP);
+ cp->cpu_idt = kmem_alloc(PAGESIZE, KM_SLEEP);
bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE);
- } else {
- cp->cpu_idt = CPU->cpu_idt;
- }
/*
* alloc space for cpuid info
*/
cpuid_alloc_space(cp);
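Every CPU now gets a private IDT rather than sharing cpu0's, and kmem_zalloc becomes kmem_alloc because the bcopy immediately overwrites the whole page anyway. The ASSERT that the table fits in one page is easy to verify by hand:

/*
 * On amd64 an IDT gate descriptor is 16 bytes and NIDT is 256,
 * so the table is 16 * 256 = 4096 bytes: exactly one page.
 */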
@@ -567,10 +576,14 @@
if (cp->cpu_idt != CPU->cpu_idt)
kmem_free(cp->cpu_idt, PAGESIZE);
cp->cpu_idt = NULL;
+ kmem_free(cp->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
+ cp->cpu_m.mcpu_ldt = NULL;
+ cp->cpu_m.mcpu_ldt_len = 0;
+
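The teardown mirrors the allocation exactly because kmem_free() must be passed the same size that was used to allocate the buffer. In sketch form:

/* Sizes must pair up between configure and unconfigure: */
void *ldt = kmem_zalloc(LDT_CPU_SIZE, KM_SLEEP);
/* ... */
kmem_free(ldt, LDT_CPU_SIZE);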
kmem_free(cp->cpu_gdt, PAGESIZE);
cp->cpu_gdt = NULL;
if (cp->cpu_supp_freqs != NULL) {
size_t len = strlen(cp->cpu_supp_freqs) + 1;
@@ -1781,10 +1794,12 @@
* it's set. So the startup thread may have no chance to switch back
* again if it's switched away with CPU_QUIESCED set.
*/
cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED);
+ enable_pcid();
+
/*
* Setup this processor for XSAVE.
*/
if (fp_save_mech == FP_XSAVE) {
xsave_setup_msr(cp);
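enable_pcid() is new with KPTI: when the processor supports process-context identifiers, the kernel and user halves of an address space get distinct PCIDs, so the trampolines' %cr3 writes no longer flush the entire TLB. A minimal sketch of the idea, assuming the getcr4()/setcr4() helpers (the real enable_pcid() also handles INVPCID support and is more involved):

static void
enable_pcid_sketch(void)
{
	if (!is_x86_feature(x86_featureset, X86FSET_PCID))
		return;

	/* CR4.PCIDE may only be set while CR3[11:0] is zero. */
	setcr4(getcr4() | CR4_PCIDE);
}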