Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

*** 25,35 **** /* * Copyright (c) 2010, Intel Corporation. * All rights reserved. */ /* ! * Copyright 2016 Joyent, Inc. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ #include <sys/types.h> #include <sys/thread.h> --- 25,35 ---- /* * Copyright (c) 2010, Intel Corporation. * All rights reserved. */ /* ! * Copyright 2018 Joyent, Inc. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. */ #include <sys/types.h> #include <sys/thread.h>
*** 78,91 **** #include <sys/hypervisor.h> #endif #include <sys/cpu_module.h> #include <sys/ontrap.h> ! struct cpu cpus[1]; /* CPU data */ ! struct cpu *cpu[NCPU] = {&cpus[0]}; /* pointers to all CPUs */ ! struct cpu *cpu_free_list; /* list for released CPUs */ ! cpu_core_t cpu_core[NCPU]; /* cpu_core structures */ #define cpu_next_free cpu_prev /* * Useful for disabling MP bring-up on a MP capable system. --- 78,91 ---- #include <sys/hypervisor.h> #endif #include <sys/cpu_module.h> #include <sys/ontrap.h> ! struct cpu cpus[1] __aligned(MMU_PAGESIZE); ! struct cpu *cpu[NCPU] = {&cpus[0]}; ! struct cpu *cpu_free_list; ! cpu_core_t cpu_core[NCPU]; #define cpu_next_free cpu_prev /* * Useful for disabling MP bring-up on a MP capable system.
*** 166,190 **** void init_cpu_syscall(struct cpu *cp) { kpreempt_disable(); - #if defined(__amd64) if (is_x86_feature(x86_featureset, X86FSET_MSR) && is_x86_feature(x86_featureset, X86FSET_ASYSC)) { uint64_t flags; ! #if !defined(__lint) /* * The syscall instruction imposes a certain ordering on * segment selectors, so we double-check that ordering * here. */ ! ASSERT(KDS_SEL == KCS_SEL + 8); ! ASSERT(UDS_SEL == U32CS_SEL + 8); ! ASSERT(UCS_SEL == U32CS_SEL + 16); #endif /* * Turn syscall/sysret extensions on. */ cpu_asysc_enable(); --- 166,190 ---- void init_cpu_syscall(struct cpu *cp) { kpreempt_disable(); if (is_x86_feature(x86_featureset, X86FSET_MSR) && is_x86_feature(x86_featureset, X86FSET_ASYSC)) { uint64_t flags; ! #if !defined(__xpv) /* * The syscall instruction imposes a certain ordering on * segment selectors, so we double-check that ordering * here. */ ! CTASSERT(KDS_SEL == KCS_SEL + 8); ! CTASSERT(UDS_SEL == U32CS_SEL + 8); ! CTASSERT(UCS_SEL == U32CS_SEL + 16); #endif + /* * Turn syscall/sysret extensions on. */ cpu_asysc_enable();
*** 191,202 **** /* * Program the magic registers .. */ wrmsr(MSR_AMD_STAR, ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32); ! wrmsr(MSR_AMD_LSTAR, (uint64_t)(uintptr_t)sys_syscall); ! wrmsr(MSR_AMD_CSTAR, (uint64_t)(uintptr_t)sys_syscall32); /* * This list of flags is masked off the incoming * %rfl when we enter the kernel. */ --- 191,211 ---- /* * Program the magic registers .. */ wrmsr(MSR_AMD_STAR, ((uint64_t)(U32CS_SEL << 16 | KCS_SEL)) << 32); ! if (kpti_enable == 1) { ! wrmsr(MSR_AMD_LSTAR, ! (uint64_t)(uintptr_t)tr_sys_syscall); ! wrmsr(MSR_AMD_CSTAR, ! (uint64_t)(uintptr_t)tr_sys_syscall32); ! } else { ! wrmsr(MSR_AMD_LSTAR, ! (uint64_t)(uintptr_t)sys_syscall); ! wrmsr(MSR_AMD_CSTAR, ! (uint64_t)(uintptr_t)sys_syscall32); ! } /* * This list of flags is masked off the incoming * %rfl when we enter the kernel. */
*** 203,250 **** flags = PS_IE | PS_T; if (is_x86_feature(x86_featureset, X86FSET_SMAP) == B_TRUE) flags |= PS_ACHK; wrmsr(MSR_AMD_SFMASK, flags); } - #endif /* - * On 32-bit kernels, we use sysenter/sysexit because it's too - * hard to use syscall/sysret, and it is more portable anyway. - * * On 64-bit kernels on Nocona machines, the 32-bit syscall * variant isn't available to 32-bit applications, but sysenter is. */ if (is_x86_feature(x86_featureset, X86FSET_MSR) && is_x86_feature(x86_featureset, X86FSET_SEP)) { ! #if !defined(__lint) /* * The sysenter instruction imposes a certain ordering on * segment selectors, so we double-check that ordering * here. See "sysenter" in Intel document 245471-012, "IA-32 * Intel Architecture Software Developer's Manual Volume 2: * Instruction Set Reference" */ ! ASSERT(KDS_SEL == KCS_SEL + 8); ! ASSERT32(UCS_SEL == ((KCS_SEL + 16) | 3)); ! ASSERT32(UDS_SEL == UCS_SEL + 8); ! ! ASSERT64(U32CS_SEL == ((KCS_SEL + 16) | 3)); ! ASSERT64(UDS_SEL == U32CS_SEL + 8); #endif cpu_sep_enable(); /* * resume() sets this value to the base of the threads stack * via a context handler. */ wrmsr(MSR_INTC_SEP_ESP, 0); ! wrmsr(MSR_INTC_SEP_EIP, (uint64_t)(uintptr_t)sys_sysenter); } kpreempt_enable(); } #if !defined(__xpv) --- 212,259 ---- flags = PS_IE | PS_T; if (is_x86_feature(x86_featureset, X86FSET_SMAP) == B_TRUE) flags |= PS_ACHK; wrmsr(MSR_AMD_SFMASK, flags); } /* * On 64-bit kernels on Nocona machines, the 32-bit syscall * variant isn't available to 32-bit applications, but sysenter is. */ if (is_x86_feature(x86_featureset, X86FSET_MSR) && is_x86_feature(x86_featureset, X86FSET_SEP)) { ! #if !defined(__xpv) /* * The sysenter instruction imposes a certain ordering on * segment selectors, so we double-check that ordering * here. See "sysenter" in Intel document 245471-012, "IA-32 * Intel Architecture Software Developer's Manual Volume 2: * Instruction Set Reference" */ ! CTASSERT(KDS_SEL == KCS_SEL + 8); ! 
CTASSERT(U32CS_SEL == ((KCS_SEL + 16) | 3)); ! CTASSERT(UDS_SEL == U32CS_SEL + 8); #endif cpu_sep_enable(); /* * resume() sets this value to the base of the threads stack * via a context handler. */ wrmsr(MSR_INTC_SEP_ESP, 0); ! ! if (kpti_enable == 1) { ! wrmsr(MSR_INTC_SEP_EIP, ! (uint64_t)(uintptr_t)tr_sys_sysenter); ! } else { ! wrmsr(MSR_INTC_SEP_EIP, ! (uint64_t)(uintptr_t)sys_sysenter); } + } kpreempt_enable(); } #if !defined(__xpv)
*** 416,439 **** set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA, SEL_KPL, 0, 1); #endif /* ! * If we have more than one node, each cpu gets a copy of IDT ! * local to its node. If this is a Pentium box, we use cpu 0's ! * IDT. cpu 0's IDT has been made read-only to workaround the ! * cmpxchgl register bug */ ! if (system_hardware.hd_nodes && x86_type != X86_TYPE_P5) { #if !defined(__lint) ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE); #endif ! cp->cpu_idt = kmem_zalloc(PAGESIZE, KM_SLEEP); bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE); - } else { - cp->cpu_idt = CPU->cpu_idt; - } /* * alloc space for cpuid info */ cpuid_alloc_space(cp); --- 425,448 ---- set_usegd(&cp->cpu_gdt[GDT_GS], cp, sizeof (struct cpu) -1, SDT_MEMRWA, SEL_KPL, 0, 1); #endif /* ! * Allocate pages for the CPU LDT. */ ! cp->cpu_m.mcpu_ldt = kmem_zalloc(LDT_CPU_SIZE, KM_SLEEP); ! cp->cpu_m.mcpu_ldt_len = 0; ! ! /* ! * Allocate a per-CPU IDT and initialize the new IDT to the currently ! * running CPU. ! */ #if !defined(__lint) ASSERT((sizeof (*CPU->cpu_idt) * NIDT) <= PAGESIZE); #endif ! cp->cpu_idt = kmem_alloc(PAGESIZE, KM_SLEEP); bcopy(CPU->cpu_idt, cp->cpu_idt, PAGESIZE); /* * alloc space for cpuid info */ cpuid_alloc_space(cp);
*** 567,576 **** --- 576,589 ---- if (cp->cpu_idt != CPU->cpu_idt) kmem_free(cp->cpu_idt, PAGESIZE); cp->cpu_idt = NULL; + kmem_free(cp->cpu_m.mcpu_ldt, LDT_CPU_SIZE); + cp->cpu_m.mcpu_ldt = NULL; + cp->cpu_m.mcpu_ldt_len = 0; + kmem_free(cp->cpu_gdt, PAGESIZE); cp->cpu_gdt = NULL; if (cp->cpu_supp_freqs != NULL) { size_t len = strlen(cp->cpu_supp_freqs) + 1;
*** 1781,1790 **** --- 1794,1805 ---- * it's set. So the startup thread may have no chance to switch back * again if it's switched away with CPU_QUIESCED set. */ cp->cpu_flags &= ~(CPU_POWEROFF | CPU_QUIESCED); + enable_pcid(); + /* * Setup this processor for XSAVE. */ if (fp_save_mech == FP_XSAVE) { xsave_setup_msr(cp);