8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

*** 24,34 ****
  /*
   * Copyright (c) 2010, Intel Corporation.
   * All rights reserved.
   */
  /*
!  * Copyright 2011 Joyent, Inc. All rights reserved.
   */
  
  /*
   * Welcome to the world of the "real mode platter".
   * See also startup.c, mpcore.s and apic.c for related routines.
--- 24,34 ----
  /*
   * Copyright (c) 2010, Intel Corporation.
   * All rights reserved.
   */
  /*
!  * Copyright 2018 Joyent, Inc
   */
  
  /*
   * Welcome to the world of the "real mode platter".
   * See also startup.c, mpcore.s and apic.c for related routines.
*** 131,144 ****
  rmp_gdt_init(rm_platter_t *rm)
  {
  
  #if defined(__amd64)
  	/* Use the kas address space for the CPU startup thread. */
! 	if (MAKECR3(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL)
  		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
  		    "located above 4G in physical memory (@ 0x%lx)",
! 		    MAKECR3(kas.a_hat->hat_htable->ht_pfn));
  
  	/*
  	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
  	 * by code in real_mode_start_cpu():
  	 *
--- 131,145 ----
  rmp_gdt_init(rm_platter_t *rm)
  {
  
  #if defined(__amd64)
  	/* Use the kas address space for the CPU startup thread. */
! 	if (mmu_ptob(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL) {
  		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
  		    "located above 4G in physical memory (@ 0x%lx)",
! 		    mmu_ptob(kas.a_hat->hat_htable->ht_pfn));
! 	}
  
  	/*
  	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
  	 * by code in real_mode_start_cpu():
  	 *
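
Note on this hunk: with KPTI, MAKECR3() grows a PCID argument and may fold PCID bits into the value it builds, so it no longer returns the bare physical address of the top-level page table; the 4G check therefore switches to mmu_ptob(). Below is a minimal standalone sketch of the check, assuming only that mmu_ptob() is the usual pfn-to-byte-address shift (pfn << MMU_PAGESHIFT); the constants and the main() harness are illustrative, not the kernel's definitions.

	#include <stdint.h>
	#include <stdio.h>

	#define	MMU_PAGESHIFT	12	/* 4K pages */
	#define	mmu_ptob(x)	((uint64_t)(x) << MMU_PAGESHIFT)

	int
	main(void)
	{
		uint64_t top_pfn = 0x123456;	/* hypothetical top-level PT pfn */

		/*
		 * The real-mode startup path uses 32-bit physical
		 * addressing, so the top-level page table must sit
		 * below 4G.
		 */
		if (mmu_ptob(top_pfn) > 0xffffffffUL)
			printf("page tables above 4G (@ 0x%llx)\n",
			    (unsigned long long)mmu_ptob(top_pfn));
		else
			printf("page tables OK at 0x%llx\n",
			    (unsigned long long)mmu_ptob(top_pfn));
		return (0);
	}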
*** 172,205 ****
  static void *
  mach_cpucontext_alloc_tables(struct cpu *cp)
  {
  	tss_t *ntss;
  	struct cpu_tables *ct;
  
  	/*
  	 * Allocate space for stack, tss, gdt and idt. We round the size
  	 * allotted for cpu_tables up, so that the TSS is on a unique page.
  	 * This is more efficient when running in virtual machines.
  	 */
! 	ct = kmem_zalloc(P2ROUNDUP(sizeof (*ct), PAGESIZE), KM_SLEEP);
  	if ((uintptr_t)ct & PAGEOFFSET)
  		panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
  		    cp->cpu_id);
  
  	ntss = cp->cpu_tss = &ct->ct_tss;
  
  #if defined(__amd64)
  
  	/*
  	 * #DF (double fault).
  	 */
! 	ntss->tss_ist1 = (uint64_t)&ct->ct_stack[sizeof (ct->ct_stack)];
  
  #elif defined(__i386)
  
  	ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
! 	    (uint32_t)&ct->ct_stack[sizeof (ct->ct_stack)];
  
  	ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
  
  	ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;
--- 173,254 ----
  static void *
  mach_cpucontext_alloc_tables(struct cpu *cp)
  {
  	tss_t *ntss;
  	struct cpu_tables *ct;
+ 	size_t ctsize;
  
  	/*
  	 * Allocate space for stack, tss, gdt and idt. We round the size
  	 * allotted for cpu_tables up, so that the TSS is on a unique page.
  	 * This is more efficient when running in virtual machines.
  	 */
! 	ctsize = P2ROUNDUP(sizeof (*ct), PAGESIZE);
! 	ct = kmem_zalloc(ctsize, KM_SLEEP);
  	if ((uintptr_t)ct & PAGEOFFSET)
  		panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
  		    cp->cpu_id);
  
  	ntss = cp->cpu_tss = &ct->ct_tss;
  
  #if defined(__amd64)
+ 	uintptr_t va;
+ 	size_t len;
  
  	/*
  	 * #DF (double fault).
  	 */
! 	ntss->tss_ist1 = (uintptr_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];
  
+ 	/*
+ 	 * NMI (non-maskable interrupt)
+ 	 */
+ 	ntss->tss_ist2 = (uintptr_t)&ct->ct_stack2[sizeof (ct->ct_stack2)];
+ 
+ 	/*
+ 	 * #MC (machine check exception / hardware error)
+ 	 */
+ 	ntss->tss_ist3 = (uintptr_t)&ct->ct_stack3[sizeof (ct->ct_stack3)];
+ 
+ 	/*
+ 	 * #DB, #BP debug interrupts and KDI/kmdb
+ 	 */
+ 	ntss->tss_ist4 = (uintptr_t)&cp->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
+ 
+ 	if (kpti_enable == 1) {
+ 		/*
+ 		 * #GP, #PF, #SS fault interrupts
+ 		 */
+ 		ntss->tss_ist5 = (uintptr_t)&cp->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
+ 
+ 		/*
+ 		 * Used by all other interrupts
+ 		 */
+ 		ntss->tss_ist6 = (uintptr_t)&cp->cpu_m.mcpu_kpti.kf_tr_rsp;
+ 
+ 		/*
+ 		 * On AMD64 we need to make sure that all of the pages of the
+ 		 * struct cpu_tables are punched through onto the user CPU for
+ 		 * kpti.
+ 		 *
+ 		 * The final page will always be the TSS, so treat that
+ 		 * separately.
+ 		 */
+ 		for (va = (uintptr_t)ct, len = ctsize - MMU_PAGESIZE;
+ 		    len >= MMU_PAGESIZE;
+ 		    len -= MMU_PAGESIZE, va += MMU_PAGESIZE) {
+ 			/* The doublefault stack must be RW */
+ 			hati_cpu_punchin(cp, va, PROT_READ | PROT_WRITE);
+ 		}
+ 		ASSERT3U((uintptr_t)ntss, ==, va);
+ 		hati_cpu_punchin(cp, (uintptr_t)ntss, PROT_READ);
+ 	}
+ 
  #elif defined(__i386)
  
  	ntss->tss_esp0 = ntss->tss_esp1 = ntss->tss_esp2 = ntss->tss_esp =
! 	    (uint32_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];
  
  	ntss->tss_ss0 = ntss->tss_ss1 = ntss->tss_ss2 = ntss->tss_ss = KDS_SEL;
  
  	ntss->tss_eip = (uint32_t)cp->cpu_thread->t_pc;
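
Two idioms in this hunk are worth spelling out. First, each tss_istN entry is set to &stack[sizeof (stack)], one byte past the end of the array, because x86 stacks grow downward and the hardware loads the IST value as the initial %rsp when it delivers the interrupt. Second, with kpti enabled every page of the rounded-up cpu_tables allocation except the last one (the TSS page) is punched through read-write into the user page tables, and the TSS page read-only. The following standalone sketch shows both, with the kernel's hati_cpu_punchin() replaced by a print stub and the allocation size chosen purely for illustration; P2ROUNDUP is the usual power-of-two round-up.

	#include <stdint.h>
	#include <stdio.h>

	#define	MMU_PAGESIZE	4096UL
	#define	P2ROUNDUP(x, a)	(-(-(x) & -(a)))

	/* Stand-in for the kernel's hati_cpu_punchin(). */
	static void
	punchin_stub(uintptr_t va, const char *prot)
	{
		printf("punch in page at 0x%lx as %s\n", (unsigned long)va, prot);
	}

	int
	main(void)
	{
		static char tables[3 * MMU_PAGESIZE];	/* pretend allocation */
		size_t ctsize = P2ROUNDUP(sizeof (tables), MMU_PAGESIZE);
		char stack1[512];
		uintptr_t va;
		size_t len;

		/* 1: initial stack pointer for a downward-growing IST stack */
		uintptr_t ist1 = (uintptr_t)&stack1[sizeof (stack1)];
		printf("IST stack top: 0x%lx\n", (unsigned long)ist1);

		/* 2: every page but the final (TSS) page is RW; the TSS is RO */
		for (va = (uintptr_t)tables, len = ctsize - MMU_PAGESIZE;
		    len >= MMU_PAGESIZE;
		    len -= MMU_PAGESIZE, va += MMU_PAGESIZE)
			punchin_stub(va, "RW");
		punchin_stub(va, "RO");
		return (0);
	}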
*** 306,330 ****
  	rm->rm_gdt_base = cp->cpu_gdt;
  	rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
  
  	/*
  	 * CPU needs to access kernel address space after powering on.
- 	 * When hot-adding CPU at runtime, directly use top level page table
- 	 * of kas other than the return value of getcr3(). getcr3() returns
- 	 * current process's top level page table, which may be different from
- 	 * the one of kas.
  	 */
! 	rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn);
  	rm->rm_cpu = cp->cpu_id;
  
  	/*
! 	 * For hot-adding CPU at runtime, Machine Check and Performance Counter
! 	 * should be disabled. They will be enabled on demand after CPU powers
! 	 * on successfully
  	 */
  	rm->rm_cr4 = getcr4();
! 	rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE);
  
  	rmp_gdt_init(rm);
  
  	return (ct);
  }
--- 355,375 ----
  	rm->rm_gdt_base = cp->cpu_gdt;
  	rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;
  
  	/*
  	 * CPU needs to access kernel address space after powering on.
  	 */
! 	rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn, PCID_NONE);
  	rm->rm_cpu = cp->cpu_id;
  
  	/*
! 	 * We need to mask off any bits set on our boot CPU that can't apply
! 	 * while the subject CPU is initializing. If appropriate, they are
! 	 * enabled later on.
  	 */
  	rm->rm_cr4 = getcr4();
! 	rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE | CR4_PCIDE);
  
  	rmp_gdt_init(rm);
  
  	return (ct);
  }
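
Note on this hunk: when CR4.PCIDE is set, the low 12 bits of %cr3 select a process-context ID rather than being reserved, which is why MAKECR3() now takes an explicit PCID and why CR4_PCIDE is masked off, along with MCE and PCE, until the starting CPU is far enough along to use it. A minimal sketch of the CR3 composition follows, assuming a straightforward MAKECR3() shape; only PCID_NONE appears in the diff, so its value and the PCID_KERNEL constant here are assumptions for illustration.

	#include <stdint.h>
	#include <stdio.h>

	#define	MMU_PAGESHIFT	12
	#define	PCID_NONE	0x0	/* assumed value of "no PCID" */
	#define	PCID_KERNEL	0x1	/* hypothetical kernel-side PCID */

	/* Illustrative shape: page-table root's physical address | PCID. */
	#define	MAKECR3(pfn, pcid) \
		(((uint64_t)(pfn) << MMU_PAGESHIFT) | (uint64_t)(pcid))

	int
	main(void)
	{
		uint64_t pfn = 0x1234;	/* hypothetical top-level PT pfn */

		/* A starting CPU runs with PCIDs off, so no PCID bits yet. */
		printf("rm_pdbr = 0x%llx\n",
		    (unsigned long long)MAKECR3(pfn, PCID_NONE));
		printf("kernel cr3 = 0x%llx\n",
		    (unsigned long long)MAKECR3(pfn, PCID_KERNEL));
		return (0);
	}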