OS-7125 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2018 Joyent, Inc.
  27  */
  28 
  29 #ifndef _SYS_MACHCPUVAR_H
  30 #define _SYS_MACHCPUVAR_H
  31 
  32 #ifdef  __cplusplus
  33 extern "C" {
  34 #endif
  35 
  36 #include <sys/inttypes.h>
  37 #include <sys/x_call.h>
  38 #include <sys/tss.h>
  39 #include <sys/segments.h>
  40 #include <sys/rm_platter.h>
  41 #include <sys/avintr.h>
  42 #include <sys/pte.h>
  43 #include <sys/stddef.h>
  44 #include <sys/debug.h>
  45 #include <sys/cpuvar.h>
  46 
  47 #ifndef _ASM
  48 /*
  49  * On a virtualized platform a virtual cpu may not actually be running
  50  * on a physical cpu, especially in situations where a configuration has
  51  * more vcpus than pcpus.  vcpu_on_pcpu() tells us (if it's able) whether
  52  * the specified vcpu is currently running on a pcpu.  If the state is not
  53  * known or cannot be determined, it returns VCPU_STATE_UNKNOWN.
  54  */
  55 #define VCPU_STATE_UNKNOWN      0       /* state not known/determinable */
  56 #define VCPU_ON_PCPU            1       /* vcpu is running on a pcpu */
  57 #define VCPU_NOT_ON_PCPU        2       /* vcpu is not running on a pcpu */
  58 
     /* Reports which of the VCPU_* states above applies to the given vcpu. */
  59 extern int vcpu_on_pcpu(processorid_t);
  60 
  61 /*
  62  * Machine specific fields of the cpu struct
  63  * defined in common/sys/cpuvar.h.
  64  *
  65  * Note:  This is kinda kludgy but seems to be the best
  66  * of our alternatives.
  67  */
  68 
  69 struct cpuid_info;              /* pointed to by mcpu_cpi */
  70 struct cpu_ucode_info;          /* pointed to by mcpu_ucode_info */
  71 struct cmi_hdl;                 /* pointed to by mcpu_cmi_hdl */
  72 
  73 /*
  74  * A note about the hypervisor affinity bits: a one bit in the affinity mask
  75  * means the corresponding event channel is allowed to be serviced
  76  * by this cpu.
  77  */
  78 struct xen_evt_data {
  79         ulong_t         pending_sel[PIL_MAX + 1]; /* event array selectors */
             /* PIL_MAX + 1 rows, each holding one word per bit of a ulong_t */
  80         ulong_t         pending_evts[PIL_MAX + 1][sizeof (ulong_t) * 8];
             /* affinity mask words: a 1 bit lets this cpu service the channel */
  81         ulong_t         evt_affinity[sizeof (ulong_t) * 8]; /* service on cpu */
  82 };
  83 
  84 struct kpti_frame {
             /*
              * Layout, in declaration order: a lower redzone word, the
              * trampoline's stashed %cr3 and scratch registers, the temporary
              * HW stack frame (kf_err up to kf_tr_rsp), the in-use flag, a
              * middle redzone word, the %cr3 values used to switch page
              * tables, and an upper redzone word.  struct machcpu embeds
              * three of these: mcpu_kpti, mcpu_kpti_flt and mcpu_kpti_dbg.
              */
  85         uint64_t        kf_lower_redzone;
  86 
  87         /* Stashed value of %cr3 when we entered the trampoline. */
  88         greg_t          kf_tr_cr3;
  89 
  90         /*
  91          * We use %r13-r14 as scratch registers in the trampoline code,
  92          * so stash those here "below" the rest of the stack so they can be
  93          * pushed/popped if needed.
  94          */
  95         greg_t          kf_r14;
  96         greg_t          kf_r13;
  97 
  98         /*
  99          * Part of this struct is used as the HW stack frame when taking an
 100          * interrupt on the user page table. The CPU is going to push a bunch
 101          * of regs onto the stack pointer set in the TSS/IDT (which we set to
 102          * &kf_rsp here).
 103          *
 104          * This is only a temporary holding area for them (we'll move them over
 105          * to the real interrupt stack once we've set %cr3).
 106          *
 107          * Note that these must be cleared during a process switch on this cpu.
 108          */
 109         greg_t          kf_err;         /* Bottom of initial hw stack frame */
 110         greg_t          kf_rip;
 111         greg_t          kf_cs;
 112         greg_t          kf_rflags;
 113         greg_t          kf_rsp;
 114         greg_t          kf_ss;
 115 
 116         greg_t          kf_tr_rsp;      /* Top of HW stack frame */
 117         /* kf_tr_rsp is also written with the %rsp value on tramp entry */
 118 
 119         /* Written to 0x1 when this kpti_frame is in use. */
 120         uint64_t        kf_tr_flag;
 121 
 122         uint64_t        kf_middle_redzone;
 123 
 124         /*
 125          * The things we need to write to %cr3 to change between page tables.
 126          * These live "above" the HW stack.
 127          */
 128         greg_t          kf_kernel_cr3;
 129         greg_t          kf_user_cr3;
 130         greg_t          kf_tr_ret_rsp;
 131 
 132         uint64_t        kf_unused;      /* Padding for 16-byte alignment */
 133 
 134         uint64_t        kf_upper_redzone;
 135 };
 136 









 137 /*
 138  * This first value, MACHCPU_SIZE, is the size of all the members in the cpu_t
 139  * AND struct machcpu, before we get to the mcpu_pad and the kpti area.
 140  * The KPTI area is used to contain per-CPU data that is visible in both sets
 141  * of page-tables, and hence must be page-aligned and page-sized. See
 142  * hat_pcp_setup().
 143  *
 144  * There is a CTASSERT in os/intr.c that checks these numbers.
 145  */
 146 #define MACHCPU_SIZE    (572 + 1584)    /* cpu_t + machcpu bytes before mcpu_pad */
 147 #define MACHCPU_PAD     (MMU_PAGESIZE - MACHCPU_SIZE)   /* sizes mcpu_pad */
     /* Sizes mcpu_pad2: one page less mcpu_pad3[16] and the three kpti_frames. */
 148 #define MACHCPU_PAD2    (MMU_PAGESIZE - 16 - 3 * sizeof (struct kpti_frame))
 149 
 150 struct  machcpu {
 151         /*
 152          * x_call fields - used for interprocessor cross calls
 153          */
 154         struct xc_msg   *xc_msgbox;
 155         struct xc_msg   *xc_free;
 156         xc_data_t       xc_data;
 157         uint32_t        xc_wait_cnt;
 158         volatile uint32_t xc_work_cnt;
 159 
 160         int             mcpu_nodeid;            /* node-id */
 161         int             mcpu_pri;               /* CPU priority */
 162 
 163         struct hat      *mcpu_current_hat; /* cpu's current hat */
 164 
 165         struct hat_cpu_info     *mcpu_hat_info;
 166 
 167         volatile ulong_t        mcpu_tlb_info;
 168 
 169         /* i86 hardware table addresses that cannot be shared */
 170 
 171         user_desc_t     *mcpu_gdt;      /* GDT */
 172         gate_desc_t     *mcpu_idt;      /* current IDT */
 173 
 174         tss_t           *mcpu_tss;      /* TSS */
 175         void            *mcpu_ldt;
 176         size_t          mcpu_ldt_len;
 177 
 178         kmutex_t        mcpu_ppaddr_mutex;
 179         caddr_t         mcpu_caddr1;    /* per cpu CADDR1 */
 180         caddr_t         mcpu_caddr2;    /* per cpu CADDR2 */
 181         uint64_t        mcpu_caddr1pte;
 182         uint64_t        mcpu_caddr2pte;
 183 
 184         struct softint  mcpu_softinfo;
 185         uint64_t        pil_high_start[HIGH_LEVELS];
 186         uint64_t        intrstat[PIL_MAX + 1][2];
 187 
 188         struct cpuid_info        *mcpu_cpi;
 189 
 190 #if defined(__amd64)
 191         greg_t  mcpu_rtmp_rsp;          /* syscall: temporary %rsp stash */
 192         greg_t  mcpu_rtmp_r15;          /* syscall: temporary %r15 stash */
 193 #endif
 194 
 195         struct vcpu_info *mcpu_vcpu_info;
 196         uint64_t        mcpu_gdtpa;     /* hypervisor: GDT physical address */
 197 
 198         uint16_t mcpu_intr_pending;     /* hypervisor: pending intrpt levels */
 199         uint16_t mcpu_ec_mbox;          /* hypervisor: evtchn_dev mailbox */
 200         struct xen_evt_data *mcpu_evt_pend; /* hypervisor: pending events */
 201 
 202         volatile uint32_t *mcpu_mwait;  /* MONITOR/MWAIT buffer */
 203         void (*mcpu_idle_cpu)(void);    /* idle function */
 204         uint16_t mcpu_idle_type;        /* CPU next idle type */
 205         uint16_t max_cstates;           /* supported max cstates */
 206 
 207         struct cpu_ucode_info   *mcpu_ucode_info;
 208 
 209         void                    *mcpu_pm_mach_state;
 210         struct cmi_hdl          *mcpu_cmi_hdl;
 211         void                    *mcpu_mach_ctx_ptr;
 212 
 213         /*
 214          * A stamp that is unique per processor and changes
 215          * whenever an interrupt happens. Useful for detecting
 216          * if a section of code gets interrupted.
 217          * The high order 16 bits will hold the cpu->cpu_id.
 218          * The low order bits will be incremented on every interrupt.
 219          */
 220         volatile uint32_t       mcpu_istamp;
 221 


             /*
              * MACHCPU_PAD is MMU_PAGESIZE less MACHCPU_SIZE, so mcpu_pad
              * fills the space between the members above and the next page.
              */
 222         char                    mcpu_pad[MACHCPU_PAD];
 223 
 224         /*
              * This is the start of the page: mcpu_pad2, the three kpti_frames
              * and mcpu_pad3 together occupy exactly MMU_PAGESIZE bytes (see
              * MACHCPU_PAD2).
              */
 225         char                    mcpu_pad2[MACHCPU_PAD2];
 226         struct kpti_frame       mcpu_kpti;
 227         struct kpti_frame       mcpu_kpti_flt;
 228         struct kpti_frame       mcpu_kpti_dbg;
 229         char                    mcpu_pad3[16];  /* the 16 in MACHCPU_PAD2 */
 230 };
 231 
 232 #define NINTR_THREADS   (LOCK_LEVEL-1)  /* number of interrupt threads */
 233 #define MWAIT_HALTED    (1)             /* mcpu_mwait set when halting */
 234 #define MWAIT_RUNNING   (0)             /* mcpu_mwait set to wake up */
 235 #define MWAIT_WAKEUP_IPI        (2)     /* need IPI to wake up */
     /* Stores MWAIT_RUNNING through the cpu's mcpu_mwait (MONITOR/MWAIT) pointer. */
 236 #define MWAIT_WAKEUP(cpu)       (*((cpu)->cpu_m.mcpu_mwait) = MWAIT_RUNNING)
 237 
 238 #endif  /* _ASM */
 239 
 240 /* Please DON'T add any more of this namespace-poisoning sewage here */
     /*
      * Each alias below expands to the matching cpu_m.mcpu_* member, so that
      * cp->cpu_gdt reads as cp->cpu_m.mcpu_gdt.  They follow the #endif that
      * closes the _ASM guard, so they are defined whether or not _ASM is set.
      */
 241 
 242 #define cpu_nodeid cpu_m.mcpu_nodeid
 243 #define cpu_pri cpu_m.mcpu_pri
 244 #define cpu_current_hat cpu_m.mcpu_current_hat
 245 #define cpu_hat_info cpu_m.mcpu_hat_info
 246 #define cpu_ppaddr_mutex cpu_m.mcpu_ppaddr_mutex
 247 #define cpu_gdt cpu_m.mcpu_gdt
 248 #define cpu_idt cpu_m.mcpu_idt
 249 #define cpu_tss cpu_m.mcpu_tss
 250 #define cpu_caddr1 cpu_m.mcpu_caddr1
 251 #define cpu_caddr2 cpu_m.mcpu_caddr2
 252 #define cpu_softinfo cpu_m.mcpu_softinfo
 253 #define cpu_caddr1pte cpu_m.mcpu_caddr1pte
 254 #define cpu_caddr2pte cpu_m.mcpu_caddr2pte
 255 
 256 #ifdef  __cplusplus
 257 }
 258 #endif
 259 
 260 #endif  /* _SYS_MACHCPUVAR_H */
--- EOF ---