8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

          --- old/usr/src/uts/i86pc/sys/machcpuvar.h
          +++ new/usr/src/uts/i86pc/sys/machcpuvar.h
[ 15 lines elided ]
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  /*
  26      - * Copyright 2011 Joyent, Inc. All rights reserved.
       26 + * Copyright 2018 Joyent, Inc.
  27   27   */
  28   28  
  29   29  #ifndef _SYS_MACHCPUVAR_H
  30   30  #define _SYS_MACHCPUVAR_H
  31   31  
  32   32  #ifdef  __cplusplus
  33   33  extern "C" {
  34   34  #endif
  35   35  
  36   36  #include <sys/inttypes.h>
  37   37  #include <sys/x_call.h>
  38   38  #include <sys/tss.h>
  39   39  #include <sys/segments.h>
  40   40  #include <sys/rm_platter.h>
  41   41  #include <sys/avintr.h>
  42   42  #include <sys/pte.h>
       43 +#include <sys/stddef.h>
       44 +#include <sys/debug.h>
       45 +#include <sys/cpuvar.h>
  43   46  
  44   47  #ifndef _ASM
  45   48  /*
  46   49   * On a virtualized platform a virtual cpu may not actually be running
  47   50   * on a physical cpu, especially in situations where a configuration has
  48   51   * more vcpus than pcpus.  This function tells us (if it's able) whether
  49   52   * the specified vcpu is currently running on a pcpu.  If this is not
  50   53   * known or cannot be determined, it will return the unknown state.
  51   54   */
  52   55  #define VCPU_STATE_UNKNOWN      0
[ 18 lines elided ]
  71   74   * A note about the hypervisor affinity bits: a one bit in the affinity mask
  72   75   * means the corresponding event channel is allowed to be serviced
  73   76   * by this cpu.
  74   77   */
  75   78  struct xen_evt_data {
  76   79          ulong_t         pending_sel[PIL_MAX + 1]; /* event array selectors */
  77   80          ulong_t         pending_evts[PIL_MAX + 1][sizeof (ulong_t) * 8];
  78   81          ulong_t         evt_affinity[sizeof (ulong_t) * 8]; /* service on cpu */
  79   82  };
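As the note above says, a one bit in evt_affinity marks an event channel this cpu is allowed to service. A minimal sketch of that bit test, using only the layout shown here (the helper name is hypothetical and not part of this change):

/* Hypothetical helper: may event channel chan be serviced on this cpu? */
static int
xen_evt_allowed(const struct xen_evt_data *evt, uint_t chan)
{
        const uint_t bits = sizeof (ulong_t) * 8;       /* bits per word */

        return ((evt->evt_affinity[chan / bits] >> (chan % bits)) & 1);
}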
  80   83  
       84 +struct kpti_frame {
       85 +        uint64_t        kf_lower_redzone;
       86 +
       87 +        /* Stashed value of %cr3 when we entered the trampoline. */
       88 +        greg_t          kf_tr_cr3;
       89 +
       90 +        /*
       91 +         * We use %r13-r14 as scratch registers in the trampoline code,
       92 +         * so stash those here "below" the rest of the stack so they can be
       93 +         * pushed/popped if needed.
       94 +         */
       95 +        greg_t          kf_r14;
       96 +        greg_t          kf_r13;
       97 +
       98 +        /*
       99 +         * Part of this struct is used as the HW stack frame when taking an
      100 +         * interrupt on the user page table. The CPU is going to push a bunch
      101 +         * of regs onto the stack pointer set in the TSS/IDT (which we set to
      102 +         * &kf_rsp here).
      103 +         *
      104 +         * This is only a temporary holding area for them (we'll move them over
      105 +         * to the real interrupt stack once we've set %cr3).
      106 +         *
      107 +         * Note that these must be cleared during a process switch on this cpu.
      108 +         */
      109 +        greg_t          kf_err;         /* Bottom of initial hw stack frame */
      110 +        greg_t          kf_rip;
      111 +        greg_t          kf_cs;
      112 +        greg_t          kf_rflags;
      113 +        greg_t          kf_rsp;
      114 +        greg_t          kf_ss;
      115 +
      116 +        greg_t          kf_tr_rsp;      /* Top of HW stack frame */
      117 +        /* We also write this with the %rsp value on tramp entry */
      118 +
      119 +        /* Written to 0x1 when this kpti_frame is in use. */
      120 +        uint64_t        kf_tr_flag;
      121 +
      122 +        uint64_t        kf_middle_redzone;
      123 +
      124 +        /*
      125 +         * The things we need to write to %cr3 to change between page tables.
      126 +         * These live "above" the HW stack.
      127 +         */
      128 +        greg_t          kf_kernel_cr3;
      129 +        greg_t          kf_user_cr3;
      130 +        greg_t          kf_tr_ret_rsp;
      131 +
      132 +        uint64_t        kf_unused;              /* For 16-byte align */
      133 +
      134 +        uint64_t        kf_upper_redzone;
      135 +};
      136 +
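The comments above note that the hardware frame held in kf_err..kf_tr_rsp is only a temporary landing area and must be cleared on a process switch. A minimal sketch of such a scrub, assuming a hypothetical helper name and an assumed choice of which fields to clear (the real switch-path code lives elsewhere in the patch):

/*
 * Hypothetical sketch: scrub the temporary hardware frame and the in-use
 * flag so no stale user state survives a process switch on this cpu.
 */
static void
kpti_frame_clear(struct kpti_frame *kf)
{
        kf->kf_err = 0;
        kf->kf_rip = 0;
        kf->kf_cs = 0;
        kf->kf_rflags = 0;
        kf->kf_rsp = 0;
        kf->kf_ss = 0;
        kf->kf_tr_rsp = 0;
        kf->kf_tr_flag = 0;
}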
      137 +/*
       138 + * This first value, MACHCPU_SIZE, is the size of all the members in the cpu_t
       139 + * AND struct machcpu, before we get to the mcpu_pad and the kpti area.
       140 + * The kpti area is used to contain per-CPU data that is visible in both sets of
      141 + * page-tables, and hence must be page-aligned and page-sized. See
      142 + * hat_pcp_setup().
      143 + *
      144 + * There is a CTASSERT in os/intr.c that checks these numbers.
      145 + */
      146 +#define MACHCPU_SIZE    (572 + 1584)
      147 +#define MACHCPU_PAD     (MMU_PAGESIZE - MACHCPU_SIZE)
      148 +#define MACHCPU_PAD2    (MMU_PAGESIZE - 16 - 3 * sizeof (struct kpti_frame))
      149 +
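The comment above defers the actual layout checks to a CTASSERT in os/intr.c. A sketch of the kind of compile-time checks these defines imply (using offsetof from the newly included sys/stddef.h and CTASSERT from sys/debug.h); the exact assertions used by the patch are assumed here, not quoted:

/* Sketch only: mcpu_pad ends the first page; the kpti area fills the next. */
CTASSERT(offsetof(cpu_t, cpu_m) + offsetof(struct machcpu, mcpu_pad) ==
    MACHCPU_SIZE);
CTASSERT(offsetof(cpu_t, cpu_m) + offsetof(struct machcpu, mcpu_pad2) ==
    MMU_PAGESIZE);
CTASSERT(MACHCPU_PAD2 + 3 * sizeof (struct kpti_frame) + 16 == MMU_PAGESIZE);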
  81  150  struct  machcpu {
  82  151          /*
  83  152           * x_call fields - used for interprocessor cross calls
  84  153           */
  85  154          struct xc_msg   *xc_msgbox;
  86  155          struct xc_msg   *xc_free;
  87  156          xc_data_t       xc_data;
  88  157          uint32_t        xc_wait_cnt;
  89  158          volatile uint32_t xc_work_cnt;
  90  159  
[ 5 lines elided ]
  96  165          struct hat_cpu_info     *mcpu_hat_info;
  97  166  
  98  167          volatile ulong_t        mcpu_tlb_info;
  99  168  
 100  169          /* i86 hardware table addresses that cannot be shared */
 101  170  
 102  171          user_desc_t     *mcpu_gdt;      /* GDT */
 103  172          gate_desc_t     *mcpu_idt;      /* current IDT */
 104  173  
 105  174          tss_t           *mcpu_tss;      /* TSS */
      175 +        void            *mcpu_ldt;
      176 +        size_t          mcpu_ldt_len;
 106  177  
 107  178          kmutex_t        mcpu_ppaddr_mutex;
 108  179          caddr_t         mcpu_caddr1;    /* per cpu CADDR1 */
 109  180          caddr_t         mcpu_caddr2;    /* per cpu CADDR2 */
 110  181          uint64_t        mcpu_caddr1pte;
 111  182          uint64_t        mcpu_caddr2pte;
 112  183  
 113  184          struct softint  mcpu_softinfo;
 114  185          uint64_t        pil_high_start[HIGH_LEVELS];
 115  186          uint64_t        intrstat[PIL_MAX + 1][2];
[ 24 lines elided ]
 140  211          void                    *mcpu_mach_ctx_ptr;
 141  212  
 142  213          /*
 143  214           * A stamp that is unique per processor and changes
  144  215           * whenever an interrupt happens. Useful for detecting
 145  216           * if a section of code gets interrupted.
 146  217           * The high order 16 bits will hold the cpu->cpu_id.
 147  218           * The low order bits will be incremented on every interrupt.
 148  219           */
 149  220          volatile uint32_t       mcpu_istamp;
      221 +
      222 +        char                    mcpu_pad[MACHCPU_PAD];
      223 +
      224 +        /* This is the start of the page */
      225 +        char                    mcpu_pad2[MACHCPU_PAD2];
      226 +        struct kpti_frame       mcpu_kpti;
      227 +        struct kpti_frame       mcpu_kpti_flt;
      228 +        struct kpti_frame       mcpu_kpti_dbg;
      229 +        char                    mcpu_pad3[16];
 150  230  };
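The three kpti_frame members above give each cpu its trampoline landing areas (regular, fault, and debug, going by their names). Per the struct kpti_frame comments, the hardware frame is pushed at the stack pointer programmed into the TSS/IDT; a sketch of aiming a TSS IST slot at the top of one of these frames (kf_tr_rsp, annotated "Top of HW stack frame" above) might look like the following. The helper name and the slot-to-frame pairing are assumptions; the real setup lives elsewhere in the patch:

/*
 * Hypothetical sketch: point a TSS IST entry at the top of a per-cpu
 * kpti_frame so a trap taken on the user page table pushes its hardware
 * frame into kf_ss..kf_err of that frame.
 */
static void
kpti_aim_ist(cpu_t *cp, tss_t *tss)
{
        tss->tss_ist1 = (uintptr_t)&cp->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
}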
 151  231  
 152  232  #define NINTR_THREADS   (LOCK_LEVEL-1)  /* number of interrupt threads */
 153  233  #define MWAIT_HALTED    (1)             /* mcpu_mwait set when halting */
 154  234  #define MWAIT_RUNNING   (0)             /* mcpu_mwait set to wakeup */
 155  235  #define MWAIT_WAKEUP_IPI        (2)     /* need IPI to wakeup */
 156  236  #define MWAIT_WAKEUP(cpu)       (*((cpu)->cpu_m.mcpu_mwait) = MWAIT_RUNNING)
 157  237  
 158  238  #endif  /* _ASM */
 159  239  
 160  240  /* Please DON'T add any more of this namespace-poisoning sewage here */
 161  241  
 162  242  #define cpu_nodeid cpu_m.mcpu_nodeid
 163  243  #define cpu_pri cpu_m.mcpu_pri
 164  244  #define cpu_current_hat cpu_m.mcpu_current_hat
 165  245  #define cpu_hat_info cpu_m.mcpu_hat_info
 166  246  #define cpu_ppaddr_mutex cpu_m.mcpu_ppaddr_mutex
 167  247  #define cpu_gdt cpu_m.mcpu_gdt
 168  248  #define cpu_idt cpu_m.mcpu_idt
 169  249  #define cpu_tss cpu_m.mcpu_tss
 170      -#define cpu_ldt cpu_m.mcpu_ldt
 171  250  #define cpu_caddr1 cpu_m.mcpu_caddr1
 172  251  #define cpu_caddr2 cpu_m.mcpu_caddr2
 173  252  #define cpu_softinfo cpu_m.mcpu_softinfo
 174  253  #define cpu_caddr1pte cpu_m.mcpu_caddr1pte
 175  254  #define cpu_caddr2pte cpu_m.mcpu_caddr2pte
 176  255  
 177  256  #ifdef  __cplusplus
 178  257  }
 179  258  #endif
 180  259  
 181  260  #endif  /* _SYS_MACHCPUVAR_H */
    