Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/i86pc/vm/hat_i86.h
          +++ new/usr/src/uts/i86pc/vm/hat_i86.h
↓ open down ↓ 16 lines elided ↑ open up ↑
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  /*
  26   26   * Copyright (c) 2014 by Delphix. All rights reserved.
       27 + * Copyright 2018 Joyent, Inc.
  27   28   */
  28   29  
  29   30  #ifndef _VM_HAT_I86_H
  30   31  #define _VM_HAT_I86_H
  31   32  
  32   33  
  33   34  #ifdef  __cplusplus
  34   35  extern "C" {
  35   36  #endif
  36   37  
↓ open down ↓ 18 lines elided ↑ open up ↑
  55   56  /*
  56   57   * The essential data types involved:
  57   58   *
  58   59   * htable_t     - There is one of these for each page table and it is used
  59   60   *              by the HAT to manage the page table.
  60   61   *
  61   62   * hment_t      - Links together multiple PTEs to a single page.
  62   63   */
  63   64  
  64   65  /*
  65      - * VLP processes have a 32 bit address range, so their top level is 2 and
  66      - * with only 4 PTEs in that table.
       66 + * Maximum number of per-CPU pagetable entries that we'll need to cache in the
       67 + * HAT. See the big theory statement in uts/i86pc/vm/hat_i86.c for more
       68 + * information.
  67   69   */
  68      -#define VLP_LEVEL       (2)
  69      -#define VLP_NUM_PTES    (4)
  70      -#define VLP_SIZE        (VLP_NUM_PTES * sizeof (x86pte_t))
  71      -#define TOP_LEVEL(h)    (((h)->hat_flags & HAT_VLP) ? VLP_LEVEL : mmu.max_level)
  72      -#define VLP_COPY(fromptep, toptep) { \
  73      -        toptep[0] = fromptep[0]; \
  74      -        toptep[1] = fromptep[1]; \
  75      -        toptep[2] = fromptep[2]; \
  76      -        toptep[3] = fromptep[3]; \
  77      -}
       70 +#if defined(__xpv)
       71 +/*
        72 + * The Xen hypervisor does not use per-CPU pagetables (PCP). Define a
        73 + * single entry anyway so that the hat_copied_ptes member does not have
        74 + * to be conditionally compiled.
       75 + */
       76 +#define MAX_COPIED_PTES 1
       77 +#else
       78 +/*
       79 + * The 64-bit kernel may have up to 512 PTEs present in it for a given process.
       80 + */
       81 +#define MAX_COPIED_PTES 512
       82 +#endif  /* __xpv */
  78   83  
       84 +#define TOP_LEVEL(h)    (((h)->hat_max_level))
       85 +
  79   86  /*
  80   87   * The hat struct exists for each address space.
  81   88   */
  82   89  struct hat {
  83   90          kmutex_t        hat_mutex;
  84   91          struct as       *hat_as;
  85   92          uint_t          hat_stats;
  86   93          pgcnt_t         hat_pages_mapped[MAX_PAGE_LEVEL + 1];
  87   94          pgcnt_t         hat_ism_pgcnt;
  88   95          cpuset_t        hat_cpus;
  89   96          uint16_t        hat_flags;
       97 +        uint8_t         hat_max_level;  /* top level of this HAT */
       98 +        uint_t          hat_num_copied; /* Actual num of hat_copied_ptes[] */
  90   99          htable_t        *hat_htable;    /* top level htable */
  91  100          struct hat      *hat_next;
  92  101          struct hat      *hat_prev;
  93  102          uint_t          hat_num_hash;   /* number of htable hash buckets */
  94  103          htable_t        **hat_ht_hash;  /* htable hash buckets */
  95  104          htable_t        *hat_ht_cached; /* cached free htables */
  96      -        x86pte_t        hat_vlp_ptes[VLP_NUM_PTES];
      105 +        x86pte_t        hat_copied_ptes[MAX_COPIED_PTES];
  97  106  #if defined(__amd64) && defined(__xpv)
  98  107          pfn_t           hat_user_ptable; /* alt top ptable for user mode */
  99  108  #endif
 100  109  };
 101  110  typedef struct hat hat_t;
 102  111  
 103  112  #define PGCNT_INC(hat, level)   \
 104  113          atomic_inc_ulong(&(hat)->hat_pages_mapped[level]);
 105  114  #define PGCNT_DEC(hat, level)   \
 106  115          atomic_dec_ulong(&(hat)->hat_pages_mapped[level]);
 107  116  
 108  117  /*
 109      - * Flags for the hat_flags field
      118 + * Flags for the hat_flags field. For more information, please see the big
      119 + * theory statement on the HAT design in uts/i86pc/vm/hat_i86.c.
 110  120   *
 111  121   * HAT_FREEING - set when HAT is being destroyed - mostly used to detect that
 112  122   *      demap()s can be avoided.
 113  123   *
 114      - * HAT_VLP - indicates a 32 bit process has a virtual address range less than
 115      - *      the hardware's physical address range. (VLP->Virtual Less-than Physical)
 116      - *      Note - never used on the hypervisor.
      124 + * HAT_COPIED - Indicates this HAT is a source for per-cpu page tables: see the
      125 + *      big comment in hat_i86.c for a description.
 117  126   *
      127 + * HAT_COPIED_32 - HAT_COPIED, but for an ILP32 process.
      128 + *
 118  129   * HAT_VICTIM - This is set while a hat is being examined for page table
 119  130   *      stealing and prevents it from being freed.
 120  131   *
  122  133   * HAT_SHARED - The hat has exported its page tables via hat_share()
 122  133   *
 123  134   * HAT_PINNED - On the hypervisor, indicates the top page table has been pinned.
      135 + *
      136 + * HAT_PCP - Used for the per-cpu user page table (i.e. associated with a CPU,
      137 + *      not a process).
 124  138   */
 125  139  #define HAT_FREEING     (0x0001)
 126      -#define HAT_VLP         (0x0002)
 127      -#define HAT_VICTIM      (0x0004)
 128      -#define HAT_SHARED      (0x0008)
 129      -#define HAT_PINNED      (0x0010)
      140 +#define HAT_VICTIM      (0x0002)
      141 +#define HAT_SHARED      (0x0004)
      142 +#define HAT_PINNED      (0x0008)
      143 +#define HAT_COPIED      (0x0010)
      144 +#define HAT_COPIED_32   (0x0020)
      145 +#define HAT_PCP         (0x0040)
 130  146  
 131  147  /*
 132  148   * Additional platform attribute for hat_devload() to force no caching.
 133  149   */
 134  150  #define HAT_PLAT_NOCACHE        (0x100000)
 135  151  
 136  152  /*
 137  153   * Simple statistics for the HAT. These are just counters that are
 138  154   * atomically incremented. They can be reset directly from the kernel
 139  155   * debugger.
↓ open down ↓ 8 lines elided ↑ open up ↑
 148  164          ulong_t hs_htable_rputs;        /* putbacks to reserve */
 149  165          ulong_t hs_htable_shared;       /* number of htables shared */
 150  166          ulong_t hs_htable_unshared;     /* number of htables unshared */
 151  167          ulong_t hs_hm_alloc;
 152  168          ulong_t hs_hm_free;
 153  169          ulong_t hs_hm_put_reserve;
 154  170          ulong_t hs_hm_get_reserve;
 155  171          ulong_t hs_hm_steals;
 156  172          ulong_t hs_hm_steal_exam;
 157  173          ulong_t hs_tlb_inval_delayed;
      174 +        ulong_t hs_hat_copied64;
      175 +        ulong_t hs_hat_copied32;
      176 +        ulong_t hs_hat_normal64;
 158  177  };
 159  178  extern struct hatstats hatstat;
 160  179  #ifdef DEBUG
 161  180  #define HATSTAT_INC(x)  (++hatstat.x)
 162  181  #else
 163  182  #define HATSTAT_INC(x)  (0)
 164  183  #endif
 165  184  
 166  185  #if defined(_KERNEL)
 167  186  
↓ open down ↓ 53 lines elided ↑ open up ↑
 221  240   * not in any include file???
 222  241   */
 223  242  extern void halt(char *fmt);
 224  243  
 225  244  /*
 226  245   * x86 specific routines for use online in setup or i86pc/vm files
 227  246   */
 228  247  extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
 229  248          caddr_t ekernelheap);
 230  249  extern void hat_kern_setup(void);
 231      -extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
 232  250  extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
 233  251          x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
 234  252  extern void hat_init_finish(void);
 235  253  extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
 236  254  extern pfn_t hat_kpm_va2pfn(caddr_t);
 237  255  extern page_t *hat_kpm_vaddr2page(caddr_t);
 238  256  extern uintptr_t hat_kernelbase(uintptr_t);
 239  257  extern void hat_kmap_init(uintptr_t base, size_t len);
 240  258  
 241  259  extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry);
 242  260  
 243      -#if !defined(__xpv)
 244      -/*
 245      - * routines to deal with delayed TLB invalidations for idle CPUs
 246      - */
 247      -extern void tlb_going_idle(void);
 248      -extern void tlb_service(void);
 249      -#endif
      261 +extern void mmu_calc_user_slots(void);
      262 +extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
      263 +extern void hat_switch(struct hat *hat);
 250  264  
      265 +#define TLB_RANGE_LEN(r)        ((r)->tr_cnt << LEVEL_SHIFT((r)->tr_level))
      266 +
 251  267  /*
 252      - * Hat switch function invoked to load a new context into %cr3
      268 + * A range of virtual pages for purposes of demapping.
 253  269   */
 254      -extern void hat_switch(struct hat *hat);
      270 +typedef struct tlb_range {
      271 +        uintptr_t tr_va;        /* address of page */
      272 +        ulong_t tr_cnt;         /* number of pages in range */
      273 +        int8_t  tr_level;       /* page table level */
      274 +} tlb_range_t;
 255  275  
 256      -#ifdef __xpv
      276 +#if defined(__xpv)
      277 +
      278 +#define XPV_DISALLOW_MIGRATE()  xen_block_migrate()
      279 +#define XPV_ALLOW_MIGRATE()     xen_allow_migrate()
      280 +
      281 +#define mmu_flush_tlb_page(va)  mmu_invlpg((caddr_t)va)
      282 +#define mmu_flush_tlb_kpage(va) mmu_invlpg((caddr_t)va)
      283 +
 257  284  /*
 258  285   * Interfaces to use around code that maps/unmaps grant table references.
 259  286   */
 260  287  extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *);
 261  288  extern void hat_release_mapping(hat_t *, caddr_t);
 262  289  
 263      -#define XPV_DISALLOW_MIGRATE()  xen_block_migrate()
 264      -#define XPV_ALLOW_MIGRATE()     xen_allow_migrate()
 265      -
 266  290  #else
 267  291  
 268  292  #define XPV_DISALLOW_MIGRATE()  /* nothing */
 269  293  #define XPV_ALLOW_MIGRATE()     /* nothing */
 270  294  
 271  295  #define pfn_is_foreign(pfn)     __lintzero
 272  296  
 273      -#endif
      297 +typedef enum flush_tlb_type {
      298 +        FLUSH_TLB_ALL = 1,
      299 +        FLUSH_TLB_NONGLOBAL = 2,
      300 +        FLUSH_TLB_RANGE = 3,
      301 +} flush_tlb_type_t;
 274  302  
      303 +extern void mmu_flush_tlb(flush_tlb_type_t, tlb_range_t *);
      304 +extern void mmu_flush_tlb_kpage(uintptr_t);
      305 +extern void mmu_flush_tlb_page(uintptr_t);
 275  306  
      307 +extern void hati_cpu_punchin(cpu_t *cpu, uintptr_t va, uint_t attrs);
      308 +
      309 +/*
      310 + * routines to deal with delayed TLB invalidations for idle CPUs
      311 + */
      312 +extern void tlb_going_idle(void);
      313 +extern void tlb_service(void);
      314 +
      315 +#endif /* !__xpv */
      316 +
 276  317  #endif  /* _KERNEL */
 277  318  
 278  319  #ifdef  __cplusplus
 279  320  }
 280  321  #endif
 281  322  
 282  323  #endif  /* _VM_HAT_I86_H */
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX