Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2014 by Delphix. All rights reserved.

  27  */
  28 
  29 #ifndef _VM_HAT_I86_H
  30 #define _VM_HAT_I86_H
  31 
  32 
  33 #ifdef  __cplusplus
  34 extern "C" {
  35 #endif
  36 
  37 /*
  38  * VM - Hardware Address Translation management.
  39  *
  40  * This file describes the contents of the x86_64 HAT data structures.
  41  */
  42 #include <sys/types.h>
  43 #include <sys/t_lock.h>
  44 #include <sys/cpuvar.h>
  45 #include <sys/x_call.h>
  46 #include <vm/seg.h>
  47 #include <vm/page.h>
  48 #include <sys/vmparam.h>
  49 #include <sys/vm_machparam.h>
  50 #include <sys/promif.h>
  51 #include <vm/hat_pte.h>
  52 #include <vm/htable.h>
  53 #include <vm/hment.h>
  54 
  55 /*
  56  * The essential data types involved:
  57  *
  58  * htable_t     - There is one of these for each page table and it is used
  59  *              by the HAT to manage the page table.
  60  *
  61  * hment_t      - Links together multiple PTEs to a single page.
  62  */
  63 
  64 /*
  65  * VLP processes have a 32 bit address range, so their top level is 2,
  66  * with only 4 PTEs in that table.

  67  */
  68 #define VLP_LEVEL       (2)     /* top page table level of a VLP hat */
  69 #define VLP_NUM_PTES    (4)     /* number of PTEs in that top-level table */
  70 #define VLP_SIZE        (VLP_NUM_PTES * sizeof (x86pte_t))  /* in bytes */
  71 #define TOP_LEVEL(h)    (((h)->hat_flags & HAT_VLP) ? VLP_LEVEL : mmu.max_level)
  72 #define VLP_COPY(fromptep, toptep) { \
  73         toptep[0] = fromptep[0]; \
  74         toptep[1] = fromptep[1]; \
  75         toptep[2] = fromptep[2]; \
  76         toptep[3] = fromptep[3]; \
  77 }       /* copies exactly 4 entries (== VLP_NUM_PTES). NOTE(review): bare { } body, not do/while (0), and args are unparenthesized */



  78 


  79 /*
  80  * The hat struct exists for each address space.
  81  */
  82 struct hat {
  83         kmutex_t        hat_mutex;
  84         struct as       *hat_as;        /* address space associated with this hat */
  85         uint_t          hat_stats;
  86         pgcnt_t         hat_pages_mapped[MAX_PAGE_LEVEL + 1];  /* per-level counts; see PGCNT_INC/PGCNT_DEC */
  87         pgcnt_t         hat_ism_pgcnt;
  88         cpuset_t        hat_cpus;
  89         uint16_t        hat_flags;      /* HAT_* flags, described below */


  90         htable_t        *hat_htable;    /* top level htable */
  91         struct hat      *hat_next;      /* NOTE(review): presumably list linkage; confirm */
  92         struct hat      *hat_prev;
  93         uint_t          hat_num_hash;   /* number of htable hash buckets */
  94         htable_t        **hat_ht_hash;  /* htable hash buckets */
  95         htable_t        *hat_ht_cached; /* cached free htables */
  96         x86pte_t        hat_vlp_ptes[VLP_NUM_PTES];    /* presumably the top-level PTEs of a VLP hat; confirm */
  97 #if defined(__amd64) && defined(__xpv)
  98         pfn_t           hat_user_ptable; /* alt top ptable for user mode */
  99 #endif
 100 };
 101 typedef struct hat hat_t;
 102 
 103 #define PGCNT_INC(hat, level)   \
 104         atomic_inc_ulong(&(hat)->hat_pages_mapped[level]);     /* NOTE(review): the ';' is part of the expansion */
 105 #define PGCNT_DEC(hat, level)   \
 106         atomic_dec_ulong(&(hat)->hat_pages_mapped[level]);     /* NOTE(review): same trailing ';' as PGCNT_INC */
 107 
 108 /*
 109  * Flags for the hat_flags field

 110  *
 111  * HAT_FREEING - set when HAT is being destroyed - mostly used to detect that
 112  *      demap()s can be avoided.
 113  *
 114  * HAT_VLP - indicates a 32 bit process has a virtual address range less than
 115  *      the hardware's physical address range. (VLP->Virtual Less-than Physical)
 116  *      Note - never used on the hypervisor.
 117  *


 118  * HAT_VICTIM - This is set while a hat is being examined for page table
 119  *      stealing and prevents it from being freed.
 120  *
 121  * HAT_SHARED - The hat has exported its page tables via hat_share()
 122  *
 123  * HAT_PINNED - On the hypervisor, indicates the top page table has been pinned.



 124  */
 125 #define HAT_FREEING     (0x0001)
 126 #define HAT_VLP         (0x0002)
 127 #define HAT_VICTIM      (0x0004)
 128 #define HAT_SHARED      (0x0008)
 129 #define HAT_PINNED      (0x0010)


 130 
 131 /*
 132  * Additional platform attribute for hat_devload() to force no caching.
 133  */
 134 #define HAT_PLAT_NOCACHE        (0x100000)
 135 
 136 /*
 137  * Simple statistics for the HAT. These are just counters; HATSTAT_INC()
 138  * bumps them with a plain (non-atomic) ++ when DEBUG is defined and is a
 139  * no-op otherwise. They can be reset directly from the kernel debugger.
 140  */
 141 struct hatstats {
 142         ulong_t hs_reap_attempts;
 143         ulong_t hs_reaped;
 144         ulong_t hs_steals;
 145         ulong_t hs_ptable_allocs;
 146         ulong_t hs_ptable_frees;
 147         ulong_t hs_htable_rgets;        /* allocs from reserve */
 148         ulong_t hs_htable_rputs;        /* putbacks to reserve */
 149         ulong_t hs_htable_shared;       /* number of htables shared */
 150         ulong_t hs_htable_unshared;     /* number of htables unshared */
 151         ulong_t hs_hm_alloc;
 152         ulong_t hs_hm_free;
 153         ulong_t hs_hm_put_reserve;
 154         ulong_t hs_hm_get_reserve;
 155         ulong_t hs_hm_steals;
 156         ulong_t hs_hm_steal_exam;
 157         ulong_t hs_tlb_inval_delayed;



 158 };
 159 extern struct hatstats hatstat;
 160 #ifdef DEBUG
 161 #define HATSTAT_INC(x)  (++hatstat.x)   /* plain, non-atomic increment of hatstat.x */
 162 #else
 163 #define HATSTAT_INC(x)  (0)             /* compiled out; x is never referenced */
 164 #endif
 165 
 166 #if defined(_KERNEL)
 167 
 168 /*
 169  * Useful macro to align hat_XXX() address arguments to a page boundary
 170  */
 171 #define ALIGN2PAGE(a)           ((uintptr_t)(a) & MMU_PAGEMASK)
 172 #define IS_PAGEALIGNED(a)       (((uintptr_t)(a) & MMU_PAGEOFFSET) == 0)
 173 
 174 extern uint_t   khat_running;   /* set at end of hat_kern_setup() */
 175 extern cpuset_t khat_cpuset;    /* cpuset for kernel address demap Xcalls */
 176 extern kmutex_t hat_list_lock;
 177 extern kcondvar_t hat_list_cv;


 211 extern void setup_vaddr_for_ppcopy(struct cpu *);
 212 extern void teardown_vaddr_for_ppcopy(struct cpu *);
 213 extern void clear_boot_mappings(uintptr_t, uintptr_t);
 214 
 215 /*
 216  * magic value to indicate that all TLB entries should be demapped.
 217  */
 218 #define DEMAP_ALL_ADDR  (~(uintptr_t)0)
 219 
 220 /*
 221  * not in any include file???
 222  */
 223 extern void halt(char *fmt);
 224 
 225 /*
 226  * x86 specific routines for use only in setup or i86pc/vm files
 227  */
 228 extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
 229         caddr_t ekernelheap);
 230 extern void hat_kern_setup(void);
 231 extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
 232 extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
 233         x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
 234 extern void hat_init_finish(void);
 235 extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
 236 extern pfn_t hat_kpm_va2pfn(caddr_t);
 237 extern page_t *hat_kpm_vaddr2page(caddr_t);
 238 extern uintptr_t hat_kernelbase(uintptr_t);
 239 extern void hat_kmap_init(uintptr_t base, size_t len);
 240 
 241 extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry);
 242 
 243 #if !defined(__xpv)
 244 /*
 245  * routines to deal with delayed TLB invalidations for idle CPUs
 246  */
 247 extern void tlb_going_idle(void);
 248 extern void tlb_service(void);
 249 #endif
 250 


 251 /*
 252  * Hat switch function invoked to load a new context into %cr3
 253  */
 254 extern void hat_switch(struct hat *hat);




 255 
 256 #ifdef __xpv







 257 /*
 258  * Interfaces to use around code that maps/unmaps grant table references.
 259  */
 260 extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *);
 261 extern void hat_release_mapping(hat_t *, caddr_t);
 262 
 263 #define XPV_DISALLOW_MIGRATE()  xen_block_migrate()
 264 #define XPV_ALLOW_MIGRATE()     xen_allow_migrate()
 265 
 266 #else
 267 
 268 #define XPV_DISALLOW_MIGRATE()  /* nothing */
 269 #define XPV_ALLOW_MIGRATE()     /* nothing */
 270 
 271 #define pfn_is_foreign(pfn)     __lintzero
 272 
 273 #endif




 274 



 275 










 276 #endif  /* _KERNEL */
 277 
 278 #ifdef  __cplusplus
 279 }
 280 #endif
 281 
 282 #endif  /* _VM_HAT_I86_H */


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2014 by Delphix. All rights reserved.
  27  * Copyright 2018 Joyent, Inc.
  28  */
  29 
  30 #ifndef _VM_HAT_I86_H
  31 #define _VM_HAT_I86_H
  32 
  33 
  34 #ifdef  __cplusplus
  35 extern "C" {
  36 #endif
  37 
  38 /*
  39  * VM - Hardware Address Translation management.
  40  *
  41  * This file describes the contents of the x86_64 HAT data structures.
  42  */
  43 #include <sys/types.h>
  44 #include <sys/t_lock.h>
  45 #include <sys/cpuvar.h>
  46 #include <sys/x_call.h>
  47 #include <vm/seg.h>
  48 #include <vm/page.h>
  49 #include <sys/vmparam.h>
  50 #include <sys/vm_machparam.h>
  51 #include <sys/promif.h>
  52 #include <vm/hat_pte.h>
  53 #include <vm/htable.h>
  54 #include <vm/hment.h>
  55 
  56 /*
  57  * The essential data types involved:
  58  *
  59  * htable_t     - There is one of these for each page table and it is used
  60  *              by the HAT to manage the page table.
  61  *
  62  * hment_t      - Links together multiple PTEs to a single page.
  63  */
  64 
  65 /*
  66  * Maximum number of per-CPU pagetable entries that we'll need to cache in the
  67  * HAT. See the big theory statement in uts/i86pc/vm/hat_i86.c for more
  68  * information.
  69  */
  70 #if defined(__xpv)
  71 /*
  72  * The Xen hypervisor does not use per-CPU pagetables (PCP). Still define a
  73  * single-entry array so that the struct hat member does not have to be
  74  * conditional.
  75  */
  76 #define MAX_COPIED_PTES 1
  77 #else
  78 /*
  79  * The 64-bit kernel may have up to 512 PTEs present in it for a given process.
  80  */
  81 #define MAX_COPIED_PTES 512
  82 #endif  /* __xpv */
  83 
  84 #define TOP_LEVEL(h)    (((h)->hat_max_level))
  85 
  86 /*
  87  * The hat struct exists for each address space.
  88  */
  89 struct hat {
  90         kmutex_t        hat_mutex;
  91         struct as       *hat_as;        /* address space associated with this hat */
  92         uint_t          hat_stats;
  93         pgcnt_t         hat_pages_mapped[MAX_PAGE_LEVEL + 1];  /* per-level counts; see PGCNT_INC/PGCNT_DEC */
  94         pgcnt_t         hat_ism_pgcnt;
  95         cpuset_t        hat_cpus;
  96         uint16_t        hat_flags;      /* HAT_* flags, described below */
  97         uint8_t         hat_max_level;  /* top level of this HAT */
  98         uint_t          hat_num_copied; /* Actual num of hat_copied_ptes[] */
  99         htable_t        *hat_htable;    /* top level htable */
 100         struct hat      *hat_next;      /* NOTE(review): presumably list linkage; confirm */
 101         struct hat      *hat_prev;
 102         uint_t          hat_num_hash;   /* number of htable hash buckets */
 103         htable_t        **hat_ht_hash;  /* htable hash buckets */
 104         htable_t        *hat_ht_cached; /* cached free htables */
 105         x86pte_t        hat_copied_ptes[MAX_COPIED_PTES];      /* capacity MAX_COPIED_PTES; hat_num_copied gives the count in use */
 106 #if defined(__amd64) && defined(__xpv)
 107         pfn_t           hat_user_ptable; /* alt top ptable for user mode */
 108 #endif
 109 };
 110 typedef struct hat hat_t;
 111 
 112 #define PGCNT_INC(hat, level)   \
 113         atomic_inc_ulong(&(hat)->hat_pages_mapped[level]);     /* NOTE(review): the ';' is part of the expansion */
 114 #define PGCNT_DEC(hat, level)   \
 115         atomic_dec_ulong(&(hat)->hat_pages_mapped[level]);     /* NOTE(review): same trailing ';' as PGCNT_INC */
 116 
 117 /*
 118  * Flags for the hat_flags field. For more information, please see the big
 119  * theory statement on the HAT design in uts/i86pc/vm/hat_i86.c.
 120  *
 121  * HAT_FREEING - set when HAT is being destroyed - mostly used to detect that
 122  *      demap()s can be avoided.
 123  *
 124  * HAT_COPIED - Indicates this HAT is a source for per-cpu page tables: see the
 125  *      big comment in hat_i86.c for a description.

 126  *
 127  * HAT_COPIED_32 - HAT_COPIED, but for an ILP32 process.
 128  *
 129  * HAT_VICTIM - This is set while a hat is being examined for page table
 130  *      stealing and prevents it from being freed.
 131  *
 132  * HAT_SHARED - The hat has exported its page tables via hat_share()
 133  *
 134  * HAT_PINNED - On the hypervisor, indicates the top page table has been pinned.
 135  *
 136  * HAT_PCP - Used for the per-cpu user page table (i.e. associated with a CPU,
 137  *      not a process).
 138  */
 139 #define HAT_FREEING     (0x0001)
 140 #define HAT_VICTIM      (0x0002)
 141 #define HAT_SHARED      (0x0004)
 142 #define HAT_PINNED      (0x0008)
 143 #define HAT_COPIED      (0x0010)
 144 #define HAT_COPIED_32   (0x0020)
 145 #define HAT_PCP         (0x0040)
 146 
 147 /*
 148  * Additional platform attribute for hat_devload() to force no caching.
 149  */
 150 #define HAT_PLAT_NOCACHE        (0x100000)
 151 
 152 /*
 153  * Simple statistics for the HAT. These are just counters; HATSTAT_INC()
 154  * bumps them with a plain (non-atomic) ++ when DEBUG is defined and is a
 155  * no-op otherwise. They can be reset directly from the kernel debugger.
 156  */
 157 struct hatstats {
 158         ulong_t hs_reap_attempts;
 159         ulong_t hs_reaped;
 160         ulong_t hs_steals;
 161         ulong_t hs_ptable_allocs;
 162         ulong_t hs_ptable_frees;
 163         ulong_t hs_htable_rgets;        /* allocs from reserve */
 164         ulong_t hs_htable_rputs;        /* putbacks to reserve */
 165         ulong_t hs_htable_shared;       /* number of htables shared */
 166         ulong_t hs_htable_unshared;     /* number of htables unshared */
 167         ulong_t hs_hm_alloc;
 168         ulong_t hs_hm_free;
 169         ulong_t hs_hm_put_reserve;
 170         ulong_t hs_hm_get_reserve;
 171         ulong_t hs_hm_steals;
 172         ulong_t hs_hm_steal_exam;
 173         ulong_t hs_tlb_inval_delayed;
 174         ulong_t hs_hat_copied64;
 175         ulong_t hs_hat_copied32;
 176         ulong_t hs_hat_normal64;
 177 };
 178 extern struct hatstats hatstat;
 179 #ifdef DEBUG
 180 #define HATSTAT_INC(x)  (++hatstat.x)   /* plain, non-atomic increment of hatstat.x */
 181 #else
 182 #define HATSTAT_INC(x)  (0)             /* compiled out; x is never referenced */
 183 #endif
 184 
 185 #if defined(_KERNEL)
 186 
 187 /*
 188  * Useful macro to align hat_XXX() address arguments to a page boundary
 189  */
 190 #define ALIGN2PAGE(a)           ((uintptr_t)(a) & MMU_PAGEMASK)
 191 #define IS_PAGEALIGNED(a)       (((uintptr_t)(a) & MMU_PAGEOFFSET) == 0)
 192 
 193 extern uint_t   khat_running;   /* set at end of hat_kern_setup() */
 194 extern cpuset_t khat_cpuset;    /* cpuset for kernel address demap Xcalls */
 195 extern kmutex_t hat_list_lock;
 196 extern kcondvar_t hat_list_cv;


 230 extern void setup_vaddr_for_ppcopy(struct cpu *);
 231 extern void teardown_vaddr_for_ppcopy(struct cpu *);
 232 extern void clear_boot_mappings(uintptr_t, uintptr_t);
 233 
 234 /*
 235  * magic value to indicate that all TLB entries should be demapped.
 236  */
 237 #define DEMAP_ALL_ADDR  (~(uintptr_t)0)
 238 
 239 /*
 240  * not in any include file???
 241  */
 242 extern void halt(char *fmt);
 243 
 244 /*
 245  * x86 specific routines for use only in setup or i86pc/vm files
 246  */
 247 extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
 248         caddr_t ekernelheap);
 249 extern void hat_kern_setup(void);

 250 extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
 251         x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
 252 extern void hat_init_finish(void);
 253 extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
 254 extern pfn_t hat_kpm_va2pfn(caddr_t);
 255 extern page_t *hat_kpm_vaddr2page(caddr_t);
 256 extern uintptr_t hat_kernelbase(uintptr_t);
 257 extern void hat_kmap_init(uintptr_t base, size_t len);
 258 
 259 extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry);
 260 
 261 extern void mmu_calc_user_slots(void);
 262 extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
 263 extern void hat_switch(struct hat *hat);




 264 
 265 #define TLB_RANGE_LEN(r)        ((r)->tr_cnt << LEVEL_SHIFT((r)->tr_level))
 266 
 267 /*
 268  * A range of virtual pages for purposes of demapping.
 269  */
 270 typedef struct tlb_range {
 271         uintptr_t tr_va;        /* address of page (presumably the first in the range; confirm) */
 272         ulong_t tr_cnt;         /* number of pages in range; TLB_RANGE_LEN() shifts this by LEVEL_SHIFT(tr_level) */
 273         int8_t  tr_level;       /* page table level */
 274 } tlb_range_t;
 275 
 276 #if defined(__xpv)
 277 
 278 #define XPV_DISALLOW_MIGRATE()  xen_block_migrate()
 279 #define XPV_ALLOW_MIGRATE()     xen_allow_migrate()
 280
/*
 * Under __xpv both single-page flush macros expand to mmu_invlpg() on the
 * given address. The argument is parenthesized so that an expression
 * argument is cast to caddr_t as a whole rather than only its first operand.
 */
 281 #define mmu_flush_tlb_page(va)  mmu_invlpg((caddr_t)(va))
 282 #define mmu_flush_tlb_kpage(va) mmu_invlpg((caddr_t)(va))
 283 
 284 /*
 285  * Interfaces to use around code that maps/unmaps grant table references.
 286  */
 287 extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *);
 288 extern void hat_release_mapping(hat_t *, caddr_t);
 289 



 290 #else
 291 
 292 #define XPV_DISALLOW_MIGRATE()  /* nothing */
 293 #define XPV_ALLOW_MIGRATE()     /* nothing */
 294 
 295 #define pfn_is_foreign(pfn)     __lintzero
 296 
 297 typedef enum flush_tlb_type {   /* flush type passed as first argument to mmu_flush_tlb() */
 298         FLUSH_TLB_ALL = 1,
 299         FLUSH_TLB_NONGLOBAL = 2,
 300         FLUSH_TLB_RANGE = 3,    /* NOTE(review): presumably acts on the tlb_range_t argument; confirm */
 301 } flush_tlb_type_t;
 302 
 303 extern void mmu_flush_tlb(flush_tlb_type_t, tlb_range_t *);
 304 extern void mmu_flush_tlb_kpage(uintptr_t);
 305 extern void mmu_flush_tlb_page(uintptr_t);
 306 
 307 extern void hati_cpu_punchin(cpu_t *cpu, uintptr_t va, uint_t attrs);
 308 
 309 /*
 310  * routines to deal with delayed TLB invalidations for idle CPUs
 311  */
 312 extern void tlb_going_idle(void);
 313 extern void tlb_service(void);
 314 
 315 #endif /* !__xpv */
 316 
 317 #endif  /* _KERNEL */
 318 
 319 #ifdef  __cplusplus
 320 }
 321 #endif
 322 
 323 #endif  /* _VM_HAT_I86_H */