Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/vm/hat_i86.h
+++ new/usr/src/uts/i86pc/vm/hat_i86.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2014 by Delphix. All rights reserved.
27 + * Copyright 2018 Joyent, Inc.
27 28 */
28 29
29 30 #ifndef _VM_HAT_I86_H
30 31 #define _VM_HAT_I86_H
31 32
32 33
33 34 #ifdef __cplusplus
34 35 extern "C" {
35 36 #endif
36 37
37 38 /*
38 39 * VM - Hardware Address Translation management.
39 40 *
40 41 * This file describes the contents of the x86_64 HAT data structures.
41 42 */
42 43 #include <sys/types.h>
43 44 #include <sys/t_lock.h>
44 45 #include <sys/cpuvar.h>
45 46 #include <sys/x_call.h>
46 47 #include <vm/seg.h>
47 48 #include <vm/page.h>
48 49 #include <sys/vmparam.h>
49 50 #include <sys/vm_machparam.h>
50 51 #include <sys/promif.h>
51 52 #include <vm/hat_pte.h>
52 53 #include <vm/htable.h>
53 54 #include <vm/hment.h>
54 55
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
55 56 /*
56 57 * The essential data types involved:
57 58 *
58 59 * htable_t - There is one of these for each page table and it is used
59 60 * by the HAT to manage the page table.
60 61 *
61 62 * hment_t - Links together multiple PTEs to a single page.
62 63 */
63 64
64 65 /*
65 - * VLP processes have a 32 bit address range, so their top level is 2 and
66 - * with only 4 PTEs in that table.
66 + * Maximum number of per-CPU pagetable entries that we'll need to cache in the
67 + * HAT. See the big theory statement in uts/i86pc/vm/hat_i86.c for more
68 + * information.
67 69 */
68 -#define VLP_LEVEL (2)
69 -#define VLP_NUM_PTES (4)
70 -#define VLP_SIZE (VLP_NUM_PTES * sizeof (x86pte_t))
71 -#define TOP_LEVEL(h) (((h)->hat_flags & HAT_VLP) ? VLP_LEVEL : mmu.max_level)
72 -#define VLP_COPY(fromptep, toptep) { \
73 - toptep[0] = fromptep[0]; \
74 - toptep[1] = fromptep[1]; \
75 - toptep[2] = fromptep[2]; \
76 - toptep[3] = fromptep[3]; \
77 -}
70 +#if defined(__xpv)
71 +/*
72 + * The Xen hypervisor does not use per-CPU pagetables (PCP). Define a single
73 + * struct member for it at least to make life easier and not make the member
74 + * conditional.
75 + */
76 +#define MAX_COPIED_PTES 1
77 +#else
78 +/*
79 + * The 64-bit kernel may have up to 512 PTEs present in it for a given process.
80 + */
81 +#define MAX_COPIED_PTES 512
82 +#endif /* __xpv */
78 83
84 +#define TOP_LEVEL(h) (((h)->hat_max_level))
85 +
79 86 /*
80 87 * The hat struct exists for each address space.
81 88 */
82 89 struct hat {
83 90 kmutex_t hat_mutex;
84 91 struct as *hat_as;
85 92 uint_t hat_stats;
86 93 pgcnt_t hat_pages_mapped[MAX_PAGE_LEVEL + 1];
87 94 pgcnt_t hat_ism_pgcnt;
88 95 cpuset_t hat_cpus;
89 96 uint16_t hat_flags;
97 + uint8_t hat_max_level; /* top level of this HAT */
98 + uint_t hat_num_copied; /* Actual num of hat_copied_ptes[] */
90 99 htable_t *hat_htable; /* top level htable */
91 100 struct hat *hat_next;
92 101 struct hat *hat_prev;
93 102 uint_t hat_num_hash; /* number of htable hash buckets */
94 103 htable_t **hat_ht_hash; /* htable hash buckets */
95 104 htable_t *hat_ht_cached; /* cached free htables */
96 - x86pte_t hat_vlp_ptes[VLP_NUM_PTES];
105 + x86pte_t hat_copied_ptes[MAX_COPIED_PTES];
97 106 #if defined(__amd64) && defined(__xpv)
98 107 pfn_t hat_user_ptable; /* alt top ptable for user mode */
99 108 #endif
100 109 };
101 110 typedef struct hat hat_t;
102 111
103 112 #define PGCNT_INC(hat, level) \
104 113 atomic_inc_ulong(&(hat)->hat_pages_mapped[level]);
105 114 #define PGCNT_DEC(hat, level) \
106 115 atomic_dec_ulong(&(hat)->hat_pages_mapped[level]);
107 116
108 117 /*
109 - * Flags for the hat_flags field
118 + * Flags for the hat_flags field. For more information, please see the big
119 + * theory statement on the HAT design in uts/i86pc/vm/hat_i86.c.
110 120 *
111 121 * HAT_FREEING - set when HAT is being destroyed - mostly used to detect that
112 122 * demap()s can be avoided.
113 123 *
114 - * HAT_VLP - indicates a 32 bit process has a virtual address range less than
115 - * the hardware's physical address range. (VLP->Virtual Less-than Physical)
116 - * Note - never used on the hypervisor.
124 + * HAT_COPIED - Indicates this HAT is a source for per-cpu page tables: see the
125 + * big comment in hat_i86.c for a description.
117 126 *
127 + * HAT_COPIED_32 - HAT_COPIED, but for an ILP32 process.
128 + *
118 129 * HAT_VICTIM - This is set while a hat is being examined for page table
119 130 * stealing and prevents it from being freed.
120 131 *
121 132 * HAT_SHARED - The hat has exported its page tables via hat_share()
122 133 *
123 134 * HAT_PINNED - On the hypervisor, indicates the top page table has been pinned.
135 + *
136 + * HAT_PCP - Used for the per-cpu user page table (i.e. associated with a CPU,
137 + * not a process).
124 138 */
125 139 #define HAT_FREEING (0x0001)
126 -#define HAT_VLP (0x0002)
127 -#define HAT_VICTIM (0x0004)
128 -#define HAT_SHARED (0x0008)
129 -#define HAT_PINNED (0x0010)
140 +#define HAT_VICTIM (0x0002)
141 +#define HAT_SHARED (0x0004)
142 +#define HAT_PINNED (0x0008)
143 +#define HAT_COPIED (0x0010)
144 +#define HAT_COPIED_32 (0x0020)
145 +#define HAT_PCP (0x0040)
130 146
131 147 /*
132 148 * Additional platform attribute for hat_devload() to force no caching.
133 149 */
134 150 #define HAT_PLAT_NOCACHE (0x100000)
135 151
136 152 /*
137 153 * Simple statistics for the HAT. These are just counters that are
138 154 * atomically incremented. They can be reset directly from the kernel
139 155 * debugger.
140 156 */
141 157 struct hatstats {
142 158 ulong_t hs_reap_attempts;
143 159 ulong_t hs_reaped;
144 160 ulong_t hs_steals;
145 161 ulong_t hs_ptable_allocs;
146 162 ulong_t hs_ptable_frees;
147 163 ulong_t hs_htable_rgets; /* allocs from reserve */
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
148 164 ulong_t hs_htable_rputs; /* putbacks to reserve */
149 165 ulong_t hs_htable_shared; /* number of htables shared */
150 166 ulong_t hs_htable_unshared; /* number of htables unshared */
151 167 ulong_t hs_hm_alloc;
152 168 ulong_t hs_hm_free;
153 169 ulong_t hs_hm_put_reserve;
154 170 ulong_t hs_hm_get_reserve;
155 171 ulong_t hs_hm_steals;
156 172 ulong_t hs_hm_steal_exam;
157 173 ulong_t hs_tlb_inval_delayed;
174 + ulong_t hs_hat_copied64;
175 + ulong_t hs_hat_copied32;
176 + ulong_t hs_hat_normal64;
158 177 };
159 178 extern struct hatstats hatstat;
160 179 #ifdef DEBUG
161 180 #define HATSTAT_INC(x) (++hatstat.x)
162 181 #else
163 182 #define HATSTAT_INC(x) (0)
164 183 #endif
165 184
166 185 #if defined(_KERNEL)
167 186
168 187 /*
169 188 * Useful macro to align hat_XXX() address arguments to a page boundary
170 189 */
171 190 #define ALIGN2PAGE(a) ((uintptr_t)(a) & MMU_PAGEMASK)
172 191 #define IS_PAGEALIGNED(a) (((uintptr_t)(a) & MMU_PAGEOFFSET) == 0)
173 192
174 193 extern uint_t khat_running; /* set at end of hat_kern_setup() */
175 194 extern cpuset_t khat_cpuset; /* cpuset for kernel address demap Xcalls */
176 195 extern kmutex_t hat_list_lock;
177 196 extern kcondvar_t hat_list_cv;
178 197
179 198
180 199
181 200 /*
182 201 * Interfaces to setup a cpu private mapping (ie. preemption disabled).
183 202 * The attr and flags arguments are the same as for hat_devload().
184 203 * setup() must be called once, then any number of calls to remap(),
185 204 * followed by a final call to release()
186 205 *
187 206 * Used by ppcopy(), page_zero(), the memscrubber, and the kernel debugger.
188 207 */
189 208 typedef paddr_t hat_mempte_t; /* phys addr of PTE */
190 209 extern hat_mempte_t hat_mempte_setup(caddr_t addr);
191 210 extern void hat_mempte_remap(pfn_t, caddr_t, hat_mempte_t,
192 211 uint_t attr, uint_t flags);
193 212 extern void hat_mempte_release(caddr_t addr, hat_mempte_t);
194 213
195 214 /*
196 215 * Interfaces to manage which thread has access to htable and hment reserves.
197 216 * The USE_HAT_RESERVES macro should always be recomputed in full. Its value
198 217 * (due to curthread) can change after any call into kmem/vmem.
199 218 */
200 219 extern uint_t can_steal_post_boot;
201 220 extern uint_t use_boot_reserve;
202 221 #define USE_HAT_RESERVES() \
203 222 (use_boot_reserve || curthread->t_hatdepth > 1 || \
204 223 panicstr != NULL || vmem_is_populator())
205 224
206 225 /*
207 226 * initialization stuff needed by by startup, mp_startup...
208 227 */
209 228 extern void hat_cpu_online(struct cpu *);
210 229 extern void hat_cpu_offline(struct cpu *);
211 230 extern void setup_vaddr_for_ppcopy(struct cpu *);
212 231 extern void teardown_vaddr_for_ppcopy(struct cpu *);
213 232 extern void clear_boot_mappings(uintptr_t, uintptr_t);
214 233
215 234 /*
216 235 * magic value to indicate that all TLB entries should be demapped.
217 236 */
218 237 #define DEMAP_ALL_ADDR (~(uintptr_t)0)
219 238
220 239 /*
↓ open down ↓ |
53 lines elided |
↑ open up ↑ |
221 240 * not in any include file???
222 241 */
223 242 extern void halt(char *fmt);
224 243
225 244 /*
226 245 * x86 specific routines for use online in setup or i86pc/vm files
227 246 */
228 247 extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
229 248 caddr_t ekernelheap);
230 249 extern void hat_kern_setup(void);
231 -extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
232 250 extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
233 251 x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
234 252 extern void hat_init_finish(void);
235 253 extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
236 254 extern pfn_t hat_kpm_va2pfn(caddr_t);
237 255 extern page_t *hat_kpm_vaddr2page(caddr_t);
238 256 extern uintptr_t hat_kernelbase(uintptr_t);
239 257 extern void hat_kmap_init(uintptr_t base, size_t len);
240 258
241 259 extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry);
242 260
243 -#if !defined(__xpv)
244 -/*
245 - * routines to deal with delayed TLB invalidations for idle CPUs
246 - */
247 -extern void tlb_going_idle(void);
248 -extern void tlb_service(void);
249 -#endif
261 +extern void mmu_calc_user_slots(void);
262 +extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
263 +extern void hat_switch(struct hat *hat);
250 264
265 +#define TLB_RANGE_LEN(r) ((r)->tr_cnt << LEVEL_SHIFT((r)->tr_level))
266 +
251 267 /*
252 - * Hat switch function invoked to load a new context into %cr3
268 + * A range of virtual pages for purposes of demapping.
253 269 */
254 -extern void hat_switch(struct hat *hat);
270 +typedef struct tlb_range {
271 + uintptr_t tr_va; /* address of page */
272 + ulong_t tr_cnt; /* number of pages in range */
273 + int8_t tr_level; /* page table level */
274 +} tlb_range_t;
255 275
256 -#ifdef __xpv
276 +#if defined(__xpv)
277 +
278 +#define XPV_DISALLOW_MIGRATE() xen_block_migrate()
279 +#define XPV_ALLOW_MIGRATE() xen_allow_migrate()
280 +
281 +#define mmu_flush_tlb_page(va) mmu_invlpg((caddr_t)va)
282 +#define mmu_flush_tlb_kpage(va) mmu_invlpg((caddr_t)va)
283 +
257 284 /*
258 285 * Interfaces to use around code that maps/unmaps grant table references.
259 286 */
260 287 extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *);
261 288 extern void hat_release_mapping(hat_t *, caddr_t);
262 289
263 -#define XPV_DISALLOW_MIGRATE() xen_block_migrate()
264 -#define XPV_ALLOW_MIGRATE() xen_allow_migrate()
265 -
266 290 #else
267 291
268 292 #define XPV_DISALLOW_MIGRATE() /* nothing */
269 293 #define XPV_ALLOW_MIGRATE() /* nothing */
270 294
271 295 #define pfn_is_foreign(pfn) __lintzero
272 296
273 -#endif
297 +typedef enum flush_tlb_type {
298 + FLUSH_TLB_ALL = 1,
299 + FLUSH_TLB_NONGLOBAL = 2,
300 + FLUSH_TLB_RANGE = 3,
301 +} flush_tlb_type_t;
274 302
303 +extern void mmu_flush_tlb(flush_tlb_type_t, tlb_range_t *);
304 +extern void mmu_flush_tlb_kpage(uintptr_t);
305 +extern void mmu_flush_tlb_page(uintptr_t);
275 306
307 +extern void hati_cpu_punchin(cpu_t *cpu, uintptr_t va, uint_t attrs);
308 +
309 +/*
310 + * routines to deal with delayed TLB invalidations for idle CPUs
311 + */
312 +extern void tlb_going_idle(void);
313 +extern void tlb_service(void);
314 +
315 +#endif /* !__xpv */
316 +
276 317 #endif /* _KERNEL */
277 318
278 319 #ifdef __cplusplus
279 320 }
280 321 #endif
281 322
282 323 #endif /* _VM_HAT_I86_H */
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX