Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
9208 hati_demap_func should take pagesize into account
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Tim Kordas <tim.kordas@joyent.com>
Reviewed by: Yuri Pankov <yuripv@yuripv.net>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/vm/hat_pte.h
+++ new/usr/src/uts/i86pc/vm/hat_pte.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 + * Copyright 2018 Joyent, Inc.
24 25 */
25 26
26 27 #ifndef _VM_HAT_PTE_H
27 28 #define _VM_HAT_PTE_H
28 29
29 30 #ifdef __cplusplus
30 31 extern "C" {
31 32 #endif
32 33
33 34 #include <sys/types.h>
34 35 #include <sys/mach_mmu.h>
35 36
36 37 /*
37 38 * macros to get/set/clear the PTE fields
38 39 */
39 40 #define PTE_SET(p, f) ((p) |= (f))
40 41 #define PTE_CLR(p, f) ((p) &= ~(x86pte_t)(f))
41 42 #define PTE_GET(p, f) ((p) & (f))
42 43
43 44 /*
44 45 * Handy macro to check if a pagetable entry or pointer is valid
45 46 */
46 47 #define PTE_ISVALID(p) PTE_GET(p, PT_VALID)
47 48
48 49 /*
49 50 * Does a PTE map a large page.
50 51 */
51 52 #define PTE_IS_LGPG(p, l) ((l) > 0 && PTE_GET((p), PT_PAGESIZE))
52 53
53 54 /*
54 55 * does this PTE represent a page (not a pointer to another page table)?
55 56 */
56 57 #define PTE_ISPAGE(p, l) \
57 58 (PTE_ISVALID(p) && ((l) == 0 || PTE_GET(p, PT_PAGESIZE)))
58 59
59 60 /*
60 61 * Handy macro to check if 2 PTE's are the same - ignores REF/MOD bits.
61 62 * On the 64 bit hypervisor we also have to ignore the high order
62 63 * software bits and the global/user bit which are set/cleared
63 64 * capriciously (by the hypervisor!)
64 65 */
65 66 #if defined(__amd64) && defined(__xpv)
66 67 #define PT_IGNORE ((0x7fful << 52) | PT_GLOBAL | PT_USER)
67 68 #else
68 69 #define PT_IGNORE (0)
69 70 #endif
70 71 #define PTE_EQUIV(a, b) (((a) | (PT_IGNORE | PT_REF | PT_MOD)) == \
71 72 ((b) | (PT_IGNORE | PT_REF | PT_MOD)))
72 73
73 74 /*
 74  75  * Shorthand for converting a PTE to its pfn.
75 76 */
76 77 #define PTE2MFN(p, l) \
77 78 mmu_btop(PTE_GET((p), PTE_IS_LGPG((p), (l)) ? PT_PADDR_LGPG : PT_PADDR))
78 79 #ifdef __xpv
79 80 #define PTE2PFN(p, l) pte2pfn(p, l)
80 81 #else
81 82 #define PTE2PFN(p, l) PTE2MFN(p, l)
82 83 #endif
83 84
84 85 #define PT_NX (0x8000000000000000ull)
85 86 #define PT_PADDR (0x000ffffffffff000ull)
86 87 #define PT_PADDR_LGPG (0x000fffffffffe000ull) /* phys addr for large pages */
87 88
88 89 /*
89 90 * Macros to create a PTP or PTE from the pfn and level
90 91 */
91 92 #ifdef __xpv
92 93
93 94 /*
94 95 * we use the highest order bit in physical address pfns to mark foreign mfns
95 96 */
96 97 #ifdef _LP64
97 98 #define PFN_IS_FOREIGN_MFN (1ul << 51)
98 99 #else
99 100 #define PFN_IS_FOREIGN_MFN (1ul << 31)
100 101 #endif
101 102
102 103 #define MAKEPTP(pfn, l) \
103 104 (pa_to_ma(pfn_to_pa(pfn)) | mmu.ptp_bits[(l) + 1])
104 105 #define MAKEPTE(pfn, l) \
105 106 ((pfn & PFN_IS_FOREIGN_MFN) ? \
106 107 ((pfn_to_pa(pfn & ~PFN_IS_FOREIGN_MFN) | mmu.pte_bits[l]) | \
107 108 PT_FOREIGN | PT_REF | PT_MOD) : \
108 109 (pa_to_ma(pfn_to_pa(pfn)) | mmu.pte_bits[l]))
109 110 #else
110 111 #define MAKEPTP(pfn, l) \
111 112 (pfn_to_pa(pfn) | mmu.ptp_bits[(l) + 1])
112 113 #define MAKEPTE(pfn, l) \
113 114 (pfn_to_pa(pfn) | mmu.pte_bits[l])
114 115 #endif
115 116
116 117 /*
117 118 * The idea of "level" refers to the level where the page table is used in the
118 119  * hardware address translation steps. The level values correspond to the
119 120 * following names of tables used in AMD/Intel architecture documents:
120 121 *
121 122 * AMD/INTEL name Level #
122 123 * ---------------------- -------
123 124 * Page Map Level 4 3
124 125 * Page Directory Pointer 2
125 126 * Page Directory 1
126 127 * Page Table 0
127 128 *
128 129 * The numbering scheme is such that the values of 0 and 1 can correspond to
129 130  * the pagesize codes used for MPSS support. For now the maximum level at
130 131  * which you can have a large page is a constant that may change in
↓ open down ↓ |
97 lines elided |
↑ open up ↑ |
131 132 * future processors.
132 133 *
133 134 * The type of "level_t" is signed so that it can be used like:
134 135 * level_t l;
135 136 * ...
136 137 * while (--l >= 0)
137 138 * ...
138 139 */
139 140 #define MAX_NUM_LEVEL 4
140 141 #define MAX_PAGE_LEVEL 2
142 +#define MIN_PAGE_LEVEL 0
141 143 typedef int8_t level_t;
142 144 #define LEVEL_SHIFT(l) (mmu.level_shift[l])
143 145 #define LEVEL_SIZE(l) (mmu.level_size[l])
144 146 #define LEVEL_OFFSET(l) (mmu.level_offset[l])
145 147 #define LEVEL_MASK(l) (mmu.level_mask[l])
146 148
147 149 /*
148 150 * Macros to:
149 151 * Check for a PFN above 4Gig and 64Gig for 32 bit PAE support
150 152 */
151 153 #define PFN_4G (4ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
152 154 #define PFN_64G (64ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
153 155 #define PFN_ABOVE4G(pfn) ((pfn) >= PFN_4G)
154 156 #define PFN_ABOVE64G(pfn) ((pfn) >= PFN_64G)
155 157
156 158 /*
157 - * The CR3 register holds the physical address of the top level page table.
159 + * The CR3 register holds the physical address of the top level page table,
160 + * along with the current PCID if any.
158 161 */
159 -#define MAKECR3(pfn) mmu_ptob(pfn)
162 +#define MAKECR3(pfn, pcid) (mmu_ptob(pfn) | pcid)
160 163
161 164 /*
162 165 * HAT/MMU parameters that depend on kernel mode and/or processor type
163 166 */
164 167 struct htable;
165 168 struct hat_mmu_info {
166 169 x86pte_t pt_nx; /* either 0 or PT_NX */
167 170 x86pte_t pt_global; /* either 0 or PT_GLOBAL */
168 171
169 172 pfn_t highest_pfn;
170 173
171 174 uint_t num_level; /* number of page table levels in use */
172 175 uint_t max_level; /* just num_level - 1 */
173 176 uint_t max_page_level; /* maximum level at which we can map a page */
174 177 uint_t umax_page_level; /* max user page map level */
175 178 uint_t ptes_per_table; /* # of entries in lower level page tables */
176 - uint_t top_level_count; /* # of entries in top most level page table */
179 + uint_t top_level_count; /* # of entries in top-level page table */
180 + uint_t top_level_uslots; /* # of user slots in top-level page table */
181 + uint_t num_copied_ents; /* # of PCP-copied PTEs to create */
182 + /* 32-bit versions of values */
183 + uint_t top_level_uslots32;
184 + uint_t max_level32;
185 + uint_t num_copied_ents32;
177 186
178 - uint_t hash_cnt; /* cnt of entries in htable_hash_cache */
179 - uint_t vlp_hash_cnt; /* cnt of entries in vlp htable_hash_cache */
187 + uint_t hash_cnt; /* cnt of entries in htable_hash_cache */
188 + uint_t hat32_hash_cnt; /* cnt of entries in 32-bit htable_hash_cache */
180 189
181 190 uint_t pae_hat; /* either 0 or 1 */
182 191
183 192 uintptr_t hole_start; /* start of VA hole (or -1 if none) */
184 193 uintptr_t hole_end; /* end of VA hole (or 0 if none) */
185 194
186 195 struct htable **kmap_htables; /* htables for segmap + 32 bit heap */
187 196 x86pte_t *kmap_ptes; /* mapping of pagetables that map kmap */
188 197 uintptr_t kmap_addr; /* start addr of kmap */
189 198 uintptr_t kmap_eaddr; /* end addr of kmap */
190 199
191 200 uint_t pte_size; /* either 4 or 8 */
192 201 uint_t pte_size_shift; /* either 2 or 3 */
193 202 x86pte_t ptp_bits[MAX_NUM_LEVEL]; /* bits set for interior PTP */
194 203 x86pte_t pte_bits[MAX_NUM_LEVEL]; /* bits set for leaf PTE */
195 204
196 205 /*
197 206 * A range of VA used to window pages in the i86pc/vm code.
198 207 * See PWIN_XXX macros.
199 208 */
200 209 caddr_t pwin_base;
201 210 caddr_t pwin_pte_va;
202 211 paddr_t pwin_pte_pa;
203 212
204 213 /*
205 214 * The following tables are equivalent to PAGEXXXXX at different levels
206 215 * in the page table hierarchy.
207 216 */
208 217 uint_t level_shift[MAX_NUM_LEVEL]; /* PAGESHIFT for given level */
209 218 uintptr_t level_size[MAX_NUM_LEVEL]; /* PAGESIZE for given level */
210 219 uintptr_t level_offset[MAX_NUM_LEVEL]; /* PAGEOFFSET for given level */
211 220 uintptr_t level_mask[MAX_NUM_LEVEL]; /* PAGEMASK for given level */
212 221 };
213 222
214 223
215 224 #if defined(_KERNEL)
216 225
217 226 /*
218 227 * Macros to access the HAT's private page windows. They're used for
219 228 * accessing pagetables, ppcopy() and page_zero().
220 229 * The 1st two macros are used to get an index for the particular use.
221 230 * The next three give you:
222 231 * - the virtual address of the window
223 232 * - the virtual address of the pte that maps the window
224 233  * - the physical address of the pte that maps the window
225 234 */
226 235 #define PWIN_TABLE(cpuid) ((cpuid) * 2)
227 236 #define PWIN_SRC(cpuid) ((cpuid) * 2 + 1) /* for x86pte_copy() */
228 237 #define PWIN_VA(x) (mmu.pwin_base + ((x) << MMU_PAGESHIFT))
229 238 #define PWIN_PTE_VA(x) (mmu.pwin_pte_va + ((x) << mmu.pte_size_shift))
230 239 #define PWIN_PTE_PA(x) (mmu.pwin_pte_pa + ((x) << mmu.pte_size_shift))
231 240
232 241 /*
233 242 * The concept of a VA hole exists in AMD64. This might need to be made
234 243 * model specific eventually.
235 244 *
236 245 * In the 64 bit kernel PTE loads are atomic, but need atomic_cas_64 on 32
237 246 * bit kernel.
238 247 */
239 248 #if defined(__amd64)
240 249
241 250 #ifdef lint
242 251 #define IN_VA_HOLE(va) (__lintzero)
243 252 #else
244 253 #define IN_VA_HOLE(va) (mmu.hole_start <= (va) && (va) < mmu.hole_end)
245 254 #endif
246 255
247 256 #define FMT_PTE "0x%lx"
248 257 #define GET_PTE(ptr) (*(x86pte_t *)(ptr))
249 258 #define SET_PTE(ptr, pte) (*(x86pte_t *)(ptr) = pte)
250 259 #define CAS_PTE(ptr, x, y) atomic_cas_64(ptr, x, y)
251 260
252 261 #elif defined(__i386)
253 262
254 263 #define IN_VA_HOLE(va) (__lintzero)
255 264
256 265 #define FMT_PTE "0x%llx"
257 266
258 267 /* on 32 bit kernels, 64 bit loads aren't atomic, use get_pte64() */
259 268 extern x86pte_t get_pte64(x86pte_t *ptr);
260 269 #define GET_PTE(ptr) (mmu.pae_hat ? get_pte64(ptr) : *(x86pte32_t *)(ptr))
261 270 #define SET_PTE(ptr, pte) \
262 271 ((mmu.pae_hat ? ((x86pte32_t *)(ptr))[1] = (pte >> 32) : 0), \
263 272 *(x86pte32_t *)(ptr) = pte)
264 273 #define CAS_PTE(ptr, x, y) \
265 274 (mmu.pae_hat ? atomic_cas_64(ptr, x, y) : \
266 275 atomic_cas_32((uint32_t *)(ptr), (uint32_t)(x), (uint32_t)(y)))
267 276
268 277 #endif /* __i386 */
269 278
270 279 /*
271 280 * Return a pointer to the pte entry at the given index within a page table.
272 281 */
273 282 #define PT_INDEX_PTR(p, x) \
274 283 ((x86pte_t *)((uintptr_t)(p) + ((x) << mmu.pte_size_shift)))
275 284
276 285 /*
277 286 * Return the physical address of the pte entry at the given index within a
278 287 * page table.
279 288 */
280 289 #define PT_INDEX_PHYSADDR(p, x) \
281 290 ((paddr_t)(p) + ((x) << mmu.pte_size_shift))
282 291
283 292 /*
284 293 * From pfn to bytes, careful not to lose bits on PAE.
285 294 */
286 295 #define pfn_to_pa(pfn) (mmu_ptob((paddr_t)(pfn)))
287 296
288 297 #ifdef __xpv
289 298 extern pfn_t pte2pfn(x86pte_t, level_t);
290 299 #endif
291 300
292 301 extern struct hat_mmu_info mmu;
293 302
294 303 #endif /* _KERNEL */
295 304
296 305
297 306 #ifdef __cplusplus
298 307 }
299 308 #endif
300 309
301 310 #endif /* _VM_HAT_PTE_H */
↓ open down ↓ |
112 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX