Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/vm/htable.h
+++ new/usr/src/uts/i86pc/vm/htable.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25 /*
26 26 * Copyright (c) 2014 by Delphix. All rights reserved.
27 + * Copyright 2018 Joyent, Inc.
27 28 */
28 29
29 30 #ifndef _VM_HTABLE_H
30 31 #define _VM_HTABLE_H
31 32
32 33 #ifdef __cplusplus
33 34 extern "C" {
34 35 #endif
35 36
36 37 #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL)
37 38 #include <asm/htable.h>
38 39 #endif
39 40
40 41 extern void atomic_andb(uint8_t *addr, uint8_t value);
41 42 extern void atomic_orb(uint8_t *addr, uint8_t value);
42 43 extern void atomic_inc16(uint16_t *addr);
43 44 extern void atomic_dec16(uint16_t *addr);
44 -extern void mmu_tlbflush_entry(caddr_t addr);
45 45
46 46 /*
47 47 * Each hardware page table has an htable_t describing it.
48 48 *
49 49 * We use a reference counter mechanism to detect when we can free an htable.
50 50 * In the implementation the reference count is split into 2 separate counters:
51 51 *
52 52 * ht_busy is a traditional reference count of uses of the htable pointer
53 53 *
54 54 * ht_valid_cnt is a count of how many references are implied by valid PTE/PTP
55 55 * entries in the pagetable
56 56 *
57 57 * ht_busy is only incremented by htable_lookup() or htable_create()
58 58 * while holding the appropriate hash_table mutex. While installing a new
59 59 * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have
60 60 * done an htable_lookup() or htable_create() but not the htable_release() yet.
61 61 *
62 62 * htable_release(), while holding the mutex, can know that if
63 63 * busy == 1 and valid_cnt == 0, the htable can be free'd.
64 64 *
65 65 * The fields have been ordered to make htable_lookup() fast. Hence,
66 66 * ht_hat, ht_vaddr, ht_level and ht_next need to be clustered together.
67 67 */
68 68 struct htable {
69 69 struct htable *ht_next; /* forward link for hash table */
70 70 struct hat *ht_hat; /* hat this mapping comes from */
71 71 uintptr_t ht_vaddr; /* virt addr at start of this table */
72 72 int8_t ht_level; /* page table level: 0=4K, 1=2M, ... */
73 73 uint8_t ht_flags; /* see below */
74 74 int16_t ht_busy; /* implements locking protocol */
75 75 int16_t ht_valid_cnt; /* # of valid entries in this table */
76 76 uint32_t ht_lock_cnt; /* # of locked entries in this table */
77 77 /* never used for kernel hat */
↓ open down ↓ |
23 lines elided |
↑ open up ↑ |
78 78 pfn_t ht_pfn; /* pfn of page of the pagetable */
79 79 struct htable *ht_prev; /* backward link for hash table */
80 80 struct htable *ht_parent; /* htable that points to this htable */
81 81 struct htable *ht_shares; /* for HTABLE_SHARED_PFN only */
82 82 };
83 83 typedef struct htable htable_t;
84 84
85 85 /*
86 86 * Flags values for htable ht_flags field:
87 87 *
88 - * HTABLE_VLP - this is the top level htable of a VLP HAT.
88 + * HTABLE_COPIED - This is the top level htable of a HAT being used with per-CPU
89 + * pagetables.
89 90 *
90 91 * HTABLE_SHARED_PFN - this htable had its PFN assigned from sharing another
91 92 * htable. Used by hat_share() for ISM.
92 93 */
93 -#define HTABLE_VLP (0x01)
94 +#define HTABLE_COPIED (0x01)
94 95 #define HTABLE_SHARED_PFN (0x02)
95 96
96 97 /*
97 98 * The htable hash table hashing function. The 28 is so that high
98 99 * order bits are included in the hash index to skew the wrap
99 100 * around of addresses. Even though the hash buckets are stored per
100 101 * hat we include the value of hat pointer in the hash function so
101 102 * that the secondary hash for the htable mutex winds up being different in
102 103 * every address space.
103 104 */
104 105 #define HTABLE_HASH(hat, va, lvl) \
105 106 ((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) + \
106 107 ((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1))
107 108
108 109 /*
109 - * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info.
110 + * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info. For more
111 + * information on its use and members, see uts/i86pc/vm/hat_i86.c.
110 112 */
111 113 struct hat_cpu_info {
112 114 kmutex_t hci_mutex; /* mutex to ensure sequential usage */
113 115 #if defined(__amd64)
114 - pfn_t hci_vlp_pfn; /* pfn of hci_vlp_l3ptes */
115 - x86pte_t *hci_vlp_l3ptes; /* VLP Level==3 pagetable (top) */
116 - x86pte_t *hci_vlp_l2ptes; /* VLP Level==2 pagetable */
116 + pfn_t hci_pcp_l3pfn; /* pfn of hci_pcp_l3ptes */
117 + pfn_t hci_pcp_l2pfn; /* pfn of hci_pcp_l2ptes */
118 + x86pte_t *hci_pcp_l3ptes; /* PCP Level==3 pagetable (top) */
119 + x86pte_t *hci_pcp_l2ptes; /* PCP Level==2 pagetable */
120 + struct hat *hci_user_hat; /* CPU specific HAT */
121 + pfn_t hci_user_l3pfn; /* pfn of hci_user_l3ptes */
122 + x86pte_t *hci_user_l3ptes; /* PCP User L3 pagetable */
117 123 #endif /* __amd64 */
118 124 };
119 125
120 126
121 127 /*
122 128 * Compute the last page aligned VA mapped by an htable.
123 129 *
124 130 * Given a va and a level, compute the virtual address of the start of the
125 131 * next page at that level.
126 132 *
127 133 * XX64 - The check for the VA hole needs to be better generalized.
128 134 */
129 135 #if defined(__amd64)
130 -#define HTABLE_NUM_PTES(ht) (((ht)->ht_flags & HTABLE_VLP) ? 4 : 512)
136 +#define HTABLE_NUM_PTES(ht) (((ht)->ht_flags & HTABLE_COPIED) ? \
137 + (((ht)->ht_level == mmu.max_level) ? 512 : 4) : 512)
131 138
132 139 #define HTABLE_LAST_PAGE(ht) \
133 140 ((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\
134 141 ((ht)->ht_vaddr - MMU_PAGESIZE + \
135 142 ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level))))
136 143
137 144 #define NEXT_ENTRY_VA(va, l) \
138 145 ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l) == mmu.hole_start ? \
139 146 mmu.hole_end : (va & LEVEL_MASK(l)) + LEVEL_SIZE(l))
140 147
141 148 #elif defined(__i386)
142 149
143 150 #define HTABLE_NUM_PTES(ht) \
144 151 (!mmu.pae_hat ? 1024 : ((ht)->ht_level == 2 ? 4 : 512))
145 152
146 153 #define HTABLE_LAST_PAGE(ht) ((ht)->ht_vaddr - MMU_PAGESIZE + \
147 154 ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level)))
148 155
149 156 #define NEXT_ENTRY_VA(va, l) ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l))
150 157
151 158 #endif
152 159
153 160 #if defined(_KERNEL)
154 161
155 162 /*
156 163 * initialization function called from hat_init()
157 164 */
158 165 extern void htable_init(void);
159 166
160 167 /*
161 168 * Functions to lookup, or "lookup and create", the htable corresponding
162 169 * to the virtual address "vaddr" in the "hat" at the given "level" of
163 170 * page tables. htable_lookup() may return NULL if no such entry exists.
164 171 *
165 172 * On return the given htable is marked busy (a shared lock) - this prevents
166 173 * the htable from being stolen or freed until htable_release() is called.
167 174 *
168 175 * If kalloc_flag is set on an htable_create() we can't call kmem allocation
169 176 * routines for this htable, since it's for the kernel hat itself.
170 177 *
171 178 * htable_acquire() is used when an htable pointer has been extracted from
172 179 * an hment and we need to get a reference to the htable.
173 180 */
174 181 extern htable_t *htable_lookup(struct hat *hat, uintptr_t vaddr, level_t level);
175 182 extern htable_t *htable_create(struct hat *hat, uintptr_t vaddr, level_t level,
176 183 htable_t *shared);
177 184 extern void htable_acquire(htable_t *);
178 185
179 186 extern void htable_release(htable_t *ht);
180 187 extern void htable_destroy(htable_t *ht);
181 188
182 189 /*
183 190 * Code to free all remaining htables for a hat. Called after the hat is no
184 191 * longer in use by any thread.
185 192 */
186 193 extern void htable_purge_hat(struct hat *hat);
187 194
188 195 /*
189 196 * Find the htable, page table entry index, and PTE of the given virtual
190 197 * address. If not found returns NULL. When found, returns the htable_t *,
191 198 * sets entry, and has a hold on the htable.
192 199 */
193 200 extern htable_t *htable_getpte(struct hat *, uintptr_t, uint_t *, x86pte_t *,
194 201 level_t);
195 202
196 203 /*
197 204 * Similar to htable_getpte(), except that this only succeeds if a valid
198 205 * page mapping is present.
199 206 */
200 207 extern htable_t *htable_getpage(struct hat *hat, uintptr_t va, uint_t *entry);
201 208
202 209 /*
203 210 * Called to allocate initial/additional htables for reserve.
204 211 */
205 212 extern void htable_initial_reserve(uint_t);
206 213 extern void htable_reserve(uint_t);
207 214
208 215 /*
209 216 * Used to readjust the htable reserve after the reserve list has been used.
210 217 * Also called after boot to release left over boot reserves.
211 218 */
212 219 extern void htable_adjust_reserve(void);
213 220
214 221 /*
215 222 * return number of bytes mapped by all the htables in a given hat
216 223 */
217 224 extern size_t htable_mapped(struct hat *);
218 225
219 226
220 227 /*
221 228 * Attach initial pagetables as htables
222 229 */
223 230 extern void htable_attach(struct hat *, uintptr_t, level_t, struct htable *,
224 231 pfn_t);
225 232
226 233 /*
227 234 * Routine to find the next populated htable at or above a given virtual
228 235 * address. Can specify an upper limit, or HTABLE_WALK_TO_END to indicate
229 236 * that it should search the entire address space. Similar to
230 237 * htable_getpte(), but used for walking through address ranges. It can be
231 238 * used like this:
232 239 *
233 240 * va = ...
234 241 * ht = NULL;
235 242 * while (va < end_va) {
236 243 * pte = htable_walk(hat, &ht, &va, end_va);
237 244 * if (!pte)
238 245 * break;
239 246 *
240 247 * ... code to operate on page at va ...
241 248 *
242 249 * va += LEVEL_SIZE(ht->ht_level);
243 250 * }
244 251 * if (ht)
245 252 * htable_release(ht);
246 253 *
247 254 */
248 255 extern x86pte_t htable_walk(struct hat *hat, htable_t **ht, uintptr_t *va,
249 256 uintptr_t eaddr);
250 257
251 258 #define HTABLE_WALK_TO_END ((uintptr_t)-1)
252 259
253 260 /*
254 261 * Utilities to convert between virtual addresses and page table entry indices.
255 262 */
256 263 extern uint_t htable_va2entry(uintptr_t va, htable_t *ht);
257 264 extern uintptr_t htable_e2va(htable_t *ht, uint_t entry);
258 265
259 266 /*
260 267 * Interfaces that provide access to page table entries via the htable.
261 268 *
262 269 * Note that all accesses except x86pte_copy() and x86pte_zero() are atomic.
263 270 */
264 271 extern void x86pte_cpu_init(cpu_t *);
265 272 extern void x86pte_cpu_fini(cpu_t *);
266 273
267 274 extern x86pte_t x86pte_get(htable_t *, uint_t entry);
268 275
269 276 /*
270 277 * x86pte_set returns LPAGE_ERROR if it's asked to overwrite a page table
271 278 * link with a large page mapping.
272 279 */
273 280 #define LPAGE_ERROR (-(x86pte_t)1)
274 281 extern x86pte_t x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *);
275 282
276 283 extern x86pte_t x86pte_inval(htable_t *ht, uint_t entry,
277 284 x86pte_t old, x86pte_t *ptr, boolean_t tlb);
278 285
279 286 extern x86pte_t x86pte_update(htable_t *ht, uint_t entry,
280 287 x86pte_t old, x86pte_t new);
281 288
282 289 extern void x86pte_copy(htable_t *src, htable_t *dest, uint_t entry,
283 290 uint_t cnt);
284 291
285 292 /*
286 293 * access to a pagetable knowing only the pfn
287 294 */
288 295 extern x86pte_t *x86pte_mapin(pfn_t, uint_t, htable_t *);
289 296 extern void x86pte_mapout(void);
290 297
291 298 /*
292 299 * these are actually inlines for "lock; incw", "lock; decw", etc. instructions.
293 300 */
294 301 #define HTABLE_INC(x) atomic_inc16((uint16_t *)&x)
295 302 #define HTABLE_DEC(x) atomic_dec16((uint16_t *)&x)
296 303 #define HTABLE_LOCK_INC(ht) atomic_inc_32(&(ht)->ht_lock_cnt)
297 304 #define HTABLE_LOCK_DEC(ht) atomic_dec_32(&(ht)->ht_lock_cnt)
298 305
299 306 #ifdef __xpv
300 307 extern void xen_flush_va(caddr_t va);
301 308 extern void xen_gflush_va(caddr_t va, cpuset_t);
302 309 extern void xen_flush_tlb(void);
303 310 extern void xen_gflush_tlb(cpuset_t);
304 311 extern void xen_pin(pfn_t, level_t);
305 312 extern void xen_unpin(pfn_t);
306 313 extern int xen_kpm_page(pfn_t, uint_t);
307 314
308 315 /*
309 316 * The hypervisor maps all page tables into our address space read-only.
310 317 * Under normal circumstances, the hypervisor then handles all updates to
311 318 * the page tables underneath the covers for us. However, when we are
312 319 * trying to dump core after a hypervisor panic, the hypervisor is no
313 320 * longer available to do these updates. To work around the protection
314 321 * problem, we simply disable write-protect checking for the duration of a
315 322 * pagetable update operation.
316 323 */
317 324 #define XPV_ALLOW_PAGETABLE_UPDATES() \
318 325 { \
319 326 if (IN_XPV_PANIC()) \
320 327 setcr0((getcr0() & ~CR0_WP) & 0xffffffff); \
321 328 }
322 329 #define XPV_DISALLOW_PAGETABLE_UPDATES() \
323 330 { \
324 331 if (IN_XPV_PANIC() > 0) \
325 332 setcr0((getcr0() | CR0_WP) & 0xffffffff); \
326 333 }
327 334
328 335 #else /* __xpv */
329 336
330 337 #define XPV_ALLOW_PAGETABLE_UPDATES()
331 338 #define XPV_DISALLOW_PAGETABLE_UPDATES()
332 339
333 340 #endif
334 341
335 342 #endif /* _KERNEL */
336 343
337 344
338 345 #ifdef __cplusplus
339 346 }
340 347 #endif
341 348
342 349 #endif /* _VM_HTABLE_H */
↓ open down ↓ |
202 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX