8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
--- old/usr/src/uts/i86pc/vm/kboot_mmu.c
+++ new/usr/src/uts/i86pc/vm/kboot_mmu.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 + *
26 + * Copyright 2018 Joyent, Inc.
25 27 */
26 28
27 29 #include <sys/types.h>
28 30 #include <sys/systm.h>
29 31 #include <sys/archsystm.h>
30 32 #include <sys/debug.h>
31 33 #include <sys/bootconf.h>
32 34 #include <sys/bootsvcs.h>
33 35 #include <sys/bootinfo.h>
34 36 #include <sys/mman.h>
35 37 #include <sys/cmn_err.h>
36 38 #include <sys/param.h>
37 39 #include <sys/machparam.h>
38 40 #include <sys/machsystm.h>
39 41 #include <sys/promif.h>
40 42 #include <sys/kobj.h>
41 43 #ifdef __xpv
42 44 #include <sys/hypervisor.h>
43 45 #endif
44 46 #include <vm/kboot_mmu.h>
45 47 #include <vm/hat_pte.h>
46 48 #include <vm/hat_i86.h>
47 49 #include <vm/seg_kmem.h>
48 50
49 51 #if 0
50 52 /*
51 53 * Joe's debug printing
52 54 */
53 55 #define DBG(x) \
54 56 bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
55 57 #else
56 58 #define DBG(x) /* naught */
57 59 #endif
58 60
59 61 /*
60 62 * Page table and memory stuff.
61 63 */
62 64 static caddr_t window;
63 65 static caddr_t pte_to_window;
64 66
65 67 /*
  66   68  	 * these are needed by mmu_init()
67 69 */
68 70 int kbm_nx_support = 0; /* NX bit in PTEs is in use */
  69   71  	int kbm_pae_support = 0;	/* PAE (64-bit page table entries) is in use */
  70   72  	int kbm_pge_support = 0;	/* PGE (page table global bit) is enabled */
71 73 int kbm_largepage_support = 0;
72 74 uint_t kbm_nucleus_size = 0;
73 75
74 76 #define BOOT_SHIFT(l) (shift_amt[l])
75 77 #define BOOT_SZ(l) ((size_t)1 << BOOT_SHIFT(l))
76 78 #define BOOT_OFFSET(l) (BOOT_SZ(l) - 1)
77 79 #define BOOT_MASK(l) (~BOOT_OFFSET(l))
78 80
79 81 /*
80 82 * Initialize memory management parameters for boot time page table management
81 83 */
82 84 void
83 85 kbm_init(struct xboot_info *bi)
84 86 {
85 87 /*
86 88 * configure mmu information
87 89 */
88 90 kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
89 91 kbm_largepage_support = bi->bi_use_largepage;
90 92 kbm_nx_support = bi->bi_use_nx;
91 93 kbm_pae_support = bi->bi_use_pae;
92 94 kbm_pge_support = bi->bi_use_pge;
93 95 window = bi->bi_pt_window;
94 96 DBG(window);
95 97 pte_to_window = bi->bi_pte_to_pt_window;
96 98 DBG(pte_to_window);
97 99 if (kbm_pae_support) {
98 100 shift_amt = shift_amt_pae;
99 101 ptes_per_table = 512;
100 102 pte_size = 8;
101 103 lpagesize = TWO_MEG;
102 104 #ifdef __amd64
103 105 top_level = 3;
104 106 #else
105 107 top_level = 2;
106 108 #endif
107 109 } else {
108 110 shift_amt = shift_amt_nopae;
109 111 ptes_per_table = 1024;
110 112 pte_size = 4;
111 113 lpagesize = FOUR_MEG;
112 114 top_level = 1;
113 115 }
114 116
115 117 #ifdef __xpv
116 118 xen_info = bi->bi_xen_start_info;
117 119 mfn_list = (mfn_t *)xen_info->mfn_list;
118 120 DBG(mfn_list);
119 121 mfn_count = xen_info->nr_pages;
120 122 DBG(mfn_count);
121 123 #endif
122 124 top_page_table = bi->bi_top_page_table;
123 125 DBG(top_page_table);
124 126 }
125 127
126 128 /*
 127  129  	 * Change the addressable page table window to point at a given page
128 130 */
129 131 /*ARGSUSED*/
130 132 void *
131 133 kbm_remap_window(paddr_t physaddr, int writeable)
132 134 {
133 135 x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;
134 136
135 137 DBG(physaddr);
136 138
137 139 #ifdef __xpv
138 140 if (!writeable)
139 141 pt_bits &= ~PT_WRITABLE;
140 142 if (HYPERVISOR_update_va_mapping((uintptr_t)window,
141 143 pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
142 144 bop_panic("HYPERVISOR_update_va_mapping() failed");
143 145 #else
144 146 if (kbm_pae_support)
145 147 *((x86pte_t *)pte_to_window) = physaddr | pt_bits;
146 148 else
147 149 *((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
148 - mmu_tlbflush_entry(window);
150 + mmu_invlpg(window);
149 151 #endif
150 152 DBG(window);
151 153 return (window);
152 154 }
153 155
154 156 /*
155 157 * Add a mapping for the physical page at the given virtual address.
156 158 */
157 159 void
158 160 kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
159 161 {
160 162 x86pte_t *ptep;
161 163 paddr_t pte_physaddr;
162 164 x86pte_t pteval;
163 165
164 166 if (khat_running)
165 167 panic("kbm_map() called too late");
166 168
167 169 pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
168 170 if (level >= 1)
169 171 pteval |= PT_PAGESIZE;
170 172 if (kbm_pge_support && is_kernel)
171 173 pteval |= PT_GLOBAL;
172 174
173 175 #ifdef __xpv
174 176 /*
175 177 * try update_va_mapping first - fails if page table is missing.
176 178 */
177 179 if (HYPERVISOR_update_va_mapping(va, pteval,
178 180 UVMF_INVLPG | UVMF_LOCAL) == 0)
179 181 return;
180 182 #endif
181 183
182 184 /*
183 185 * Find the pte that will map this address. This creates any
184 186 * missing intermediate level page tables.
185 187 */
186 188 ptep = find_pte(va, &pte_physaddr, level, 0);
187 189 if (ptep == NULL)
188 190 bop_panic("kbm_map: find_pte returned NULL");
189 191
190 192 #ifdef __xpv
191 193 if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
192 194 bop_panic("HYPERVISOR_update_va_mapping() failed");
193 195 #else
194 196 if (kbm_pae_support)
195 197 *ptep = pteval;
196 198 else
197 199 *((x86pte32_t *)ptep) = pteval;
198 - mmu_tlbflush_entry((caddr_t)va);
200 + mmu_invlpg((caddr_t)va);
199 201 #endif
200 202 }
201 203
202 204 #ifdef __xpv
203 205
204 206 /*
205 207 * Add a mapping for the machine page at the given virtual address.
206 208 */
207 209 void
208 210 kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
209 211 {
210 212 paddr_t pte_physaddr;
211 213 x86pte_t pteval;
212 214
213 215 pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
214 216 if (level == 1)
215 217 pteval |= PT_PAGESIZE;
216 218
217 219 /*
218 220 * try update_va_mapping first - fails if page table is missing.
219 221 */
220 222 if (HYPERVISOR_update_va_mapping(va,
221 223 pteval, UVMF_INVLPG | UVMF_LOCAL) == 0)
222 224 return;
223 225
224 226 /*
225 227 * Find the pte that will map this address. This creates any
226 228 * missing intermediate level page tables
227 229 */
228 230 (void) find_pte(va, &pte_physaddr, level, 0);
229 231
230 232 if (HYPERVISOR_update_va_mapping(va,
231 233 pteval, UVMF_INVLPG | UVMF_LOCAL) != 0)
232 234 bop_panic("HYPERVISOR_update_va_mapping failed");
233 235 }
234 236
235 237 #endif /* __xpv */
236 238
237 239
238 240 /*
239 241 * Probe the boot time page tables to find the first mapping
240 242 * including va (or higher) and return non-zero if one is found.
241 243 * va is updated to the starting address and len to the pagesize.
 242  244  	 * pfn will be set to the pfn of the first mapped page.
243 245 *
244 246 * Note that if va is in the middle of a large page, the returned va
245 247 * will be less than what was asked for.
246 248 */
247 249 int
248 250 kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
249 251 {
250 252 uintptr_t probe_va;
251 253 x86pte_t *ptep;
252 254 paddr_t pte_physaddr;
253 255 x86pte_t pte_val;
254 256 level_t l;
255 257
256 258 if (khat_running)
257 259 panic("kbm_probe() called too late");
258 260 *len = 0;
259 261 *pfn = PFN_INVALID;
260 262 *prot = 0;
261 263 probe_va = *va;
262 264 restart_new_va:
263 265 l = top_level;
264 266 for (;;) {
265 267 if (IN_VA_HOLE(probe_va))
266 268 probe_va = mmu.hole_end;
267 269
268 270 if (IN_HYPERVISOR_VA(probe_va))
269 271 #if defined(__amd64) && defined(__xpv)
270 272 probe_va = HYPERVISOR_VIRT_END;
271 273 #else
272 274 return (0);
273 275 #endif
274 276
275 277 /*
276 278 * If we don't have a valid PTP/PTE at this level
277 279 * then we can bump VA by this level's pagesize and try again.
278 280 * When the probe_va wraps around, we are done.
279 281 */
280 282 ptep = find_pte(probe_va, &pte_physaddr, l, 1);
281 283 if (ptep == NULL)
282 284 bop_panic("kbm_probe: find_pte returned NULL");
283 285 if (kbm_pae_support)
284 286 pte_val = *ptep;
285 287 else
286 288 pte_val = *((x86pte32_t *)ptep);
287 289 if (!PTE_ISVALID(pte_val)) {
288 290 probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
289 291 if (probe_va <= *va)
290 292 return (0);
291 293 goto restart_new_va;
292 294 }
293 295
294 296 /*
295 297 * If this entry is a pointer to a lower level page table
296 298 * go down to it.
297 299 */
298 300 if (!PTE_ISPAGE(pte_val, l)) {
299 301 ASSERT(l > 0);
300 302 --l;
301 303 continue;
302 304 }
303 305
304 306 /*
305 307 * We found a boot level page table entry
306 308 */
307 309 *len = BOOT_SZ(l);
308 310 *va = probe_va & ~(*len - 1);
309 311 *pfn = PTE2PFN(pte_val, l);
310 312
311 313
312 314 *prot = PROT_READ | PROT_EXEC;
313 315 if (PTE_GET(pte_val, PT_WRITABLE))
314 316 *prot |= PROT_WRITE;
315 317
316 318 /*
317 319 * pt_nx is cleared if processor doesn't support NX bit
318 320 */
319 321 if (PTE_GET(pte_val, mmu.pt_nx))
320 322 *prot &= ~PROT_EXEC;
321 323
322 324 return (1);
323 325 }
324 326 }
325 327
326 328
327 329 /*
328 330 * Destroy a boot loader page table 4K mapping.
329 331 */
330 332 void
331 333 kbm_unmap(uintptr_t va)
332 334 {
333 335 if (khat_running)
334 336 panic("kbm_unmap() called too late");
335 337 else {
336 338 #ifdef __xpv
337 339 (void) HYPERVISOR_update_va_mapping(va, 0,
338 340 UVMF_INVLPG | UVMF_LOCAL);
339 341 #else
340 342 x86pte_t *ptep;
341 343 level_t level = 0;
342 344 uint_t probe_only = 1;
343 345
344 346 ptep = find_pte(va, NULL, level, probe_only);
345 347 if (ptep == NULL)
346 348 return;
347 349
348 350 if (kbm_pae_support)
349 351 *ptep = 0;
350 352 else
351 353 *((x86pte32_t *)ptep) = 0;
352 - mmu_tlbflush_entry((caddr_t)va);
354 + mmu_invlpg((caddr_t)va);
353 355 #endif
354 356 }
355 357 }
356 358
357 359
358 360 /*
359 361 * Change a boot loader page table 4K mapping.
360 362 * Returns the pfn of the old mapping.
361 363 */
362 364 pfn_t
363 365 kbm_remap(uintptr_t va, pfn_t pfn)
364 366 {
365 367 x86pte_t *ptep;
366 368 level_t level = 0;
367 369 uint_t probe_only = 1;
368 370 x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
369 371 PT_NOCONSIST | PT_VALID;
370 372 x86pte_t old_pte;
371 373
372 374 if (khat_running)
373 375 panic("kbm_remap() called too late");
374 376 ptep = find_pte(va, NULL, level, probe_only);
375 377 if (ptep == NULL)
376 378 bop_panic("kbm_remap: find_pte returned NULL");
377 379
378 380 if (kbm_pae_support)
379 381 old_pte = *ptep;
380 382 else
381 383 old_pte = *((x86pte32_t *)ptep);
382 384
383 385 #ifdef __xpv
384 386 if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
385 387 bop_panic("HYPERVISOR_update_va_mapping() failed");
386 388 #else
387 389 if (kbm_pae_support)
388 390 *((x86pte_t *)ptep) = pte_val;
389 391 else
390 392 *((x86pte32_t *)ptep) = pte_val;
391 - mmu_tlbflush_entry((caddr_t)va);
393 + mmu_invlpg((caddr_t)va);
392 394 #endif
393 395
394 396 if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
395 397 return (PFN_INVALID);
396 398 return (mmu_btop(ma_to_pa(old_pte)));
397 399 }
398 400
399 401
400 402 /*
401 403 * Change a boot loader page table 4K mapping to read only.
402 404 */
403 405 void
404 406 kbm_read_only(uintptr_t va, paddr_t pa)
405 407 {
406 408 x86pte_t pte_val = pa_to_ma(pa) |
407 409 PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;
408 410
409 411 #ifdef __xpv
410 412 if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
411 413 bop_panic("HYPERVISOR_update_va_mapping() failed");
412 414 #else
413 415 x86pte_t *ptep;
414 416 level_t level = 0;
415 417
416 418 ptep = find_pte(va, NULL, level, 0);
417 419 if (ptep == NULL)
418 420 bop_panic("kbm_read_only: find_pte returned NULL");
419 421
420 422 if (kbm_pae_support)
421 423 *ptep = pte_val;
422 424 else
423 425 *((x86pte32_t *)ptep) = pte_val;
424 - mmu_tlbflush_entry((caddr_t)va);
426 + mmu_invlpg((caddr_t)va);
425 427 #endif
426 428 }
427 429
428 430 /*
429 431 * interfaces for kernel debugger to access physical memory
430 432 */
431 433 static x86pte_t save_pte;
432 434
433 435 void *
434 436 kbm_push(paddr_t pa)
435 437 {
436 438 static int first_time = 1;
437 439
438 440 if (first_time) {
439 441 first_time = 0;
440 442 return (window);
441 443 }
442 444
443 445 if (kbm_pae_support)
444 446 save_pte = *((x86pte_t *)pte_to_window);
445 447 else
446 448 save_pte = *((x86pte32_t *)pte_to_window);
447 449 return (kbm_remap_window(pa, 0));
448 450 }
449 451
450 452 void
451 453 kbm_pop(void)
452 454 {
453 455 #ifdef __xpv
454 456 if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte,
455 457 UVMF_INVLPG | UVMF_LOCAL) < 0)
456 458 bop_panic("HYPERVISOR_update_va_mapping() failed");
457 459 #else
458 460 if (kbm_pae_support)
459 461 *((x86pte_t *)pte_to_window) = save_pte;
460 462 else
461 463 *((x86pte32_t *)pte_to_window) = save_pte;
462 - mmu_tlbflush_entry(window);
464 + mmu_invlpg(window);
463 465 #endif
464 466 }
465 467
466 468 x86pte_t
467 469 get_pteval(paddr_t table, uint_t index)
468 470 {
469 471 void *table_ptr = kbm_remap_window(table, 0);
470 472
471 473 if (kbm_pae_support)
472 474 return (((x86pte_t *)table_ptr)[index]);
473 475 return (((x86pte32_t *)table_ptr)[index]);
474 476 }
475 477
476 478 #ifndef __xpv
477 479 void
478 480 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
479 481 {
480 482 void *table_ptr = kbm_remap_window(table, 0);
481 483 if (kbm_pae_support)
482 484 ((x86pte_t *)table_ptr)[index] = pteval;
483 485 else
484 486 ((x86pte32_t *)table_ptr)[index] = pteval;
485 487 if (level == top_level && level == 2)
486 488 reload_cr3();
487 489 }
488 490 #endif
489 491
490 492 paddr_t
491 493 make_ptable(x86pte_t *pteval, uint_t level)
492 494 {
493 495 paddr_t new_table;
494 496 void *table_ptr;
495 497
496 498 new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
497 499 table_ptr = kbm_remap_window(new_table, 1);
498 500 bzero(table_ptr, MMU_PAGESIZE);
499 501 #ifdef __xpv
500 502 /* Remove write permission to the new page table. */
501 503 (void) kbm_remap_window(new_table, 0);
502 504 #endif
503 505
504 506 if (level == top_level && level == 2)
505 507 *pteval = pa_to_ma(new_table) | PT_VALID;
506 508 else
507 509 *pteval = pa_to_ma(new_table) |
508 510 PT_VALID | PT_REF | PT_USER | PT_WRITABLE;
509 511
510 512 return (new_table);
511 513 }
512 514
513 515 x86pte_t *
514 516 map_pte(paddr_t table, uint_t index)
515 517 {
516 518 void *table_ptr = kbm_remap_window(table, 0);
517 519 return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
518 520 }
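
As a worked example of the BOOT_* macros defined near the top of this file, assuming the standard PAE level-1 shift of 21 bits (2 MB large pages):

	BOOT_SHIFT(1)  = shift_amt[1]    = 21
	BOOT_SZ(1)     = (size_t)1 << 21 = 0x200000   (2 MB)
	BOOT_OFFSET(1) = 0x200000 - 1    = 0x1fffff
	BOOT_MASK(1)   = ~0x1fffff

So the step in kbm_probe(), probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l), rounds probe_va down to its 2 MB boundary and then advances it to the start of the next large page.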
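The only functional change in this file is the rename of the per-entry TLB flush: each non-xpv path stores a new PTE (through pte_to_window, or through the PTE located by find_pte()) and then calls mmu_invlpg() instead of mmu_tlbflush_entry(). The real mmu_invlpg() is defined elsewhere in this changeset; as a minimal sketch of what such a single-entry invalidation does, assuming GCC-style inline assembly on x86 (demo_invlpg is a hypothetical name used only for illustration):

	/*
	 * Sketch only, not the illumos routine: INVLPG invalidates just
	 * the TLB entry covering one virtual address, so remapping the
	 * single page table window never pays for a full TLB flush
	 * (i.e. a %cr3 reload).
	 */
	static inline void
	demo_invlpg(caddr_t va)
	{
		__asm__ __volatile__("invlpg (%0)" : : "r" (va) : "memory");
	}

This is why kbm_remap_window() and the other boot-time mapping routines stay cheap: only one window mapping changes at a time, so only its TLB entry needs to be dropped.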