8956 Implement KPTI Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com> Reviewed by: Robert Mustacchi <rm@joyent.com>
1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright 2018 Joyent, Inc. 27 */ 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/archsystm.h> 32 #include <sys/debug.h> 33 #include <sys/bootconf.h> 34 #include <sys/bootsvcs.h> 35 #include <sys/bootinfo.h> 36 #include <sys/mman.h> 37 #include <sys/cmn_err.h> 38 #include <sys/param.h> 39 #include <sys/machparam.h> 40 #include <sys/machsystm.h> 41 #include <sys/promif.h> 42 #include <sys/kobj.h> 43 #ifdef __xpv 44 #include <sys/hypervisor.h> 45 #endif 46 #include <vm/kboot_mmu.h> 47 #include <vm/hat_pte.h> 48 #include <vm/hat_i86.h> 49 #include <vm/seg_kmem.h> 50 51 #if 0 52 /* 53 * Joe's debug printing 54 */ 55 #define DBG(x) \ 56 bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x)); 57 #else 58 #define DBG(x) /* naught */ 59 #endif 60 61 /* 62 * Page table and memory stuff. 63 */ 64 static caddr_t window; 65 static caddr_t pte_to_window; 66 67 /* 68 * this are needed by mmu_init() 69 */ 70 int kbm_nx_support = 0; /* NX bit in PTEs is in use */ 71 int kbm_pae_support = 0; /* PAE is 64 bit Page table entries */ 72 int kbm_pge_support = 0; /* PGE is Page table global bit enabled */ 73 int kbm_largepage_support = 0; 74 uint_t kbm_nucleus_size = 0; 75 76 #define BOOT_SHIFT(l) (shift_amt[l]) 77 #define BOOT_SZ(l) ((size_t)1 << BOOT_SHIFT(l)) 78 #define BOOT_OFFSET(l) (BOOT_SZ(l) - 1) 79 #define BOOT_MASK(l) (~BOOT_OFFSET(l)) 80 81 /* 82 * Initialize memory management parameters for boot time page table management 83 */ 84 void 85 kbm_init(struct xboot_info *bi) 86 { 87 /* 88 * configure mmu information 89 */ 90 kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size; 91 kbm_largepage_support = bi->bi_use_largepage; 92 kbm_nx_support = bi->bi_use_nx; 93 kbm_pae_support = bi->bi_use_pae; 94 kbm_pge_support = bi->bi_use_pge; 95 window = bi->bi_pt_window; 96 DBG(window); 97 pte_to_window = bi->bi_pte_to_pt_window; 98 DBG(pte_to_window); 99 if (kbm_pae_support) { 100 shift_amt = shift_amt_pae; 101 ptes_per_table = 512; 102 pte_size = 8; 103 lpagesize = TWO_MEG; 104 #ifdef __amd64 105 top_level = 3; 106 #else 107 top_level = 2; 108 #endif 109 } else { 110 shift_amt = shift_amt_nopae; 111 ptes_per_table = 1024; 112 pte_size = 4; 113 lpagesize = FOUR_MEG; 114 top_level = 1; 115 } 116 117 #ifdef __xpv 118 xen_info = bi->bi_xen_start_info; 119 mfn_list = (mfn_t *)xen_info->mfn_list; 120 DBG(mfn_list); 121 mfn_count = xen_info->nr_pages; 122 DBG(mfn_count); 123 #endif 124 top_page_table = bi->bi_top_page_table; 125 DBG(top_page_table); 126 } 127 128 /* 129 * Change the addressible page table window to point at a given page 130 */ 131 /*ARGSUSED*/ 132 void * 133 kbm_remap_window(paddr_t physaddr, int writeable) 134 { 135 x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE; 136 137 DBG(physaddr); 138 139 #ifdef __xpv 140 if (!writeable) 141 pt_bits &= ~PT_WRITABLE; 142 if (HYPERVISOR_update_va_mapping((uintptr_t)window, 143 pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0) 144 bop_panic("HYPERVISOR_update_va_mapping() failed"); 145 #else 146 if (kbm_pae_support) 147 *((x86pte_t *)pte_to_window) = physaddr | pt_bits; 148 else 149 *((x86pte32_t *)pte_to_window) = physaddr | pt_bits; 150 mmu_invlpg(window); 151 #endif 152 DBG(window); 153 return (window); 154 } 155 156 /* 157 * Add a mapping for the physical page at the given virtual address. 158 */ 159 void 160 kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel) 161 { 162 x86pte_t *ptep; 163 paddr_t pte_physaddr; 164 x86pte_t pteval; 165 166 if (khat_running) 167 panic("kbm_map() called too late"); 168 169 pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE; 170 if (level >= 1) 171 pteval |= PT_PAGESIZE; 172 if (kbm_pge_support && is_kernel) 173 pteval |= PT_GLOBAL; 174 175 #ifdef __xpv 176 /* 177 * try update_va_mapping first - fails if page table is missing. 178 */ 179 if (HYPERVISOR_update_va_mapping(va, pteval, 180 UVMF_INVLPG | UVMF_LOCAL) == 0) 181 return; 182 #endif 183 184 /* 185 * Find the pte that will map this address. This creates any 186 * missing intermediate level page tables. 187 */ 188 ptep = find_pte(va, &pte_physaddr, level, 0); 189 if (ptep == NULL) 190 bop_panic("kbm_map: find_pte returned NULL"); 191 192 #ifdef __xpv 193 if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL)) 194 bop_panic("HYPERVISOR_update_va_mapping() failed"); 195 #else 196 if (kbm_pae_support) 197 *ptep = pteval; 198 else 199 *((x86pte32_t *)ptep) = pteval; 200 mmu_invlpg((caddr_t)va); 201 #endif 202 } 203 204 #ifdef __xpv 205 206 /* 207 * Add a mapping for the machine page at the given virtual address. 208 */ 209 void 210 kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level) 211 { 212 paddr_t pte_physaddr; 213 x86pte_t pteval; 214 215 pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE; 216 if (level == 1) 217 pteval |= PT_PAGESIZE; 218 219 /* 220 * try update_va_mapping first - fails if page table is missing. 221 */ 222 if (HYPERVISOR_update_va_mapping(va, 223 pteval, UVMF_INVLPG | UVMF_LOCAL) == 0) 224 return; 225 226 /* 227 * Find the pte that will map this address. This creates any 228 * missing intermediate level page tables 229 */ 230 (void) find_pte(va, &pte_physaddr, level, 0); 231 232 if (HYPERVISOR_update_va_mapping(va, 233 pteval, UVMF_INVLPG | UVMF_LOCAL) != 0) 234 bop_panic("HYPERVISOR_update_va_mapping failed"); 235 } 236 237 #endif /* __xpv */ 238 239 240 /* 241 * Probe the boot time page tables to find the first mapping 242 * including va (or higher) and return non-zero if one is found. 243 * va is updated to the starting address and len to the pagesize. 244 * pp will be set to point to the 1st page_t of the mapped page(s). 245 * 246 * Note that if va is in the middle of a large page, the returned va 247 * will be less than what was asked for. 248 */ 249 int 250 kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot) 251 { 252 uintptr_t probe_va; 253 x86pte_t *ptep; 254 paddr_t pte_physaddr; 255 x86pte_t pte_val; 256 level_t l; 257 258 if (khat_running) 259 panic("kbm_probe() called too late"); 260 *len = 0; 261 *pfn = PFN_INVALID; 262 *prot = 0; 263 probe_va = *va; 264 restart_new_va: 265 l = top_level; 266 for (;;) { 267 if (IN_VA_HOLE(probe_va)) 268 probe_va = mmu.hole_end; 269 270 if (IN_HYPERVISOR_VA(probe_va)) 271 #if defined(__amd64) && defined(__xpv) 272 probe_va = HYPERVISOR_VIRT_END; 273 #else 274 return (0); 275 #endif 276 277 /* 278 * If we don't have a valid PTP/PTE at this level 279 * then we can bump VA by this level's pagesize and try again. 280 * When the probe_va wraps around, we are done. 281 */ 282 ptep = find_pte(probe_va, &pte_physaddr, l, 1); 283 if (ptep == NULL) 284 bop_panic("kbm_probe: find_pte returned NULL"); 285 if (kbm_pae_support) 286 pte_val = *ptep; 287 else 288 pte_val = *((x86pte32_t *)ptep); 289 if (!PTE_ISVALID(pte_val)) { 290 probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l); 291 if (probe_va <= *va) 292 return (0); 293 goto restart_new_va; 294 } 295 296 /* 297 * If this entry is a pointer to a lower level page table 298 * go down to it. 299 */ 300 if (!PTE_ISPAGE(pte_val, l)) { 301 ASSERT(l > 0); 302 --l; 303 continue; 304 } 305 306 /* 307 * We found a boot level page table entry 308 */ 309 *len = BOOT_SZ(l); 310 *va = probe_va & ~(*len - 1); 311 *pfn = PTE2PFN(pte_val, l); 312 313 314 *prot = PROT_READ | PROT_EXEC; 315 if (PTE_GET(pte_val, PT_WRITABLE)) 316 *prot |= PROT_WRITE; 317 318 /* 319 * pt_nx is cleared if processor doesn't support NX bit 320 */ 321 if (PTE_GET(pte_val, mmu.pt_nx)) 322 *prot &= ~PROT_EXEC; 323 324 return (1); 325 } 326 } 327 328 329 /* 330 * Destroy a boot loader page table 4K mapping. 331 */ 332 void 333 kbm_unmap(uintptr_t va) 334 { 335 if (khat_running) 336 panic("kbm_unmap() called too late"); 337 else { 338 #ifdef __xpv 339 (void) HYPERVISOR_update_va_mapping(va, 0, 340 UVMF_INVLPG | UVMF_LOCAL); 341 #else 342 x86pte_t *ptep; 343 level_t level = 0; 344 uint_t probe_only = 1; 345 346 ptep = find_pte(va, NULL, level, probe_only); 347 if (ptep == NULL) 348 return; 349 350 if (kbm_pae_support) 351 *ptep = 0; 352 else 353 *((x86pte32_t *)ptep) = 0; 354 mmu_invlpg((caddr_t)va); 355 #endif 356 } 357 } 358 359 360 /* 361 * Change a boot loader page table 4K mapping. 362 * Returns the pfn of the old mapping. 363 */ 364 pfn_t 365 kbm_remap(uintptr_t va, pfn_t pfn) 366 { 367 x86pte_t *ptep; 368 level_t level = 0; 369 uint_t probe_only = 1; 370 x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE | 371 PT_NOCONSIST | PT_VALID; 372 x86pte_t old_pte; 373 374 if (khat_running) 375 panic("kbm_remap() called too late"); 376 ptep = find_pte(va, NULL, level, probe_only); 377 if (ptep == NULL) 378 bop_panic("kbm_remap: find_pte returned NULL"); 379 380 if (kbm_pae_support) 381 old_pte = *ptep; 382 else 383 old_pte = *((x86pte32_t *)ptep); 384 385 #ifdef __xpv 386 if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL)) 387 bop_panic("HYPERVISOR_update_va_mapping() failed"); 388 #else 389 if (kbm_pae_support) 390 *((x86pte_t *)ptep) = pte_val; 391 else 392 *((x86pte32_t *)ptep) = pte_val; 393 mmu_invlpg((caddr_t)va); 394 #endif 395 396 if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1) 397 return (PFN_INVALID); 398 return (mmu_btop(ma_to_pa(old_pte))); 399 } 400 401 402 /* 403 * Change a boot loader page table 4K mapping to read only. 404 */ 405 void 406 kbm_read_only(uintptr_t va, paddr_t pa) 407 { 408 x86pte_t pte_val = pa_to_ma(pa) | 409 PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID; 410 411 #ifdef __xpv 412 if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL)) 413 bop_panic("HYPERVISOR_update_va_mapping() failed"); 414 #else 415 x86pte_t *ptep; 416 level_t level = 0; 417 418 ptep = find_pte(va, NULL, level, 0); 419 if (ptep == NULL) 420 bop_panic("kbm_read_only: find_pte returned NULL"); 421 422 if (kbm_pae_support) 423 *ptep = pte_val; 424 else 425 *((x86pte32_t *)ptep) = pte_val; 426 mmu_invlpg((caddr_t)va); 427 #endif 428 } 429 430 /* 431 * interfaces for kernel debugger to access physical memory 432 */ 433 static x86pte_t save_pte; 434 435 void * 436 kbm_push(paddr_t pa) 437 { 438 static int first_time = 1; 439 440 if (first_time) { 441 first_time = 0; 442 return (window); 443 } 444 445 if (kbm_pae_support) 446 save_pte = *((x86pte_t *)pte_to_window); 447 else 448 save_pte = *((x86pte32_t *)pte_to_window); 449 return (kbm_remap_window(pa, 0)); 450 } 451 452 void 453 kbm_pop(void) 454 { 455 #ifdef __xpv 456 if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte, 457 UVMF_INVLPG | UVMF_LOCAL) < 0) 458 bop_panic("HYPERVISOR_update_va_mapping() failed"); 459 #else 460 if (kbm_pae_support) 461 *((x86pte_t *)pte_to_window) = save_pte; 462 else 463 *((x86pte32_t *)pte_to_window) = save_pte; 464 mmu_invlpg(window); 465 #endif 466 } 467 468 x86pte_t 469 get_pteval(paddr_t table, uint_t index) 470 { 471 void *table_ptr = kbm_remap_window(table, 0); 472 473 if (kbm_pae_support) 474 return (((x86pte_t *)table_ptr)[index]); 475 return (((x86pte32_t *)table_ptr)[index]); 476 } 477 478 #ifndef __xpv 479 void 480 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval) 481 { 482 void *table_ptr = kbm_remap_window(table, 0); 483 if (kbm_pae_support) 484 ((x86pte_t *)table_ptr)[index] = pteval; 485 else 486 ((x86pte32_t *)table_ptr)[index] = pteval; 487 if (level == top_level && level == 2) 488 reload_cr3(); 489 } 490 #endif 491 492 paddr_t 493 make_ptable(x86pte_t *pteval, uint_t level) 494 { 495 paddr_t new_table; 496 void *table_ptr; 497 498 new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE); 499 table_ptr = kbm_remap_window(new_table, 1); 500 bzero(table_ptr, MMU_PAGESIZE); 501 #ifdef __xpv 502 /* Remove write permission to the new page table. */ 503 (void) kbm_remap_window(new_table, 0); 504 #endif 505 506 if (level == top_level && level == 2) 507 *pteval = pa_to_ma(new_table) | PT_VALID; 508 else 509 *pteval = pa_to_ma(new_table) | 510 PT_VALID | PT_REF | PT_USER | PT_WRITABLE; 511 512 return (new_table); 513 } 514 515 x86pte_t * 516 map_pte(paddr_t table, uint_t index) 517 { 518 void *table_ptr = kbm_remap_window(table, 0); 519 return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size)); 520 } --- EOF ---