/*
 * 8956 Implement KPTI
 * Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
 * Reviewed by: Robert Mustacchi <rm@joyent.com>
 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/debug.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/param.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/kobj.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#endif
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>
#include <vm/hat_i86.h>
#include <vm/seg_kmem.h>

#if 0
/*
 * Joe's debug printing
 *
 * Compile-time disabled; flip the #if 0 above to enable tracing of
 * values through bop_printf().
 */
#define	DBG(x) \
	bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
#else
#define	DBG(x)	/* naught */
#endif

/*
 * Page table and memory stuff.
 */
static caddr_t window;		/* moveable mapping window (fixed VA) */
static caddr_t pte_to_window;	/* VA of the PTE that maps the window itself */

/*
 * these are needed by mmu_init()
 */
int kbm_nx_support = 0;		/* NX bit in PTEs is in use */
int kbm_pae_support = 0;	/* PAE is 64 bit Page table entries */
int kbm_pge_support = 0;	/* PGE is Page table global bit enabled */
int kbm_largepage_support = 0;	/* set from xboot_info in kbm_init() */
uint_t kbm_nucleus_size = 0;	/* kernel nucleus size, from xboot_info */

/*
 * Per-level sizing helpers.  shift_amt[] is selected in kbm_init()
 * according to whether PAE page tables are in use.
 */
#define	BOOT_SHIFT(l)	(shift_amt[l])
#define	BOOT_SZ(l)	((size_t)1 << BOOT_SHIFT(l))
#define	BOOT_OFFSET(l)	(BOOT_SZ(l) - 1)
#define	BOOT_MASK(l)	(~BOOT_OFFSET(l))

/*
 * Initialize memory management parameters for boot time page table management
 *
 * Copies the MMU feature flags and the page table window addresses out of
 * the xboot_info handed over by the boot loader, then derives the page
 * table geometry (entries per table, PTE size, large page size, top level)
 * from whether PAE is in use.  The globals set here are used by the other
 * kbm_*() routines below.
 */
void
kbm_init(struct xboot_info *bi)
{
	/*
	 * configure mmu information
	 */
	kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
	kbm_largepage_support = bi->bi_use_largepage;
	kbm_nx_support = bi->bi_use_nx;
	kbm_pae_support = bi->bi_use_pae;
	kbm_pge_support = bi->bi_use_pge;
	window = bi->bi_pt_window;
	DBG(window);
	pte_to_window = bi->bi_pte_to_pt_window;
	DBG(pte_to_window);
	if (kbm_pae_support) {
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#ifdef __amd64
		top_level = 3;
#else
		top_level = 2;
#endif
	} else {
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

#ifdef __xpv
	xen_info = bi->bi_xen_start_info;
	mfn_list = (mfn_t *)xen_info->mfn_list;
	DBG(mfn_list);
	mfn_count = xen_info->nr_pages;
	DBG(mfn_count);
#endif
	top_page_table = bi->bi_top_page_table;
	DBG(top_page_table);
}

/*
 * Change the addressible page table window to point at a given page
 *
 * physaddr  - physical address of the page to expose through the window
 * writeable - honored only on the hypervisor build; the bare metal path
 *	       below always maps the window writable (hence ARGSUSED)
 *
 * Returns the window's (fixed) virtual address.
 */
/*ARGSUSED*/
void *
kbm_remap_window(paddr_t physaddr, int writeable)
{
	x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;

	DBG(physaddr);

#ifdef __xpv
	if (!writeable)
		pt_bits &= ~PT_WRITABLE;
	if (HYPERVISOR_update_va_mapping((uintptr_t)window,
	    pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	/* retarget the window by rewriting its own PTE, then flush the TLB */
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
	else
		*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
	mmu_tlbflush_entry(window);
#endif
	DBG(window);
	return (window);
}

/*
 * Add a mapping for the physical page at the given virtual address.
 *
 * va        - virtual address to map
 * pa        - physical address to map there
 * level     - page table level; level >= 1 requests a large page
 *	       (PT_PAGESIZE) mapping
 * is_kernel - when set (and PGE is supported) mark the mapping PT_GLOBAL
 *
 * Only usable before the kernel HAT is running (panics otherwise).
 */
void
kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
{
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pteval;

	if (khat_running)
		panic("kbm_map() called too late");

	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
	if (level >= 1)
		pteval |= PT_PAGESIZE;
	if (kbm_pge_support && is_kernel)
		pteval |= PT_GLOBAL;

#ifdef __xpv
	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	ptep = find_pte(va, &pte_physaddr, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_map: find_pte returned NULL");

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = pteval;
	mmu_tlbflush_entry((caddr_t)va);
#endif
}

#ifdef __xpv

/*
 * Add a mapping for the machine page at the given virtual address.
 */
/*
 * ma    - machine (hypervisor) address to map
 * va    - virtual address to receive the mapping
 * level - page table level; level 1 requests a large page (PT_PAGESIZE)
 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	paddr_t pte_physaddr;
	x86pte_t pteval;

	pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
	if (level == 1)
		pteval |= PT_PAGESIZE;

	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables
	 */
	(void) find_pte(va, &pte_physaddr, level, 0);

	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) != 0)
		bop_panic("HYPERVISOR_update_va_mapping failed");
}

#endif /* __xpv */


/*
 * Probe the boot time page tables to find the first mapping
 * including va (or higher) and return non-zero if one is found.
 * va is updated to the starting address and len to the pagesize.
 * *pfn is set to the pfn of the mapping and *prot to its
 * PROT_READ/PROT_WRITE/PROT_EXEC permissions.
 *
 * Note that if va is in the middle of a large page, the returned va
 * will be less than what was asked for.
 */
int
kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
{
	uintptr_t probe_va;
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pte_val;
	level_t l;

	if (khat_running)
		panic("kbm_probe() called too late");
	*len = 0;
	*pfn = PFN_INVALID;
	*prot = 0;
	probe_va = *va;
restart_new_va:
	l = top_level;
	for (;;) {
		if (IN_VA_HOLE(probe_va))
			probe_va = mmu.hole_end;

		/*
		 * The #if below supplies the body of this "if": under a
		 * 64-bit hypervisor build we skip over the hypervisor's
		 * VA range, otherwise reaching it ends the probe.
		 */
		if (IN_HYPERVISOR_VA(probe_va))
#if defined(__amd64) && defined(__xpv)
			probe_va = HYPERVISOR_VIRT_END;
#else
			return (0);
#endif

		/*
		 * If we don't have a valid PTP/PTE at this level
		 * then we can bump VA by this level's pagesize and try again.
		 * When the probe_va wraps around, we are done.
 */
		ptep = find_pte(probe_va, &pte_physaddr, l, 1);
		if (ptep == NULL)
			bop_panic("kbm_probe: find_pte returned NULL");
		if (kbm_pae_support)
			pte_val = *ptep;
		else
			pte_val = *((x86pte32_t *)ptep);
		if (!PTE_ISVALID(pte_val)) {
			probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
			if (probe_va <= *va)
				return (0);
			goto restart_new_va;
		}

		/*
		 * If this entry is a pointer to a lower level page table
		 * go down to it.
		 */
		if (!PTE_ISPAGE(pte_val, l)) {
			ASSERT(l > 0);
			--l;
			continue;
		}

		/*
		 * We found a boot level page table entry
		 */
		*len = BOOT_SZ(l);
		*va = probe_va & ~(*len - 1);
		*pfn = PTE2PFN(pte_val, l);

		*prot = PROT_READ | PROT_EXEC;
		if (PTE_GET(pte_val, PT_WRITABLE))
			*prot |= PROT_WRITE;

		/*
		 * pt_nx is cleared if processor doesn't support NX bit
		 */
		if (PTE_GET(pte_val, mmu.pt_nx))
			*prot &= ~PROT_EXEC;

		return (1);
	}
}


/*
 * Destroy a boot loader page table 4K mapping.
 *
 * A missing mapping is not an error: the bare metal path simply returns
 * and the hypervisor call's result is deliberately ignored.
 */
void
kbm_unmap(uintptr_t va)
{
	if (khat_running)
		panic("kbm_unmap() called too late");
	else {
#ifdef __xpv
		(void) HYPERVISOR_update_va_mapping(va, 0,
		    UVMF_INVLPG | UVMF_LOCAL);
#else
		x86pte_t *ptep;
		level_t level = 0;
		uint_t probe_only = 1;

		ptep = find_pte(va, NULL, level, probe_only);
		if (ptep == NULL)
			return;

		if (kbm_pae_support)
			*ptep = 0;
		else
			*((x86pte32_t *)ptep) = 0;
		mmu_tlbflush_entry((caddr_t)va);
#endif
	}
}


/*
 * Change a boot loader page table 4K mapping.
 * Returns the pfn of the old mapping.
361 */ 362 pfn_t 363 kbm_remap(uintptr_t va, pfn_t pfn) 364 { 365 x86pte_t *ptep; 366 level_t level = 0; 367 uint_t probe_only = 1; 368 x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE | 369 PT_NOCONSIST | PT_VALID; 370 x86pte_t old_pte; 371 372 if (khat_running) 373 panic("kbm_remap() called too late"); 374 ptep = find_pte(va, NULL, level, probe_only); 375 if (ptep == NULL) 376 bop_panic("kbm_remap: find_pte returned NULL"); 377 378 if (kbm_pae_support) 379 old_pte = *ptep; 380 else 381 old_pte = *((x86pte32_t *)ptep); 382 383 #ifdef __xpv 384 if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL)) 385 bop_panic("HYPERVISOR_update_va_mapping() failed"); 386 #else 387 if (kbm_pae_support) 388 *((x86pte_t *)ptep) = pte_val; 389 else 390 *((x86pte32_t *)ptep) = pte_val; 391 mmu_tlbflush_entry((caddr_t)va); 392 #endif 393 394 if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1) 395 return (PFN_INVALID); 396 return (mmu_btop(ma_to_pa(old_pte))); 397 } 398 399 400 /* 401 * Change a boot loader page table 4K mapping to read only. 
 */
/*
 * va - virtual address whose 4K PTE will be rewritten
 * pa - physical address to map there
 *
 * The replacement PTE omits PT_WRITABLE, which makes the mapping read
 * only.  PT_REF and PT_MOD are pre-set in the entry (presumably so the
 * hardware never needs to write them back -- confirm).
 */
void
kbm_read_only(uintptr_t va, paddr_t pa)
{
	x86pte_t pte_val = pa_to_ma(pa) |
	    PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	x86pte_t *ptep;
	level_t level = 0;

	ptep = find_pte(va, NULL, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_read_only: find_pte returned NULL");

	if (kbm_pae_support)
		*ptep = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_tlbflush_entry((caddr_t)va);
#endif
}

/*
 * interfaces for kernel debugger to access physical memory
 */
static x86pte_t save_pte;	/* window PTE saved by kbm_push() */

/*
 * Point the page table window at physical page "pa" for the debugger,
 * saving the current window PTE so kbm_pop() can restore it.
 *
 * The very first call has no mapping worth saving and just returns the
 * window address unchanged.
 *
 * NOTE(review): only one level of push is retained -- a second
 * kbm_push() without an intervening kbm_pop() overwrites save_pte.
 */
void *
kbm_push(paddr_t pa)
{
	static int first_time = 1;

	if (first_time) {
		first_time = 0;
		return (window);
	}

	if (kbm_pae_support)
		save_pte = *((x86pte_t *)pte_to_window);
	else
		save_pte = *((x86pte32_t *)pte_to_window);
	return (kbm_remap_window(pa, 0));
}

/*
 * Restore the window PTE saved by the last kbm_push() and flush the
 * stale translation for the window.
 */
void
kbm_pop(void)
{
#ifdef __xpv
	if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte,
	    UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = save_pte;
	else
		*((x86pte32_t *)pte_to_window) = save_pte;
	mmu_tlbflush_entry(window);
#endif
}

/*
 * Read the PTE at "index" in the page table at physical address "table",
 * widening 32 bit (non-PAE) entries to x86pte_t.
 */
x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	void *table_ptr = kbm_remap_window(table, 0);

	if (kbm_pae_support)
		return (((x86pte_t *)table_ptr)[index]);
	return (((x86pte32_t *)table_ptr)[index]);
}

#ifndef __xpv
/*
 * Write "pteval" at "index" in the page table at physical address "table".
 *
 * NOTE(review): the window is requested with writeable == 0, but on bare
 * metal kbm_remap_window() ignores that flag and always maps the window
 * writable, so the store below works -- confirm if that ever changes.
 */
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	void *table_ptr = kbm_remap_window(table, 0);
	if (kbm_pae_support)
		((x86pte_t *)table_ptr)[index] = pteval;
	else
		((x86pte32_t *)table_ptr)[index] = pteval;
	/*
	 * Rewriting a top level entry in 32 bit PAE mode (top_level == 2)
	 * forces a CR3 reload -- presumably because the PDPTEs are cached
	 * when CR3 is loaded; confirm against the Intel SDM.
	 */
	if (level == top_level && level == 2)
		reload_cr3();
}
#endif

/*
 * Allocate and zero a new page table page and construct the entry
 * (*pteval) that should point at it.
 *
 * level - page table level of the new table.  When the new table is the
 * 32 bit PAE top level (level == 2) only PT_VALID is set in the entry
 * (presumably because PAE PDPTEs accept a restricted set of bits --
 * confirm); all other levels get PT_VALID|PT_REF|PT_USER|PT_WRITABLE.
 *
 * Returns the physical address of the new table.
 */
paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table;
	void *table_ptr;

	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
	table_ptr = kbm_remap_window(new_table, 1);
	bzero(table_ptr, MMU_PAGESIZE);
#ifdef __xpv
	/* Remove write permission to the new page table. */
	(void) kbm_remap_window(new_table, 0);
#endif

	if (level == top_level && level == 2)
		*pteval = pa_to_ma(new_table) | PT_VALID;
	else
		*pteval = pa_to_ma(new_table) |
		    PT_VALID | PT_REF | PT_USER | PT_WRITABLE;

	return (new_table);
}

/*
 * Map the page table at physical address "table" through the window and
 * return the virtual address of the entry at "index".  Entries are
 * pte_size bytes wide (set by kbm_init(): 8 with PAE, 4 without).
 */
x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	void *table_ptr = kbm_remap_window(table, 0);
	return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
}