1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 *
26 * Copyright 2018 Joyent, Inc.
27 */
28
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/archsystm.h>
32 #include <sys/debug.h>
33 #include <sys/bootconf.h>
34 #include <sys/bootsvcs.h>
35 #include <sys/bootinfo.h>
36 #include <sys/mman.h>
37 #include <sys/cmn_err.h>
38 #include <sys/param.h>
39 #include <sys/machparam.h>
40 #include <sys/machsystm.h>
41 #include <sys/promif.h>
42 #include <sys/kobj.h>
43 #ifdef __xpv
44 #include <sys/hypervisor.h>
45 #endif
46 #include <vm/kboot_mmu.h>
47 #include <vm/hat_pte.h>
48 #include <vm/hat_i86.h>
49 #include <vm/seg_kmem.h>
50
#if 0
/*
 * Joe's debug printing
 */
#define	DBG(x) \
	bop_printf(NULL, "kboot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x));
#else
#define	DBG(x)	/* naught */
#endif

/*
 * Page table and memory stuff.
 */
static caddr_t window;		/* VA used to temporarily map page table pages */
static caddr_t pte_to_window;	/* VA of the PTE that maps "window" itself */

/*
 * These are needed by mmu_init()
 */
int kbm_nx_support = 0;		/* NX bit in PTEs is in use */
int kbm_pae_support = 0;	/* PAE is 64 bit Page table entries */
int kbm_pge_support = 0;	/* PGE is Page table global bit enabled */
int kbm_largepage_support = 0;
uint_t kbm_nucleus_size = 0;

/*
 * Page size / offset / mask for page table level "l", driven by the
 * shift_amt[] table that kbm_init() selects (PAE vs. non-PAE).
 */
#define	BOOT_SHIFT(l)	(shift_amt[l])
#define	BOOT_SZ(l)	((size_t)1 << BOOT_SHIFT(l))
#define	BOOT_OFFSET(l)	(BOOT_SZ(l) - 1)
#define	BOOT_MASK(l)	(~BOOT_OFFSET(l))
80
81 /*
82 * Initialize memory management parameters for boot time page table management
83 */
void
kbm_init(struct xboot_info *bi)
{
	/*
	 * configure mmu information
	 */
	kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
	kbm_largepage_support = bi->bi_use_largepage;
	kbm_nx_support = bi->bi_use_nx;
	kbm_pae_support = bi->bi_use_pae;
	kbm_pge_support = bi->bi_use_pge;
	window = bi->bi_pt_window;
	DBG(window);
	pte_to_window = bi->bi_pte_to_pt_window;
	DBG(pte_to_window);
	if (kbm_pae_support) {
		/*
		 * PAE (and amd64): 8-byte PTEs, 512 entries per 4K page
		 * table, 2M large pages.  amd64 has 4 paging levels
		 * (top index 3); 32-bit PAE has 3 (top index 2).
		 */
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#ifdef __amd64
		top_level = 3;
#else
		top_level = 2;
#endif
	} else {
		/*
		 * Non-PAE 32-bit: 4-byte PTEs, 1024 entries per 4K page
		 * table, 4M large pages, 2 paging levels (top index 1).
		 */
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}

#ifdef __xpv
	/* Xen: record the start-info page and the pfn->mfn translation list */
	xen_info = bi->bi_xen_start_info;
	mfn_list = (mfn_t *)xen_info->mfn_list;
	DBG(mfn_list);
	mfn_count = xen_info->nr_pages;
	DBG(mfn_count);
#endif
	top_page_table = bi->bi_top_page_table;
	DBG(top_page_table);
}
127
128 /*
129 * Change the addressible page table window to point at a given page
130 */
/*ARGSUSED*/
void *
kbm_remap_window(paddr_t physaddr, int writeable)
{
	x86pte_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;

	DBG(physaddr);

#ifdef __xpv
	/*
	 * Under Xen the "writeable" argument is honored; page table
	 * pages, for instance, must be mapped read-only.
	 */
	if (!writeable)
		pt_bits &= ~PT_WRITABLE;
	if (HYPERVISOR_update_va_mapping((uintptr_t)window,
	    pa_to_ma(physaddr) | pt_bits, UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	/*
	 * Bare metal: rewrite the window's own PTE directly and flush
	 * the stale TLB entry.  "writeable" is ignored here (hence the
	 * ARGSUSED above) -- the window is always mapped writable.
	 */
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
	else
		*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
	mmu_invlpg(window);
#endif
	DBG(window);
	return (window);
}
155
156 /*
157 * Add a mapping for the physical page at the given virtual address.
158 */
void
kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
{
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pteval;

	/* once the real kernel HAT is running, boot mappings are frozen */
	if (khat_running)
		panic("kbm_map() called too late");

	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
	if (level >= 1)
		pteval |= PT_PAGESIZE;	/* mapping at a PTP level => large page */
	if (kbm_pge_support && is_kernel)
		pteval |= PT_GLOBAL;	/* kernel mappings survive cr3 reloads */

#ifdef __xpv
	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	ptep = find_pte(va, &pte_physaddr, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_map: find_pte returned NULL");

#ifdef __xpv
	/* retry now that the intermediate tables exist */
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = pteval;
	mmu_invlpg((caddr_t)va);
#endif
}
203
204 #ifdef __xpv
205
206 /*
207 * Add a mapping for the machine page at the given virtual address.
208 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	paddr_t pte_physaddr;
	x86pte_t pteval;

	pteval = ma | PT_NOCONSIST | PT_VALID | PT_REF | PT_WRITABLE;
	if (level == 1)
		pteval |= PT_PAGESIZE;	/* level 1 mapping => large page */

	/*
	 * try update_va_mapping first - fails if page table is missing.
	 */
	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables
	 */
	(void) find_pte(va, &pte_physaddr, level, 0);

	/* retry the hypercall now that the intermediate tables exist */
	if (HYPERVISOR_update_va_mapping(va,
	    pteval, UVMF_INVLPG | UVMF_LOCAL) != 0)
		bop_panic("HYPERVISOR_update_va_mapping failed");
}
236
237 #endif /* __xpv */
238
239
240 /*
241 * Probe the boot time page tables to find the first mapping
242 * including va (or higher) and return non-zero if one is found.
243 * va is updated to the starting address and len to the pagesize.
244 * pp will be set to point to the 1st page_t of the mapped page(s).
245 *
246 * Note that if va is in the middle of a large page, the returned va
247 * will be less than what was asked for.
248 */
int
kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
{
	uintptr_t probe_va;
	x86pte_t *ptep;
	paddr_t pte_physaddr;
	x86pte_t pte_val;
	level_t l;

	if (khat_running)
		panic("kbm_probe() called too late");
	*len = 0;
	*pfn = PFN_INVALID;
	*prot = 0;
	probe_va = *va;
restart_new_va:
	/* (re)start the walk from the top level page table */
	l = top_level;
	for (;;) {
		/* skip over the non-canonical VA hole */
		if (IN_VA_HOLE(probe_va))
			probe_va = mmu.hole_end;

		/*
		 * On 64-bit Xen the hypervisor-reserved VA range can be
		 * skipped over; otherwise reaching it ends the probe.
		 */
		if (IN_HYPERVISOR_VA(probe_va))
#if defined(__amd64) && defined(__xpv)
			probe_va = HYPERVISOR_VIRT_END;
#else
			return (0);
#endif

		/*
		 * If we don't have a valid PTP/PTE at this level
		 * then we can bump VA by this level's pagesize and try again.
		 * When the probe_va wraps around, we are done.
		 */
		ptep = find_pte(probe_va, &pte_physaddr, l, 1);
		if (ptep == NULL)
			bop_panic("kbm_probe: find_pte returned NULL");
		if (kbm_pae_support)
			pte_val = *ptep;
		else
			pte_val = *((x86pte32_t *)ptep);
		if (!PTE_ISVALID(pte_val)) {
			probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
			if (probe_va <= *va)
				return (0);	/* wrapped: nothing left */
			goto restart_new_va;
		}

		/*
		 * If this entry is a pointer to a lower level page table
		 * go down to it.
		 */
		if (!PTE_ISPAGE(pte_val, l)) {
			ASSERT(l > 0);
			--l;
			continue;
		}

		/*
		 * We found a boot level page table entry
		 */
		*len = BOOT_SZ(l);
		*va = probe_va & ~(*len - 1);	/* round down to page start */
		*pfn = PTE2PFN(pte_val, l);


		*prot = PROT_READ | PROT_EXEC;
		if (PTE_GET(pte_val, PT_WRITABLE))
			*prot |= PROT_WRITE;

		/*
		 * pt_nx is cleared if processor doesn't support NX bit
		 */
		if (PTE_GET(pte_val, mmu.pt_nx))
			*prot &= ~PROT_EXEC;

		return (1);
	}
}
327
328
329 /*
330 * Destroy a boot loader page table 4K mapping.
331 */
void
kbm_unmap(uintptr_t va)
{
	if (khat_running)
		panic("kbm_unmap() called too late");
	else {
#ifdef __xpv
		/* under Xen, clearing a PTE is done via hypercall */
		(void) HYPERVISOR_update_va_mapping(va, 0,
		    UVMF_INVLPG | UVMF_LOCAL);
#else
		x86pte_t *ptep;
		level_t level = 0;
		uint_t probe_only = 1;

		/* probe only: don't create missing intermediate tables */
		ptep = find_pte(va, NULL, level, probe_only);
		if (ptep == NULL)
			return;		/* nothing mapped there */

		if (kbm_pae_support)
			*ptep = 0;
		else
			*((x86pte32_t *)ptep) = 0;
		mmu_invlpg((caddr_t)va);
#endif
	}
}
358
359
360 /*
361 * Change a boot loader page table 4K mapping.
362 * Returns the pfn of the old mapping.
363 */
pfn_t
kbm_remap(uintptr_t va, pfn_t pfn)
{
	x86pte_t *ptep;
	level_t level = 0;
	uint_t probe_only = 1;
	x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
	    PT_NOCONSIST | PT_VALID;
	x86pte_t old_pte;

	if (khat_running)
		panic("kbm_remap() called too late");
	ptep = find_pte(va, NULL, level, probe_only);
	if (ptep == NULL)
		bop_panic("kbm_remap: find_pte returned NULL");

	/* capture the previous PTE so we can return its pfn below */
	if (kbm_pae_support)
		old_pte = *ptep;
	else
		old_pte = *((x86pte32_t *)ptep);

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)ptep) = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_invlpg((caddr_t)va);
#endif

	/*
	 * ma_to_pa() == -1 means the old machine address had no physical
	 * backing; treat that the same as an invalid old mapping.
	 */
	if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
		return (PFN_INVALID);
	return (mmu_btop(ma_to_pa(old_pte)));
}
400
401
402 /*
403 * Change a boot loader page table 4K mapping to read only.
404 */
void
kbm_read_only(uintptr_t va, paddr_t pa)
{
	/*
	 * No PT_WRITABLE, so the new mapping is read-only.  PT_REF and
	 * PT_MOD are preset -- presumably so hardware never needs to set
	 * them on this now-unwritable entry (NOTE(review): confirm).
	 */
	x86pte_t pte_val = pa_to_ma(pa) |
	    PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;

#ifdef __xpv
	if (HYPERVISOR_update_va_mapping(va, pte_val, UVMF_INVLPG | UVMF_LOCAL))
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	x86pte_t *ptep;
	level_t level = 0;

	ptep = find_pte(va, NULL, level, 0);
	if (ptep == NULL)
		bop_panic("kbm_read_only: find_pte returned NULL");

	if (kbm_pae_support)
		*ptep = pte_val;
	else
		*((x86pte32_t *)ptep) = pte_val;
	mmu_invlpg((caddr_t)va);
#endif
}
429
430 /*
431 * interfaces for kernel debugger to access physical memory
432 */
433 static x86pte_t save_pte;
434
435 void *
436 kbm_push(paddr_t pa)
437 {
438 static int first_time = 1;
439
440 if (first_time) {
441 first_time = 0;
442 return (window);
443 }
444
445 if (kbm_pae_support)
446 save_pte = *((x86pte_t *)pte_to_window);
447 else
448 save_pte = *((x86pte32_t *)pte_to_window);
449 return (kbm_remap_window(pa, 0));
450 }
451
/* Restore the window mapping that kbm_push() saved in save_pte. */
void
kbm_pop(void)
{
#ifdef __xpv
	if (HYPERVISOR_update_va_mapping((uintptr_t)window, save_pte,
	    UVMF_INVLPG | UVMF_LOCAL) < 0)
		bop_panic("HYPERVISOR_update_va_mapping() failed");
#else
	if (kbm_pae_support)
		*((x86pte_t *)pte_to_window) = save_pte;
	else
		*((x86pte32_t *)pte_to_window) = save_pte;
	mmu_invlpg(window);
#endif
}
467
468 x86pte_t
469 get_pteval(paddr_t table, uint_t index)
470 {
471 void *table_ptr = kbm_remap_window(table, 0);
472
473 if (kbm_pae_support)
474 return (((x86pte_t *)table_ptr)[index]);
475 return (((x86pte32_t *)table_ptr)[index]);
476 }
477
478 #ifndef __xpv
479 void
480 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
481 {
482 void *table_ptr = kbm_remap_window(table, 0);
483 if (kbm_pae_support)
484 ((x86pte_t *)table_ptr)[index] = pteval;
485 else
486 ((x86pte32_t *)table_ptr)[index] = pteval;
487 if (level == top_level && level == 2)
488 reload_cr3();
489 }
490 #endif
491
/*
 * Allocate and zero a new boot-time page table page; set *pteval to the
 * entry that points to it and return its physical address.
 */
paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table;
	void *table_ptr;

	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
	table_ptr = kbm_remap_window(new_table, 1);
	bzero(table_ptr, MMU_PAGESIZE);
#ifdef __xpv
	/* Remove write permission to the new page table. */
	(void) kbm_remap_window(new_table, 0);
#endif

	/*
	 * level == top_level == 2 is the 32-bit PAE top table, whose
	 * entries carry only PT_VALID -- presumably because the PAE
	 * PDPTE format restricts which flag bits may be set
	 * (NOTE(review): confirm against the architecture manual).
	 */
	if (level == top_level && level == 2)
		*pteval = pa_to_ma(new_table) | PT_VALID;
	else
		*pteval = pa_to_ma(new_table) |
		    PT_VALID | PT_REF | PT_USER | PT_WRITABLE;

	return (new_table);
}
514
515 x86pte_t *
516 map_pte(paddr_t table, uint_t index)
517 {
518 void *table_ptr = kbm_remap_window(table, 0);
519 return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
520 }