Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/vm/hat_kdi.c
+++ new/usr/src/uts/i86pc/vm/hat_kdi.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 + *
26 + * Copyright 2018 Joyent, Inc.
25 27 */
26 28
27 29 /*
28 30 * HAT interfaces used by the kernel debugger to interact with the VM system.
29 31 * These interfaces are invoked when the world is stopped. As such, no blocking
30 32 * operations may be performed.
31 33 */
32 34
33 35 #include <sys/cpuvar.h>
34 36 #include <sys/kdi_impl.h>
35 37 #include <sys/errno.h>
36 38 #include <sys/systm.h>
37 39 #include <sys/sysmacros.h>
38 40 #include <sys/mman.h>
39 41 #include <sys/bootconf.h>
40 42 #include <sys/cmn_err.h>
41 43 #include <vm/seg_kmem.h>
42 44 #include <vm/hat_i86.h>
43 45 #if defined(__xpv)
44 46 #include <sys/hypervisor.h>
45 47 #endif
46 48 #include <sys/bootinfo.h>
47 49 #include <vm/kboot_mmu.h>
48 50 #include <sys/machsystm.h>
49 51
/*
 * The debugger needs direct access to the PTE of one page table entry
 * in order to implement vtop and physical read/writes
 */
static uintptr_t hat_kdi_page = 0;	/* vaddr for physical page accesses */
static uint_t use_kbm = 1;		/* boot-time: use kbm_push/kbm_pop */
uint_t hat_kdi_use_pae;			/* if 0, use x86pte32_t for pte type */

#if !defined(__xpv)
static x86pte_t *hat_kdi_pte = NULL;	/* vaddr of pte for hat_kdi_page */
#endif
61 63
/*
 * Get the address for remapping physical pages during boot
 */
void
hat_boot_kdi_init(void)
{
	hat_kdi_page = (uintptr_t)kbm_push(0);	/* first call gets address... */
}
70 72
/*
 * Switch to using a page in the kernel's va range for physical memory access.
 * We need to allocate a virtual page, then permanently map in the page that
 * contains the PTE to it.
 */
void
hat_kdi_init(void)
{
	/*LINTED:set but not used in function*/
	htable_t *ht __unused;

	/*
	 * Get a kernel page VA to use for phys mem access. Then make sure
	 * the VA has a page table.
	 */
	hat_kdi_use_pae = mmu.pae_hat;
	hat_kdi_page = (uintptr_t)vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ht = htable_create(kas.a_hat, hat_kdi_page, 0, NULL);
	use_kbm = 0;

#ifndef __xpv
	/*
	 * Get an address at which to put the pagetable and devload it.
	 * hat_kdi_pte first points at the mapped pagetable page, then is
	 * advanced to the specific PTE slot that maps hat_kdi_page.
	 */
	hat_kdi_pte = vmem_xalloc(heap_arena, MMU_PAGESIZE, MMU_PAGESIZE, 0,
	    0, NULL, NULL, VM_SLEEP);
	hat_devload(kas.a_hat, (caddr_t)hat_kdi_pte, MMU_PAGESIZE, ht->ht_pfn,
	    PROT_READ | PROT_WRITE | HAT_NOSYNC | HAT_UNORDERED_OK,
	    HAT_LOAD | HAT_LOAD_NOCONSIST);
	hat_kdi_pte =
	    PT_INDEX_PTR(hat_kdi_pte, htable_va2entry(hat_kdi_page, ht));

	/*
	 * Bump the valid count so the htable is never stolen/freed; the
	 * debugger keeps this mapping forever.
	 */
	HTABLE_INC(ht->ht_valid_cnt);
	htable_release(ht);
#endif
}
107 109
108 110 #ifdef __xpv
109 111
110 112 /*
111 113 * translate machine address to physical address
112 114 */
113 115 static uint64_t
114 116 kdi_ptom(uint64_t pa)
115 117 {
116 118 extern pfn_t *mfn_list;
117 119 ulong_t mfn = mfn_list[mmu_btop(pa)];
118 120
119 121 return (pfn_to_pa(mfn) | (pa & MMU_PAGEOFFSET));
120 122 }
121 123
/*
 * Translate a machine address back to a pseudo-physical address.
 * This is like mfn_to_pfn(), but we can't use ontrap() from kmdb.
 * Instead we let the fault happen and kmdb deals with it.
 * Addresses not owned by this domain come back with PFN_IS_FOREIGN_MFN set.
 */
static uint64_t
kdi_mtop(uint64_t ma)
{
	pfn_t pfn;
	mfn_t mfn = ma >> MMU_PAGESHIFT;

	/* MFNs beyond the end of machine RAM can't belong to us. */
	if (HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL) < mfn)
		return (ma | PFN_IS_FOREIGN_MFN);

	/* Only trust the M2P entry if the P2M mapping round-trips. */
	pfn = mfn_to_pfn_mapping[mfn];
	if (pfn >= mfn_count || pfn_to_mfn(pfn) != mfn)
		return (ma | PFN_IS_FOREIGN_MFN);
	return (pfn_to_pa(pfn) | (ma & MMU_PAGEOFFSET));
}
140 142
141 143 #else
142 144 #define kdi_mtop(m) (m)
143 145 #define kdi_ptom(p) (p)
144 146 #endif
145 147
/*
 * Translate kernel virtual address 'va' to a physical address, storing the
 * result in *pap.  Returns 0 on success, ENOENT if no valid mapping exists.
 * Before the kernel HAT is running we probe the boot loader's pagetables;
 * afterwards we walk the live pagetables by hand via kdi_pread().
 */
/*ARGSUSED*/
int
kdi_vtop(uintptr_t va, uint64_t *pap)
{
	uintptr_t vaddr = va;
	size_t len;
	pfn_t pfn;
	uint_t prot;
	int level;
	x86pte_t pte;
	int index;

	/*
	 * if the mmu struct isn't relevant yet, we need to probe
	 * the boot loader's pagetables.
	 */
	if (!khat_running) {
		if (kbm_probe(&vaddr, &len, &pfn, &prot) == 0)
			return (ENOENT);
		if (vaddr > va)
			return (ENOENT);
		/* probe may return a range starting below va; adjust pfn */
		if (vaddr < va)
			pfn += mmu_btop(va - vaddr);
		*pap = pfn_to_pa(pfn) + (vaddr & MMU_PAGEOFFSET);
		return (0);
	}

	/*
	 * We can't go through normal hat routines, so we'll use
	 * kdi_pread() to walk the page tables
	 */
#if defined(__xpv)
	*pap = pfn_to_pa(CPU->cpu_current_hat->hat_htable->ht_pfn);
#else
	*pap = getcr3_pa();
#endif
	/*
	 * Walk down from the top-level table; *pap accumulates the physical
	 * address of the PTE to read at each level.
	 */
	for (level = mmu.max_level; ; --level) {
		index = (va >> LEVEL_SHIFT(level)) & (mmu.ptes_per_table - 1);
		*pap += index << mmu.pte_size_shift;
		pte = 0;
		if (kdi_pread((caddr_t)&pte, mmu.pte_size, *pap, &len) != 0)
			return (ENOENT);
		if (pte == 0)
			return (ENOENT);
		if (level > 0 && level <= mmu.max_page_level &&
		    (pte & PT_PAGESIZE)) {
			/* large page mapping: translation ends here */
			*pap = kdi_mtop(pte & PT_PADDR_LGPG);
			break;
		} else {
			*pap = kdi_mtop(pte & PT_PADDR);
			if (level == 0)
				break;
		}
	}
	/* add back the offset within the final (possibly large) page */
	*pap += va & LEVEL_OFFSET(level);
	return (0);
}
203 205
/*
 * Common body for physical memory reads and writes.  One page at a time,
 * temporarily map the physical page at hat_kdi_page (via kbm_push() during
 * early boot, a hypervisor mapping update on xpv, or a direct PTE store
 * otherwise), copy in the requested direction, then tear the mapping down.
 * Returns 0 with the byte count in *ncopiedp, EAGAIN if called before any
 * initialization, ENOENT if nothing was copied.
 */
static int
kdi_prw(caddr_t buf, size_t nbytes, uint64_t pa, size_t *ncopiedp, int doread)
{
	size_t ncopied = 0;
	off_t pgoff;
	size_t sz;
	caddr_t va;
	caddr_t from;
	caddr_t to;
	x86pte_t pte;

	/*
	 * if this is called before any initialization - fail
	 */
	if (hat_kdi_page == 0)
		return (EAGAIN);

	while (nbytes > 0) {
		/*
		 * figure out the addresses and construct a minimal PTE
		 */
		pgoff = pa & MMU_PAGEOFFSET;
		sz = MIN(nbytes, MMU_PAGESIZE - pgoff);
		va = (caddr_t)hat_kdi_page + pgoff;
		pte = kdi_ptom(mmu_ptob(mmu_btop(pa))) | PT_VALID;
		if (doread) {
			from = va;
			to = buf;
		} else {
			/* writes need a writable mapping */
			PTE_SET(pte, PT_WRITABLE);
			from = buf;
			to = va;
		}

		/*
		 * map the physical page
		 */
		if (use_kbm)
			(void) kbm_push(pa);
#if defined(__xpv)
		else
			(void) HYPERVISOR_update_va_mapping(
			    (uintptr_t)va, pte, UVMF_INVLPG);
#else
		else if (hat_kdi_use_pae)
			*hat_kdi_pte = pte;
		else
			*(x86pte32_t *)hat_kdi_pte = pte;
		mmu_flush_tlb_kpage(hat_kdi_page);
#endif

		bcopy(from, to, sz);

		/*
		 * erase the mapping
		 */
		if (use_kbm)
			kbm_pop();
#if defined(__xpv)
		else
			(void) HYPERVISOR_update_va_mapping(
			    (uintptr_t)va, 0, UVMF_INVLPG);
#else
		else if (hat_kdi_use_pae)
			*hat_kdi_pte = 0;
		else
			*(x86pte32_t *)hat_kdi_pte = 0;
		mmu_flush_tlb_kpage(hat_kdi_page);
#endif

		buf += sz;
		pa += sz;
		nbytes -= sz;
		ncopied += sz;
	}

	if (ncopied == 0)
		return (ENOENT);

	*ncopiedp = ncopied;
	return (0);
}
286 288
287 289 int
288 290 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
↓ open down ↓ |
7 lines elided |
↑ open up ↑ |
289 291 {
290 292 return (kdi_prw(buf, nbytes, addr, ncopiedp, 1));
291 293 }
292 294
293 295 int
294 296 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
295 297 {
296 298 return (kdi_prw(buf, nbytes, addr, ncopiedp, 0));
297 299 }
298 300
#if !defined(__xpv)
/*
 * This gets used for flushing the TLB on all the slaves just prior to doing a
 * kdi_prw().  It's unclear why this was originally done, since kdi_prw()
 * itself will flush any lingering hat_kdi_page mappings, but let's presume it
 * was a good idea.
 */
void
kdi_flush_caches(void)
{
	mmu_flush_tlb(FLUSH_TLB_ALL, NULL);
}
#endif
299 314
/*
 * Return the number of bytes, relative to the beginning of a given range, that
 * are non-toxic (can be read from and written to with relative impunity).
 */
/*ARGSUSED*/
size_t
kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
{
#if defined(__amd64)
	extern uintptr_t toxic_addr;
	extern size_t toxic_size;

	/*
	 * Check 64 bit toxic range.
	 * NOTE(review): va + sz could wrap for ranges ending at the very top
	 * of the address space -- presumably callers never pass such ranges;
	 * confirm before relying on it.
	 */
	if (toxic_addr != 0 &&
	    va + sz >= toxic_addr &&
	    va < toxic_addr + toxic_size)
		return (va < toxic_addr ? toxic_addr - va : 0);

	/*
	 * avoid any Virtual Address hole
	 */
	if (va + sz >= hole_start && va < hole_end)
		return (va < hole_start ? hole_start - va : 0);

	return (sz);

#elif defined(__i386)
	extern void *device_arena_contains(void *, size_t, size_t *);
	uintptr_t v;

	/* On 32-bit, anything inside the device arena is toxic. */
	v = (uintptr_t)device_arena_contains((void *)va, sz, NULL);
	if (v == 0)
		return (sz);	/* no overlap: whole range is safe */
	else if (v <= va)
		return (0);	/* range starts inside the device arena */
	else
		return (v - va);	/* safe only up to the arena start */

#endif	/* __i386 */
}
↓ open down ↓ |
33 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX