/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2014 by Delphix. All rights reserved.
 * Copyright 2018 Joyent, Inc.
 */

#ifndef	_VM_HAT_I86_H
#define	_VM_HAT_I86_H


#ifdef	__cplusplus
extern "C" {
#endif

/*
 * VM - Hardware Address Translation management.
 *
 * This file describes the contents of the x86_64 HAT data structures.
 */
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/cpuvar.h>
#include <sys/x_call.h>
#include <vm/seg.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/vm_machparam.h>
#include <sys/promif.h>
#include <vm/hat_pte.h>
#include <vm/htable.h>
#include <vm/hment.h>

/*
 * The essential data types involved:
 *
 * htable_t - There is one of these for each page table and it is used
 * by the HAT to manage the page table.
 *
 * hment_t - Links together multiple PTEs to a single page.
 */

/*
 * Maximum number of per-CPU pagetable entries that we'll need to cache in the
 * HAT. See the big theory statement in uts/i86pc/vm/hat_i86.c for more
 * information.
 */
#if defined(__xpv)
/*
 * The Xen hypervisor does not use per-CPU pagetables (PCP). Define a single
 * entry anyway, so the struct member always exists and does not need to be
 * conditional.
 */
#define	MAX_COPIED_PTES	1
#else
/*
 * The 64-bit kernel may have up to 512 PTEs present in it for a given process.
 */
#define	MAX_COPIED_PTES	512
#endif	/* __xpv */

#define	TOP_LEVEL(h)	(((h)->hat_max_level))

/*
 * The hat struct exists for each address space.
 */
struct hat {
	kmutex_t	hat_mutex;
	struct as	*hat_as;
	uint_t		hat_stats;
	pgcnt_t		hat_pages_mapped[MAX_PAGE_LEVEL + 1];
	pgcnt_t		hat_ism_pgcnt;
	cpuset_t	hat_cpus;
	uint16_t	hat_flags;
	uint8_t		hat_max_level;	/* top level of this HAT */
	uint_t		hat_num_copied;	/* Actual num of hat_copied_ptes[] */
	htable_t	*hat_htable;	/* top level htable */
	struct hat	*hat_next;
	struct hat	*hat_prev;
	uint_t		hat_num_hash;	/* number of htable hash buckets */
	htable_t	**hat_ht_hash;	/* htable hash buckets */
	htable_t	*hat_ht_cached;	/* cached free htables */
	x86pte_t	hat_copied_ptes[MAX_COPIED_PTES];
#if defined(__amd64) && defined(__xpv)
	pfn_t		hat_user_ptable; /* alt top ptable for user mode */
#endif
};
typedef struct hat hat_t;

#define	PGCNT_INC(hat, level) \
	atomic_inc_ulong(&(hat)->hat_pages_mapped[level]);
#define	PGCNT_DEC(hat, level) \
	atomic_dec_ulong(&(hat)->hat_pages_mapped[level]);
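
/*
 * Illustrative use of the counters above (a sketch, not code taken verbatim
 * from hat_i86.c): a path that establishes or tears down a mapping at a given
 * page table level updates the per-level count for that hat, e.g.
 *
 *	PGCNT_INC(hat, l);	// after installing a new PTE at level l
 *	...
 *	PGCNT_DEC(hat, l);	// after removing that PTE
 */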

/*
 * Flags for the hat_flags field. For more information, please see the big
 * theory statement on the HAT design in uts/i86pc/vm/hat_i86.c.
 *
 * HAT_FREEING - set when HAT is being destroyed - mostly used to detect that
 * demap()s can be avoided.
 *
 * HAT_COPIED - Indicates this HAT is a source for per-cpu page tables: see the
 * big comment in hat_i86.c for a description.
 *
 * HAT_COPIED_32 - HAT_COPIED, but for an ILP32 process.
 *
 * HAT_VICTIM - This is set while a hat is being examined for page table
 * stealing and prevents it from being freed.
 *
 * HAT_SHARED - The hat has exported its page tables via hat_share().
 *
 * HAT_PINNED - On the hypervisor, indicates the top page table has been pinned.
 *
 * HAT_PCP - Used for the per-cpu user page table (i.e. associated with a CPU,
 * not a process).
 */
#define	HAT_FREEING	(0x0001)
#define	HAT_VICTIM	(0x0002)
#define	HAT_SHARED	(0x0004)
#define	HAT_PINNED	(0x0008)
#define	HAT_COPIED	(0x0010)
#define	HAT_COPIED_32	(0x0020)
#define	HAT_PCP		(0x0040)
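
/*
 * Illustrative flag test (a sketch, not lifted from hat_i86.c): a caller that
 * must special-case a HAT whose top-level entries are copied into per-CPU
 * page tables would check HAT_COPIED under the appropriate locking, e.g.
 *
 *	if (hat->hat_flags & HAT_COPIED) {
 *		// copy hat->hat_copied_ptes[0 .. hat->hat_num_copied - 1]
 *		// into the current CPU's top-level page table
 *	}
 */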

/*
 * Additional platform attribute for hat_devload() to force no caching.
 */
#define	HAT_PLAT_NOCACHE	(0x100000)

/*
 * Simple statistics for the HAT. These are just counters that are
 * atomically incremented. They can be reset directly from the kernel
 * debugger.
 */
struct hatstats {
	ulong_t	hs_reap_attempts;
	ulong_t	hs_reaped;
	ulong_t	hs_steals;
	ulong_t	hs_ptable_allocs;
	ulong_t	hs_ptable_frees;
	ulong_t	hs_htable_rgets;	/* allocs from reserve */
	ulong_t	hs_htable_rputs;	/* putbacks to reserve */
	ulong_t	hs_htable_shared;	/* number of htables shared */
	ulong_t	hs_htable_unshared;	/* number of htables unshared */
	ulong_t	hs_hm_alloc;
	ulong_t	hs_hm_free;
	ulong_t	hs_hm_put_reserve;
	ulong_t	hs_hm_get_reserve;
	ulong_t	hs_hm_steals;
	ulong_t	hs_hm_steal_exam;
	ulong_t	hs_tlb_inval_delayed;
	ulong_t	hs_hat_copied64;
	ulong_t	hs_hat_copied32;
	ulong_t	hs_hat_normal64;
};
extern struct hatstats hatstat;
#ifdef DEBUG
#define	HATSTAT_INC(x)	(++hatstat.x)
#else
#define	HATSTAT_INC(x)	(0)
#endif
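
/*
 * Illustrative use (a sketch): an allocation path bumps the matching counter,
 * which collapses to the no-op expression (0) on non-DEBUG kernels, e.g.
 *
 *	HATSTAT_INC(hs_hm_alloc);
 */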

#if defined(_KERNEL)

/*
 * Useful macros to align hat_XXX() address arguments to a page boundary
 * and to test whether an address is already page aligned.
 */
#define	ALIGN2PAGE(a)		((uintptr_t)(a) & MMU_PAGEMASK)
#define	IS_PAGEALIGNED(a)	(((uintptr_t)(a) & MMU_PAGEOFFSET) == 0)
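
/*
 * Illustrative use (a sketch): a routine given an arbitrary virtual address
 * typically truncates it to its page and asserts what it expects, e.g.
 *
 *	uintptr_t va = ALIGN2PAGE(addr);
 *	ASSERT(IS_PAGEALIGNED(va));
 */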

extern uint_t	khat_running;	/* set at end of hat_kern_setup() */
extern cpuset_t khat_cpuset;	/* cpuset for kernel address demap Xcalls */
extern kmutex_t hat_list_lock;
extern kcondvar_t hat_list_cv;

/*
 * Interfaces to set up a cpu private mapping (i.e. preemption disabled).
 * The attr and flags arguments are the same as for hat_devload().
 * setup() must be called once, then any number of calls to remap(),
 * followed by a final call to release().
 *
 * Used by ppcopy(), page_zero(), the memscrubber, and the kernel debugger.
 */
typedef paddr_t hat_mempte_t;	/* phys addr of PTE */
extern hat_mempte_t hat_mempte_setup(caddr_t addr);
extern void hat_mempte_remap(pfn_t, caddr_t, hat_mempte_t,
	uint_t attr, uint_t flags);
extern void hat_mempte_release(caddr_t addr, hat_mempte_t);
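
/*
 * Illustrative call sequence (a sketch of the contract described above, not
 * code copied from a caller): the PTE slot is reserved once, retargeted at
 * different pfns as needed with preemption disabled, and finally released.
 *
 *	hat_mempte_t pte_pa = hat_mempte_setup(va);
 *	...
 *	kpreempt_disable();
 *	hat_mempte_remap(pfn, va, pte_pa, PROT_READ | PROT_WRITE, 0);
 *	// ... access the page through va ...
 *	kpreempt_enable();
 *	...
 *	hat_mempte_release(va, pte_pa);
 */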

/*
 * Interfaces to manage which thread has access to htable and hment reserves.
 * The USE_HAT_RESERVES macro should always be recomputed in full. Its value
 * (due to curthread) can change after any call into kmem/vmem.
 */
extern uint_t can_steal_post_boot;
extern uint_t use_boot_reserve;
#define	USE_HAT_RESERVES()					\
	(use_boot_reserve || curthread->t_hatdepth > 1 ||	\
	    panicstr != NULL || vmem_is_populator())
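
/*
 * Illustrative use (a sketch; the helper and cache names here are
 * hypothetical): allocation paths re-evaluate the macro at each decision
 * point rather than caching its result, since any kmem/vmem call in between
 * can change it, e.g.
 *
 *	if (USE_HAT_RESERVES())
 *		hm = take_hment_from_reserve();		(hypothetical helper)
 *	else
 *		hm = kmem_cache_alloc(hment_cache, KM_NOSLEEP);
 */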

/*
 * initialization stuff needed by startup, mp_startup...
 */
extern void hat_cpu_online(struct cpu *);
extern void hat_cpu_offline(struct cpu *);
extern void setup_vaddr_for_ppcopy(struct cpu *);
extern void teardown_vaddr_for_ppcopy(struct cpu *);
extern void clear_boot_mappings(uintptr_t, uintptr_t);

/*
 * magic value to indicate that all TLB entries should be demapped.
 */
#define	DEMAP_ALL_ADDR	(~(uintptr_t)0)
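
/*
 * Illustrative use (a sketch): passing DEMAP_ALL_ADDR as the virtual address
 * to a demap interface such as hat_tlb_inval() (declared below) requests that
 * the entire TLB be invalidated rather than a single page, e.g.
 *
 *	hat_tlb_inval(hat, DEMAP_ALL_ADDR);
 */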

/*
 * not in any include file???
 */
extern void halt(char *fmt);

/*
 * x86-specific routines for use in startup or the i86pc/vm files
 */
extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
	caddr_t ekernelheap);
extern void hat_kern_setup(void);
extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
	x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
extern void hat_init_finish(void);
extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
extern pfn_t hat_kpm_va2pfn(caddr_t);
extern page_t *hat_kpm_vaddr2page(caddr_t);
extern uintptr_t hat_kernelbase(uintptr_t);
extern void hat_kmap_init(uintptr_t base, size_t len);

extern hment_t *hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry);

extern void mmu_calc_user_slots(void);
extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
extern void hat_switch(struct hat *hat);

#define	TLB_RANGE_LEN(r)	((r)->tr_cnt << LEVEL_SHIFT((r)->tr_level))

/*
 * A range of virtual pages for purposes of demapping.
 */
typedef struct tlb_range {
	uintptr_t tr_va;	/* address of page */
	ulong_t	tr_cnt;		/* number of pages in range */
	int8_t	tr_level;	/* page table level */
} tlb_range_t;
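
/*
 * Illustrative use (a sketch): a demap of N level-0 (4K) pages starting at va
 * could be described as
 *
 *	tlb_range_t r = { .tr_va = va, .tr_cnt = N, .tr_level = 0 };
 *
 * in which case TLB_RANGE_LEN(&r) gives the span of the range in bytes
 * (N << LEVEL_SHIFT(0), i.e. N * 4096).
 */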

#if defined(__xpv)

#define	XPV_DISALLOW_MIGRATE()	xen_block_migrate()
#define	XPV_ALLOW_MIGRATE()	xen_allow_migrate()

#define	mmu_flush_tlb_page(va)	mmu_invlpg((caddr_t)va)
#define	mmu_flush_tlb_kpage(va)	mmu_invlpg((caddr_t)va)

/*
 * Interfaces to use around code that maps/unmaps grant table references.
 */
extern void hat_prepare_mapping(hat_t *, caddr_t, uint64_t *);
extern void hat_release_mapping(hat_t *, caddr_t);

#else

#define	XPV_DISALLOW_MIGRATE()	/* nothing */
#define	XPV_ALLOW_MIGRATE()	/* nothing */

#define	pfn_is_foreign(pfn)	__lintzero

typedef enum flush_tlb_type {
	FLUSH_TLB_ALL = 1,
	FLUSH_TLB_NONGLOBAL = 2,
	FLUSH_TLB_RANGE = 3,
} flush_tlb_type_t;

extern void mmu_flush_tlb(flush_tlb_type_t, tlb_range_t *);
extern void mmu_flush_tlb_kpage(uintptr_t);
extern void mmu_flush_tlb_page(uintptr_t);
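
/*
 * Illustrative use (a sketch): a caller flushing the range described in the
 * tlb_range_t example above would issue
 *
 *	mmu_flush_tlb(FLUSH_TLB_RANGE, &r);
 *
 * For the other flush types the range argument is presumably not meaningful
 * and NULL would typically be passed.
 */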

extern void hati_cpu_punchin(cpu_t *cpu, uintptr_t va, uint_t attrs);

/*
 * routines to deal with delayed TLB invalidations for idle CPUs
 */
extern void tlb_going_idle(void);
extern void tlb_service(void);

#endif	/* !__xpv */

#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _VM_HAT_I86_H */