10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
27 */
28 /*
29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
30 * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
31 */
32
33 /*
34 * VM - Hardware Address Translation management for i386 and amd64
35 *
36 * Implementation of the interfaces described in <common/vm/hat.h>
37 *
38 * Nearly all the details of how the hardware is managed should not be
39 * visible outside this layer except for misc. machine specific functions
40 * that work in conjunction with this code.
41 *
42 * Routines used only inside of i86pc/vm start with hati_ for HAT Internal.
43 */
44
45 #include <sys/machparam.h>
46 #include <sys/machsystm.h>
47 #include <sys/mman.h>
48 #include <sys/types.h>
49 #include <sys/systm.h>
1897 if (ht->ht_lock_cnt < 1)
1898 panic("hat_unlock(): lock_cnt < 1, "
1899 "htable=%p, vaddr=%p\n", (void *)ht, (void *)vaddr);
1900 HTABLE_LOCK_DEC(ht);
1901
1902 vaddr += LEVEL_SIZE(ht->ht_level);
1903 }
1904 if (ht)
1905 htable_release(ht);
1906 XPV_ALLOW_MIGRATE();
1907 }
1908
1909 /* ARGSUSED */
1910 void
1911 hat_unlock_region(struct hat *hat, caddr_t addr, size_t len,
1912 hat_region_cookie_t rcookie)
1913 {
1914 panic("No shared region support on x86");
1915 }
1916
1917 #if !defined(__xpv)
1918 /*
1919 * Cross call service routine to demap a range of virtual
1920 * pages on the current CPU or flush all mappings in TLB.
1921 */
1922 /*ARGSUSED*/
1923 static int
1924 hati_demap_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
1925 {
1926 hat_t *hat = (hat_t *)a1;
1927 caddr_t addr = (caddr_t)a2;
1928 size_t len = (size_t)a3;
1929
1930 /*
1931 * If the target hat isn't the kernel and this CPU isn't operating
1932 * in the target hat, we can ignore the cross call.
1933 */
1934 if (hat != kas.a_hat && hat != CPU->cpu_current_hat)
1935 return (0);
1936
1937 /*
1938 * For a normal address, we flush a range of contiguous mappings
1939 */
1940 if ((uintptr_t)addr != DEMAP_ALL_ADDR) {
1941 for (size_t i = 0; i < len; i += MMU_PAGESIZE)
1942 mmu_tlbflush_entry(addr + i);
1943 return (0);
1944 }
1945
1946 /*
1947 * Otherwise we reload cr3 to effect a complete TLB flush.
1948 *
1949 * A reload of cr3 on a VLP process also means we must recopy
1950 * the pte values from the struct hat.
1951 */
1952 if (hat->hat_flags & HAT_VLP) {
1953 #if defined(__amd64)
1954 x86pte_t *vlpptep = CPU->cpu_hat_info->hci_vlp_l2ptes;
1955
1956 VLP_COPY(hat->hat_vlp_ptes, vlpptep);
1957 #elif defined(__i386)
1958 reload_pae32(hat, CPU);
1959 #endif
1960 }
1961 reload_cr3();
2017
2018 /*
2019 * Atomic clear and fetch of old state.
2020 */
2021 while ((found = CAS_TLB_INFO(CPU, tlb_info, 0)) != tlb_info) {
2022 ASSERT(found & TLB_CPU_HALTED);
2023 tlb_info = found;
2024 SMT_PAUSE();
2025 }
2026 if (tlb_info & TLB_INVAL_ALL)
2027 flush_all_tlb_entries();
2028 }
2029 }
2030 #endif /* !__xpv */
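
/*
 * A minimal sketch of the atomic "fetch and clear" idiom used by the
 * CAS_TLB_INFO() loop above, written against atomic_cas_32() from
 * <sys/atomic.h>. The helper name is hypothetical and the function is
 * illustrative only, not part of this file.
 */
static uint32_t
fetch_and_clear(volatile uint32_t *statep)
{
	uint32_t old = *statep;
	uint32_t found;

	/* Retry until 0 is swapped in against an unchanged old value. */
	while ((found = atomic_cas_32(statep, old, 0)) != old)
		old = found;

	return (old);		/* prior state, now atomically cleared */
}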
2031
2032 /*
2033 * Internal routine to do cross calls to invalidate a range of pages on
2034 * all CPUs using a given hat.
2035 */
2036 void
2037 hat_tlb_inval_range(hat_t *hat, uintptr_t va, size_t len)
2038 {
2039 extern int flushes_require_xcalls; /* from mp_startup.c */
2040 cpuset_t justme;
2041 cpuset_t cpus_to_shootdown;
2042 #ifndef __xpv
2043 cpuset_t check_cpus;
2044 cpu_t *cpup;
2045 int c;
2046 #endif
2047
2048 /*
2049 * If the hat is being destroyed, there are no more users, so
2050 * demap need not do anything.
2051 */
2052 if (hat->hat_flags & HAT_FREEING)
2053 return;
2054
2055 /*
2056 * If demapping from a shared pagetable, we had best demap the
2057 * entire set of user TLBs, since we don't know at what addresses
2058 * these were shared.
2059 */
2060 if (hat->hat_flags & HAT_SHARED) {
2061 hat = kas.a_hat;
2062 va = DEMAP_ALL_ADDR;
2063 }
2064
2065 /*
2066 * if not running with multiple CPUs, don't use cross calls
2067 */
2068 if (panicstr || !flushes_require_xcalls) {
2069 #ifdef __xpv
2070 if (va == DEMAP_ALL_ADDR) {
2071 xen_flush_tlb();
2072 } else {
2073 for (size_t i = 0; i < len; i += MMU_PAGESIZE)
2074 xen_flush_va((caddr_t)(va + i));
2075 }
2076 #else
2077 (void) hati_demap_func((xc_arg_t)hat,
2078 (xc_arg_t)va, (xc_arg_t)len);
2079 #endif
2080 return;
2081 }
2082
2083
2084 /*
2085 * Determine CPUs to shootdown. Kernel changes always do all CPUs.
2086 * Otherwise it's just CPUs currently executing in this hat.
2087 */
2088 kpreempt_disable();
2089 CPUSET_ONLY(justme, CPU->cpu_id);
2090 if (hat == kas.a_hat)
2091 cpus_to_shootdown = khat_cpuset;
2092 else
2093 cpus_to_shootdown = hat->hat_cpus;
2094
2095 #ifndef __xpv
2096 /*
2097 * If any CPUs in the set are idle, just request a delayed flush
2098 * and avoid waking them up.
2117 }
2118 if (tlb_info == (TLB_CPU_HALTED | TLB_INVAL_ALL)) {
2119 HATSTAT_INC(hs_tlb_inval_delayed);
2120 CPUSET_DEL(cpus_to_shootdown, c);
2121 }
2122 }
2123 #endif
2124
2125 if (CPUSET_ISNULL(cpus_to_shootdown) ||
2126 CPUSET_ISEQUAL(cpus_to_shootdown, justme)) {
2127
2128 #ifdef __xpv
2129 if (va == DEMAP_ALL_ADDR) {
2130 xen_flush_tlb();
2131 } else {
2132 for (size_t i = 0; i < len; i += MMU_PAGESIZE)
2133 xen_flush_va((caddr_t)(va + i));
2134 }
2135 #else
2136 (void) hati_demap_func((xc_arg_t)hat,
2137 (xc_arg_t)va, (xc_arg_t)len);
2138 #endif
2139
2140 } else {
2141
2142 CPUSET_ADD(cpus_to_shootdown, CPU->cpu_id);
2143 #ifdef __xpv
2144 if (va == DEMAP_ALL_ADDR) {
2145 xen_gflush_tlb(cpus_to_shootdown);
2146 } else {
2147 for (size_t i = 0; i < len; i += MMU_PAGESIZE) {
2148 xen_gflush_va((caddr_t)(va + i),
2149 cpus_to_shootdown);
2150 }
2151 }
2152 #else
2153 xc_call((xc_arg_t)hat, (xc_arg_t)va, (xc_arg_t)len,
2154 CPUSET2BV(cpus_to_shootdown), hati_demap_func);
2155 #endif
2156
2157 }
2158 kpreempt_enable();
2159 }
2160
2161 void
2162 hat_tlb_inval(hat_t *hat, uintptr_t va)
2163 {
2164 hat_tlb_inval_range(hat, va, MMU_PAGESIZE);
2165 }
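
/*
 * Illustrative call (hedged; not taken from this excerpt): a full
 * shootdown rather than a single page can be requested with
 *
 *	hat_tlb_inval(kas.a_hat, DEMAP_ALL_ADDR);
 *
 * which makes hati_demap_func() reload cr3 on every CPU in khat_cpuset
 * instead of flushing individual TLB entries.
 */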
2166
2167 /*
2168 * Interior routine for HAT_UNLOADs from hat_unload_callback(),
2169 * hat_kmap_unload() OR from hat_steal() code. This routine doesn't
2170 * handle releasing of the htables.
2171 */
2172 void
2173 hat_pte_unmap(
2174 htable_t *ht,
2175 uint_t entry,
2176 uint_t flags,
2177 x86pte_t old_pte,
2178 void *pte_ptr,
2179 boolean_t tlb)
2180 {
2181 hat_t *hat = ht->ht_hat;
2182 hment_t *hm = NULL;
2183 page_t *pp = NULL;
2184 level_t l = ht->ht_level;
2311 hat_unload(hat_t *hat, caddr_t addr, size_t len, uint_t flags)
2312 {
2313 uintptr_t va = (uintptr_t)addr;
2314
2315 XPV_DISALLOW_MIGRATE();
2316 ASSERT(hat == kas.a_hat || va + len <= _userlimit);
2317
2318 /*
2319 * special case for performance.
2320 */
2321 if (mmu.kmap_addr <= va && va < mmu.kmap_eaddr) {
2322 ASSERT(hat == kas.a_hat);
2323 hat_kmap_unload(addr, len, flags);
2324 } else {
2325 hat_unload_callback(hat, addr, len, flags, NULL);
2326 }
2327 XPV_ALLOW_MIGRATE();
2328 }
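
/*
 * Illustrative usage (hedged; not taken from this excerpt): unmapping a
 * single page that was previously loaded with a lock might look like
 *
 *	hat_unload(kas.a_hat, addr, MMU_PAGESIZE, HAT_UNLOAD_UNLOCK);
 *
 * Addresses inside the kmap window take the hat_kmap_unload() fast path
 * above; all other ranges go through hat_unload_callback().
 */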
2329
2330 /*
2331 * Do the callbacks for ranges being unloaded.
2332 */
2333 typedef struct range_info {
2334 uintptr_t rng_va;
2335 ulong_t rng_cnt;
2336 level_t rng_level;
2337 } range_info_t;
2338
2339 /*
2340 * Invalidate the TLB, and perform the callback to the upper level VM system,
2341 * for the specified ranges of contiguous pages.
2342 */
2343 static void
2344 handle_ranges(hat_t *hat, hat_callback_t *cb, uint_t cnt, range_info_t *range)
2345 {
2346 while (cnt > 0) {
2347 size_t len;
2348
2349 --cnt;
2350 len = range[cnt].rng_cnt << LEVEL_SHIFT(range[cnt].rng_level);
2351 hat_tlb_inval_range(hat, (uintptr_t)range[cnt].rng_va, len);
2352
2353 if (cb != NULL) {
2354 cb->hcb_start_addr = (caddr_t)range[cnt].rng_va;
2355 cb->hcb_end_addr = cb->hcb_start_addr;
2356 cb->hcb_end_addr += len;
2357 cb->hcb_function(cb);
2358 }
2359 }
2360 }
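
/*
 * Worked example, assuming LEVEL_SHIFT(0) == MMU_PAGESHIFT == 12 on x86:
 * a range with rng_cnt == 16 and rng_level == 0 gives
 * len = 16 << 12 = 0x10000 bytes, i.e. sixteen contiguous MMU_PAGESIZE
 * pages invalidated by a single hat_tlb_inval_range() call.
 */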
2361
2362 /*
2363 * Unload a given range of addresses (has optional callback)
2364 *
2365 * Flags:
2366 * define HAT_UNLOAD 0x00
2367 * define HAT_UNLOAD_NOSYNC 0x02
2368 * define HAT_UNLOAD_UNLOCK 0x04
2369 * define HAT_UNLOAD_OTHER 0x08 - not used
2370 * define HAT_UNLOAD_UNMAP 0x10 - same as HAT_UNLOAD
2371 */
2372 #define MAX_UNLOAD_CNT (8)
2373 void
2374 hat_unload_callback(
2375 hat_t *hat,
2376 caddr_t addr,
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright (c) 2010, Intel Corporation.
26 * All rights reserved.
27 */
28 /*
29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
30 * Copyright 2017 Joyent, Inc. All rights reserved.
31 * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
32 */
33
34 /*
35 * VM - Hardware Address Translation management for i386 and amd64
36 *
37 * Implementation of the interfaces described in <common/vm/hat.h>
38 *
39 * Nearly all the details of how the hardware is managed should not be
40 * visible outside this layer except for misc. machine specific functions
41 * that work in conjunction with this code.
42 *
43 * Routines used only inside of i86pc/vm start with hati_ for HAT Internal.
44 */
45
46 #include <sys/machparam.h>
47 #include <sys/machsystm.h>
48 #include <sys/mman.h>
49 #include <sys/types.h>
50 #include <sys/systm.h>
1898 if (ht->ht_lock_cnt < 1)
1899 panic("hat_unlock(): lock_cnt < 1, "
1900 "htable=%p, vaddr=%p\n", (void *)ht, (void *)vaddr);
1901 HTABLE_LOCK_DEC(ht);
1902
1903 vaddr += LEVEL_SIZE(ht->ht_level);
1904 }
1905 if (ht)
1906 htable_release(ht);
1907 XPV_ALLOW_MIGRATE();
1908 }
1909
1910 /* ARGSUSED */
1911 void
1912 hat_unlock_region(struct hat *hat, caddr_t addr, size_t len,
1913 hat_region_cookie_t rcookie)
1914 {
1915 panic("No shared region support on x86");
1916 }
1917
1918 /*
1919 * A range of virtual pages for purposes of demapping.
1920 */
1921 typedef struct range_info {
1922 uintptr_t rng_va; /* address of page */
1923 ulong_t rng_cnt; /* number of pages in range */
1924 level_t rng_level; /* page table level */
1925 } range_info_t;
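
/*
 * A minimal helper sketch (hypothetical name, not part of the original
 * file): the number of bytes spanned by a range_info_t, matching the
 * rng_cnt << LEVEL_SHIFT(rng_level) computation done by the demap code
 * below.
 */
static size_t
range_info_bytes(const range_info_t *rng)
{
	return ((size_t)rng->rng_cnt << LEVEL_SHIFT(rng->rng_level));
}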
1926
1927 #if !defined(__xpv)
1928 /*
1929 * Cross call service routine to demap a range of virtual
1930 * pages on the current CPU or flush all mappings in TLB.
1931 */
1932 /*ARGSUSED*/
1933 static int
1934 hati_demap_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
1935 {
1936 hat_t *hat = (hat_t *)a1;
1937 range_info_t *range = (range_info_t *)a2;
1938 size_t len = (size_t)a3;
1939 caddr_t addr = (caddr_t)range->rng_va;
1940 size_t pgsz = LEVEL_SIZE(range->rng_level);
1941
1942 /*
1943 * If the target hat isn't the kernel and this CPU isn't operating
1944 * in the target hat, we can ignore the cross call.
1945 */
1946 if (hat != kas.a_hat && hat != CPU->cpu_current_hat)
1947 return (0);
1948
1949 /*
1950 * For a normal address, we flush a range of contiguous mappings
1951 */
1952 if ((uintptr_t)addr != DEMAP_ALL_ADDR) {
1953 for (size_t i = 0; i < len; i += pgsz)
1954 mmu_tlbflush_entry(addr + i);
1955 return (0);
1956 }
1957
1958 /*
1959 * Otherwise we reload cr3 to effect a complete TLB flush.
1960 *
1961 * A reload of cr3 on a VLP process also means we must recopy
1962 * the pte values from the struct hat.
1963 */
1964 if (hat->hat_flags & HAT_VLP) {
1965 #if defined(__amd64)
1966 x86pte_t *vlpptep = CPU->cpu_hat_info->hci_vlp_l2ptes;
1967
1968 VLP_COPY(hat->hat_vlp_ptes, vlpptep);
1969 #elif defined(__i386)
1970 reload_pae32(hat, CPU);
1971 #endif
1972 }
1973 reload_cr3();
2029
2030 /*
2031 * Atomic clear and fetch of old state.
2032 */
2033 while ((found = CAS_TLB_INFO(CPU, tlb_info, 0)) != tlb_info) {
2034 ASSERT(found & TLB_CPU_HALTED);
2035 tlb_info = found;
2036 SMT_PAUSE();
2037 }
2038 if (tlb_info & TLB_INVAL_ALL)
2039 flush_all_tlb_entries();
2040 }
2041 }
2042 #endif /* !__xpv */
2043
2044 /*
2045 * Internal routine to do cross calls to invalidate a range of pages on
2046 * all CPUs using a given hat.
2047 */
2048 void
2049 hat_tlb_inval_range(hat_t *hat, range_info_t *range)
2050 {
2051 extern int flushes_require_xcalls; /* from mp_startup.c */
2052 cpuset_t justme;
2053 cpuset_t cpus_to_shootdown;
2054 uintptr_t va = range->rng_va;
2055 size_t len = range->rng_cnt << LEVEL_SHIFT(range->rng_level);
2056 #ifndef __xpv
2057 cpuset_t check_cpus;
2058 cpu_t *cpup;
2059 int c;
2060 #endif
2061
2062 /*
2063 * If the hat is being destroyed, there are no more users, so
2064 * demap need not do anything.
2065 */
2066 if (hat->hat_flags & HAT_FREEING)
2067 return;
2068
2069 /*
2070 * If demapping from a shared pagetable, we had best demap the
2071 * entire set of user TLBs, since we don't know at what addresses
2072 * these were shared.
2073 */
2074 if (hat->hat_flags & HAT_SHARED) {
2075 hat = kas.a_hat;
2076 va = DEMAP_ALL_ADDR;
2077 }
2078
2079 /*
2080 * if not running with multiple CPUs, don't use cross calls
2081 */
2082 if (panicstr || !flushes_require_xcalls) {
2083 #ifdef __xpv
2084 if (va == DEMAP_ALL_ADDR) {
2085 xen_flush_tlb();
2086 } else {
2087 for (size_t i = 0; i < len; i += MMU_PAGESIZE)
2088 xen_flush_va((caddr_t)(va + i));
2089 }
2090 #else
2091 (void) hati_demap_func((xc_arg_t)hat,
2092 (xc_arg_t)range, (xc_arg_t)len);
2093 #endif
2094 return;
2095 }
2096
2097
2098 /*
2099 * Determine CPUs to shootdown. Kernel changes always do all CPUs.
2100 * Otherwise it's just CPUs currently executing in this hat.
2101 */
2102 kpreempt_disable();
2103 CPUSET_ONLY(justme, CPU->cpu_id);
2104 if (hat == kas.a_hat)
2105 cpus_to_shootdown = khat_cpuset;
2106 else
2107 cpus_to_shootdown = hat->hat_cpus;
2108
2109 #ifndef __xpv
2110 /*
2111 * If any CPUs in the set are idle, just request a delayed flush
2112 * and avoid waking them up.
2131 }
2132 if (tlb_info == (TLB_CPU_HALTED | TLB_INVAL_ALL)) {
2133 HATSTAT_INC(hs_tlb_inval_delayed);
2134 CPUSET_DEL(cpus_to_shootdown, c);
2135 }
2136 }
2137 #endif
2138
2139 if (CPUSET_ISNULL(cpus_to_shootdown) ||
2140 CPUSET_ISEQUAL(cpus_to_shootdown, justme)) {
2141
2142 #ifdef __xpv
2143 if (va == DEMAP_ALL_ADDR) {
2144 xen_flush_tlb();
2145 } else {
2146 for (size_t i = 0; i < len; i += MMU_PAGESIZE)
2147 xen_flush_va((caddr_t)(va + i));
2148 }
2149 #else
2150 (void) hati_demap_func((xc_arg_t)hat,
2151 (xc_arg_t)range, (xc_arg_t)len);
2152 #endif
2153
2154 } else {
2155
2156 CPUSET_ADD(cpus_to_shootdown, CPU->cpu_id);
2157 #ifdef __xpv
2158 if (va == DEMAP_ALL_ADDR) {
2159 xen_gflush_tlb(cpus_to_shootdown);
2160 } else {
2161 for (size_t i = 0; i < len; i += MMU_PAGESIZE) {
2162 xen_gflush_va((caddr_t)(va + i),
2163 cpus_to_shootdown);
2164 }
2165 }
2166 #else
2167 xc_call((xc_arg_t)hat, (xc_arg_t)range, (xc_arg_t)len,
2168 CPUSET2BV(cpus_to_shootdown), hati_demap_func);
2169 #endif
2170
2171 }
2172 kpreempt_enable();
2173 }
2174
2175 void
2176 hat_tlb_inval(hat_t *hat, uintptr_t va)
2177 {
2178 /*
2179 * Create range for a single page.
2180 */
2181 range_info_t range;
2182 range.rng_va = va;
2183 range.rng_cnt = 1; /* one page */
2184 range.rng_level = MIN_PAGE_LEVEL; /* pages are MMU_PAGESIZE */
2185
2186 hat_tlb_inval_range(hat, &range);
2187 }
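
/*
 * A sketch of the multi-page case (hypothetical helper, not in the
 * original file; assumes mmu_btopr() rounds a byte count up to whole
 * MMU_PAGESIZE pages): flushing len bytes of level-0 mappings starting
 * at va builds the range the same way, only with rng_cnt > 1.
 */
static void
hat_tlb_inval_pages(hat_t *hat, uintptr_t va, size_t len)
{
	range_info_t range;

	range.rng_va = va;
	range.rng_cnt = mmu_btopr(len);		/* number of base pages */
	range.rng_level = MIN_PAGE_LEVEL;	/* MMU_PAGESIZE pages */

	hat_tlb_inval_range(hat, &range);
}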
2188
2189 /*
2190 * Interior routine for HAT_UNLOADs from hat_unload_callback(),
2191 * hat_kmap_unload() OR from hat_steal() code. This routine doesn't
2192 * handle releasing of the htables.
2193 */
2194 void
2195 hat_pte_unmap(
2196 htable_t *ht,
2197 uint_t entry,
2198 uint_t flags,
2199 x86pte_t old_pte,
2200 void *pte_ptr,
2201 boolean_t tlb)
2202 {
2203 hat_t *hat = ht->ht_hat;
2204 hment_t *hm = NULL;
2205 page_t *pp = NULL;
2206 level_t l = ht->ht_level;
2333 hat_unload(hat_t *hat, caddr_t addr, size_t len, uint_t flags)
2334 {
2335 uintptr_t va = (uintptr_t)addr;
2336
2337 XPV_DISALLOW_MIGRATE();
2338 ASSERT(hat == kas.a_hat || va + len <= _userlimit);
2339
2340 /*
2341 * special case for performance.
2342 */
2343 if (mmu.kmap_addr <= va && va < mmu.kmap_eaddr) {
2344 ASSERT(hat == kas.a_hat);
2345 hat_kmap_unload(addr, len, flags);
2346 } else {
2347 hat_unload_callback(hat, addr, len, flags, NULL);
2348 }
2349 XPV_ALLOW_MIGRATE();
2350 }
2351
2352 /*
2353 * Invalidate the TLB, and perform the callback to the upper level VM system,
2354 * for the specified ranges of contiguous pages.
2355 */
2356 static void
2357 handle_ranges(hat_t *hat, hat_callback_t *cb, uint_t cnt, range_info_t *range)
2358 {
2359 while (cnt > 0) {
2360 --cnt;
2361 hat_tlb_inval_range(hat, &range[cnt]);
2362
2363 if (cb != NULL) {
2364 cb->hcb_start_addr = (caddr_t)range[cnt].rng_va;
2365 cb->hcb_end_addr = cb->hcb_start_addr;
2366 cb->hcb_end_addr += range[cnt].rng_cnt <<
2367 LEVEL_SHIFT(range[cnt].rng_level);
2368 cb->hcb_function(cb);
2369 }
2370 }
2371 }
2372
2373 /*
2374 * Unload a given range of addresses (has optional callback)
2375 *
2376 * Flags:
2377 * define HAT_UNLOAD 0x00
2378 * define HAT_UNLOAD_NOSYNC 0x02
2379 * define HAT_UNLOAD_UNLOCK 0x04
2380 * define HAT_UNLOAD_OTHER 0x08 - not used
2381 * define HAT_UNLOAD_UNMAP 0x10 - same as HAT_UNLOAD
2382 */
2383 #define MAX_UNLOAD_CNT (8)
2384 void
2385 hat_unload_callback(
2386 hat_t *hat,
2387 caddr_t addr,