1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright 2017 Joyent, Inc.  All rights reserved.
  25  */
  26 
  27 #ifndef _VM_HAT_PTE_H
  28 #define _VM_HAT_PTE_H
  29 
  30 #ifdef  __cplusplus
  31 extern "C" {
  32 #endif
  33 
  34 #include <sys/types.h>
  35 #include <sys/mach_mmu.h>
  36 
/*
 * macros to get/set/clear the PTE fields
 */
/* or flag bits f into pte p (modifies p in place) */
#define PTE_SET(p, f)   ((p) |= (f))
/* clear flag bits f in p; the x86pte_t cast keeps the complement full width */
#define PTE_CLR(p, f)   ((p) &= ~(x86pte_t)(f))
/* non-destructively extract the flag bits f from p */
#define PTE_GET(p, f)   ((p) & (f))
  43 
/*
 * Handy macro to check if a pagetable entry or pointer is valid;
 * tests the PT_VALID bit, which applies to both leaf PTEs and
 * interior pagetable pointers.
 */
#define PTE_ISVALID(p)          PTE_GET(p, PT_VALID)
  48 
/*
 * Does a PTE map a large page.
 * Only entries above level 0 can map large pages; those are flagged
 * with the PT_PAGESIZE bit.
 */
#define PTE_IS_LGPG(p, l)       ((l) > 0 && PTE_GET((p), PT_PAGESIZE))
  53 
/*
 * does this PTE represent a page (not a pointer to another page table)?
 * At level 0 every valid entry maps a page; above level 0 only entries
 * with PT_PAGESIZE set map a (large) page.
 */
#define PTE_ISPAGE(p, l)        \
        (PTE_ISVALID(p) && ((l) == 0 || PTE_GET(p, PT_PAGESIZE)))
  59 
/*
 * Handy macro to check if 2 PTE's are the same - ignores REF/MOD bits.
 * On the 64 bit hypervisor we also have to ignore the high order
 * software bits and the global/user bit which are set/cleared
 * capriciously (by the hypervisor!)
 *
 * The comparison forces all ignored bits on in both values before
 * comparing, so differences confined to those bits cannot matter.
 */
#if defined(__amd64) && defined(__xpv)
#define PT_IGNORE       ((0x7fful << 52) | PT_GLOBAL | PT_USER)
#else
#define PT_IGNORE       (0)
#endif
#define PTE_EQUIV(a, b)  (((a) | (PT_IGNORE | PT_REF | PT_MOD)) == \
        ((b) | (PT_IGNORE | PT_REF | PT_MOD)))
  73 
/*
 * Shorthand for converting a PTE to its pfn.
 * PTE2MFN masks out the physical address (using the large-page mask when
 * appropriate) and converts bytes to a frame number. On bare metal the
 * machine frame is the pfn; under the hypervisor pte2pfn() must also
 * translate the machine frame back to a pseudo-physical pfn.
 */
#define PTE2MFN(p, l)   \
        mmu_btop(PTE_GET((p), PTE_IS_LGPG((p), (l)) ? PT_PADDR_LGPG : PT_PADDR))
#ifdef __xpv
#define PTE2PFN(p, l) pte2pfn(p, l)
#else
#define PTE2PFN(p, l) PTE2MFN(p, l)
#endif
  84 
#define PT_NX           (0x8000000000000000ull) /* no-execute (bit 63) */
#define PT_PADDR        (0x000ffffffffff000ull) /* phys addr in 4K entry */
#define PT_PADDR_LGPG   (0x000fffffffffe000ull) /* phys addr for large pages */
  88 
  89 /*
  90  * Macros to create a PTP or PTE from the pfn and level
  91  */
  92 #ifdef __xpv
  93 
  94 /*
  95  * we use the highest order bit in physical address pfns to mark foreign mfns
  96  */
  97 #ifdef _LP64
  98 #define PFN_IS_FOREIGN_MFN (1ul << 51)
  99 #else
 100 #define PFN_IS_FOREIGN_MFN (1ul << 31)
 101 #endif
 102 
 103 #define MAKEPTP(pfn, l) \
 104         (pa_to_ma(pfn_to_pa(pfn)) | mmu.ptp_bits[(l) + 1])
 105 #define MAKEPTE(pfn, l) \
 106         ((pfn & PFN_IS_FOREIGN_MFN) ? \
 107         ((pfn_to_pa(pfn & ~PFN_IS_FOREIGN_MFN) | mmu.pte_bits[l]) | \
 108         PT_FOREIGN | PT_REF | PT_MOD) : \
 109         (pa_to_ma(pfn_to_pa(pfn)) | mmu.pte_bits[l]))
 110 #else
 111 #define MAKEPTP(pfn, l) \
 112         (pfn_to_pa(pfn) | mmu.ptp_bits[(l) + 1])
 113 #define MAKEPTE(pfn, l) \
 114         (pfn_to_pa(pfn) | mmu.pte_bits[l])
 115 #endif
 116 
/*
 * The idea of "level" refers to the level where the page table is used in
 * the hardware address translation steps. The level values correspond to
 * the following names of tables used in AMD/Intel architecture documents:
 *
 *      AMD/INTEL name          Level #
 *      ----------------------  -------
 *      Page Map Level 4           3
 *      Page Directory Pointer     2
 *      Page Directory             1
 *      Page Table                 0
 *
 * The numbering scheme is such that the values of 0 and 1 can correspond to
 * the pagesize codes used for MPSS support. For now the Maximum level at
 * which you can have a large page is a constant, that may change in
 * future processors.
 *
 * The type of "level_t" is signed so that it can be used like:
 *      level_t l;
 *      ...
 *      while (--l >= 0)
 *              ...
 */
#define MAX_NUM_LEVEL           4
#define MAX_PAGE_LEVEL          2
#define MIN_PAGE_LEVEL          0
typedef int8_t level_t;
/* per-level pagetable geometry, looked up in the global mmu parameters */
#define LEVEL_SHIFT(l)  (mmu.level_shift[l])    /* PAGESHIFT analogue */
#define LEVEL_SIZE(l)   (mmu.level_size[l])     /* PAGESIZE analogue */
#define LEVEL_OFFSET(l) (mmu.level_offset[l])   /* PAGEOFFSET analogue */
#define LEVEL_MASK(l)   (mmu.level_mask[l])     /* PAGEMASK analogue */
 148 
/*
 * Macros to:
 * Check for a PFN above 4Gig and 64Gig for 32 bit PAE support
 */
/* first pfn at or above the 4G / 64G physical address boundaries */
#define PFN_4G          (4ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define PFN_64G         (64ull * (1024 * 1024 * 1024 / MMU_PAGESIZE))
#define PFN_ABOVE4G(pfn) ((pfn) >= PFN_4G)
#define PFN_ABOVE64G(pfn) ((pfn) >= PFN_64G)
 157 
/*
 * The CR3 register holds the physical address of the top level page table.
 * Convert the top level pagetable's pfn into a value loadable into %cr3.
 */
#define MAKECR3(pfn)    mmu_ptob(pfn)
 162 
/*
 * HAT/MMU parameters that depend on kernel mode and/or processor type.
 * A single instance ("mmu", declared below) is shared by all HAT code.
 */
struct htable;
struct hat_mmu_info {
        x86pte_t pt_nx;         /* either 0 or PT_NX */
        x86pte_t pt_global;     /* either 0 or PT_GLOBAL */

        /* NOTE(review): presumably the largest valid pfn — confirm in hat.c */
        pfn_t highest_pfn;

        uint_t num_level;       /* number of page table levels in use */
        uint_t max_level;       /* just num_level - 1 */
        uint_t max_page_level;  /* maximum level at which we can map a page */
        uint_t umax_page_level; /* max user page map level */
        uint_t ptes_per_table;  /* # of entries in lower level page tables */
        uint_t top_level_count; /* # of entries in top most level page table */

        uint_t  hash_cnt;       /* cnt of entries in htable_hash_cache */
        uint_t  vlp_hash_cnt;   /* cnt of entries in vlp htable_hash_cache */

        uint_t pae_hat;         /* either 0 or 1 */

        uintptr_t hole_start;   /* start of VA hole (or -1 if none) */
        uintptr_t hole_end;     /* end of VA hole (or 0 if none) */

        struct htable **kmap_htables; /* htables for segmap + 32 bit heap */
        x86pte_t *kmap_ptes;    /* mapping of pagetables that map kmap */
        uintptr_t kmap_addr;    /* start addr of kmap */
        uintptr_t kmap_eaddr;   /* end addr of kmap */

        uint_t pte_size;        /* either 4 or 8 */
        uint_t pte_size_shift;  /* either 2 or 3 */
        x86pte_t ptp_bits[MAX_NUM_LEVEL];       /* bits set for interior PTP */
        x86pte_t pte_bits[MAX_NUM_LEVEL];       /* bits set for leaf PTE */

        /*
         * A range of VA used to window pages in the i86pc/vm code.
         * See PWIN_XXX macros.
         */
        caddr_t pwin_base;      /* base VA of the page windows */
        caddr_t pwin_pte_va;    /* VA of the PTEs mapping the windows */
        paddr_t pwin_pte_pa;    /* PA of the PTEs mapping the windows */

        /*
         * The following tables are equivalent to PAGEXXXXX at different levels
         * in the page table hierarchy.
         */
        uint_t level_shift[MAX_NUM_LEVEL];      /* PAGESHIFT for given level */
        uintptr_t level_size[MAX_NUM_LEVEL];    /* PAGESIZE for given level */
        uintptr_t level_offset[MAX_NUM_LEVEL];  /* PAGEOFFSET for given level */
        uintptr_t level_mask[MAX_NUM_LEVEL];    /* PAGEMASK for given level */
};
 215 
 216 
 217 #if defined(_KERNEL)
 218 
/*
 * Macros to access the HAT's private page windows. They're used for
 * accessing pagetables, ppcopy() and page_zero().
 * Each CPU gets a pair of windows: an even index for the table being
 * worked on and an odd index for a copy source.
 * The 1st two macros are used to get an index for the particular use.
 * The next three give you:
 * - the virtual address of the window
 * - the virtual address of the pte that maps the window
 * - the physical address of the pte that maps the window
 */
#define PWIN_TABLE(cpuid)       ((cpuid) * 2)
#define PWIN_SRC(cpuid)         ((cpuid) * 2 + 1)       /* for x86pte_copy() */
#define PWIN_VA(x)              (mmu.pwin_base + ((x) << MMU_PAGESHIFT))
#define PWIN_PTE_VA(x)          (mmu.pwin_pte_va + ((x) << mmu.pte_size_shift))
#define PWIN_PTE_PA(x)          (mmu.pwin_pte_pa + ((x) << mmu.pte_size_shift))
 233 
 234 /*
 235  * The concept of a VA hole exists in AMD64. This might need to be made
 236  * model specific eventually.
 237  *
 238  * In the 64 bit kernel PTE loads are atomic, but need atomic_cas_64 on 32
 239  * bit kernel.
 240  */
 241 #if defined(__amd64)
 242 
 243 #ifdef lint
 244 #define IN_VA_HOLE(va)  (__lintzero)
 245 #else
 246 #define IN_VA_HOLE(va)  (mmu.hole_start <= (va) && (va) < mmu.hole_end)
 247 #endif
 248 
 249 #define FMT_PTE "0x%lx"
 250 #define GET_PTE(ptr)            (*(x86pte_t *)(ptr))
 251 #define SET_PTE(ptr, pte)       (*(x86pte_t *)(ptr) = pte)
 252 #define CAS_PTE(ptr, x, y)      atomic_cas_64(ptr, x, y)
 253 
 254 #elif defined(__i386)
 255 
 256 #define IN_VA_HOLE(va)  (__lintzero)
 257 
 258 #define FMT_PTE "0x%llx"
 259 
 260 /* on 32 bit kernels, 64 bit loads aren't atomic, use get_pte64() */
 261 extern x86pte_t get_pte64(x86pte_t *ptr);
 262 #define GET_PTE(ptr)    (mmu.pae_hat ? get_pte64(ptr) : *(x86pte32_t *)(ptr))
 263 #define SET_PTE(ptr, pte)                                               \
 264         ((mmu.pae_hat ? ((x86pte32_t *)(ptr))[1] = (pte >> 32) : 0),      \
 265         *(x86pte32_t *)(ptr) = pte)
 266 #define CAS_PTE(ptr, x, y)                      \
 267         (mmu.pae_hat ? atomic_cas_64(ptr, x, y) :       \
 268         atomic_cas_32((uint32_t *)(ptr), (uint32_t)(x), (uint32_t)(y)))
 269 
 270 #endif  /* __i386 */
 271 
/*
 * Return a pointer to the pte entry at the given index within a page table.
 * pte_size_shift scales the index for 4 byte vs 8 byte PTEs.
 */
#define PT_INDEX_PTR(p, x) \
        ((x86pte_t *)((uintptr_t)(p) + ((x) << mmu.pte_size_shift)))
 277 
/*
 * Return the physical address of the pte entry at the given index within a
 * page table. Here p is the table's physical address; the index scaling
 * matches PT_INDEX_PTR above.
 */
#define PT_INDEX_PHYSADDR(p, x) \
        ((paddr_t)(p) + ((x) << mmu.pte_size_shift))
 284 
/*
 * From pfn to bytes, careful not to lose bits on PAE.
 * The paddr_t cast widens the pfn before the shift so physical addresses
 * above 4G aren't truncated on a 32 bit kernel.
 */
#define pfn_to_pa(pfn) (mmu_ptob((paddr_t)(pfn)))
 289 
#ifdef __xpv
/* convert a PTE's machine frame back to a pseudo-physical pfn */
extern pfn_t pte2pfn(x86pte_t, level_t);
#endif

/* the kernel-wide MMU parameter struct, defined in the HAT implementation */
extern struct hat_mmu_info mmu;
 295 
 296 #endif  /* _KERNEL */
 297 
 298 
 299 #ifdef  __cplusplus
 300 }
 301 #endif
 302 
 303 #endif  /* _VM_HAT_PTE_H */