Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2014 by Delphix. All rights reserved.

  27  */
  28 
  29 #ifndef _VM_HTABLE_H
  30 #define _VM_HTABLE_H
  31 
  32 #ifdef  __cplusplus
  33 extern "C" {
  34 #endif
  35 
  36 #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL)
  37 #include <asm/htable.h>
  38 #endif
  39 
  40 extern void atomic_andb(uint8_t *addr, uint8_t value);
  41 extern void atomic_orb(uint8_t *addr, uint8_t value);
  42 extern void atomic_inc16(uint16_t *addr);
  43 extern void atomic_dec16(uint16_t *addr);
  44 extern void mmu_tlbflush_entry(caddr_t addr);
  45 
  46 /*
  47  * Each hardware page table has an htable_t describing it.
  48  *
  49  * We use a reference counter mechanism to detect when we can free an htable.
   50  * In the implementation the reference count is split into 2 separate counters:
  51  *
  52  *      ht_busy is a traditional reference count of uses of the htable pointer
  53  *
   54  *      ht_valid_cnt is a count of how many references are implied by valid PTE/PTP
  55  *               entries in the pagetable
  56  *
  57  * ht_busy is only incremented by htable_lookup() or htable_create()
  58  * while holding the appropriate hash_table mutex. While installing a new
  59  * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have
  60  * done an htable_lookup() or htable_create() but not the htable_release yet.
  61  *
  62  * htable_release(), while holding the mutex, can know that if
  63  * busy == 1 and valid_cnt == 0, the htable can be free'd.
  64  *


  68 struct htable {
  69         struct htable   *ht_next;       /* forward link for hash table */
  70         struct hat      *ht_hat;        /* hat this mapping comes from */
  71         uintptr_t       ht_vaddr;       /* virt addr at start of this table */
  72         int8_t          ht_level;       /* page table level: 0=4K, 1=2M, ... */
  73         uint8_t         ht_flags;       /* see below */
  74         int16_t         ht_busy;        /* implements locking protocol */
  75         int16_t         ht_valid_cnt;   /* # of valid entries in this table */
  76         uint32_t        ht_lock_cnt;    /* # of locked entries in this table */
  77                                         /* never used for kernel hat */
  78         pfn_t           ht_pfn;         /* pfn of page of the pagetable */
  79         struct htable   *ht_prev;       /* backward link for hash table */
  80         struct htable   *ht_parent;     /* htable that points to this htable */
  81         struct htable   *ht_shares;     /* for HTABLE_SHARED_PFN only */
  82 };
  83 typedef struct htable htable_t;
  84 
  85 /*
  86  * Flags values for htable ht_flags field:
  87  *
  88  * HTABLE_VLP - this is the top level htable of a VLP HAT.

  89  *
  90  * HTABLE_SHARED_PFN - this htable had its PFN assigned from sharing another
  91  *      htable. Used by hat_share() for ISM.
  92  */
  93 #define HTABLE_VLP              (0x01)
  94 #define HTABLE_SHARED_PFN       (0x02)
  95 
  96 /*
  97  * The htable hash table hashing function.  The 28 is so that high
   98  * order bits are included in the hash index to skew the wrap
  99  * around of addresses. Even though the hash buckets are stored per
 100  * hat we include the value of hat pointer in the hash function so
  101  * that the secondary hash for the htable mutex winds up being different in
 102  * every address space.
 103  */
 104 #define HTABLE_HASH(hat, va, lvl)                                       \
 105         ((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) +         \
 106         ((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1))
 107 
 108 /*
 109  * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info.

 110  */
 111 struct hat_cpu_info {
 112         kmutex_t hci_mutex;             /* mutex to ensure sequential usage */
 113 #if defined(__amd64)
 114         pfn_t   hci_vlp_pfn;            /* pfn of hci_vlp_l3ptes */
 115         x86pte_t *hci_vlp_l3ptes;       /* VLP Level==3 pagetable (top) */
 116         x86pte_t *hci_vlp_l2ptes;       /* VLP Level==2 pagetable */




 117 #endif  /* __amd64 */
 118 };
 119 
 120 
 121 /*
 122  * Compute the last page aligned VA mapped by an htable.
 123  *
 124  * Given a va and a level, compute the virtual address of the start of the
 125  * next page at that level.
 126  *
 127  * XX64 - The check for the VA hole needs to be better generalized.
 128  */
 129 #if defined(__amd64)
 130 #define HTABLE_NUM_PTES(ht)     (((ht)->ht_flags & HTABLE_VLP) ? 4 : 512)

 131 
 132 #define HTABLE_LAST_PAGE(ht)                                            \
 133         ((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\
 134         ((ht)->ht_vaddr - MMU_PAGESIZE +                             \
 135         ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level))))
 136 
 137 #define NEXT_ENTRY_VA(va, l)    \
 138         ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l) == mmu.hole_start ?   \
 139         mmu.hole_end : (va & LEVEL_MASK(l)) + LEVEL_SIZE(l))
 140 
 141 #elif defined(__i386)
 142 
 143 #define HTABLE_NUM_PTES(ht)     \
 144         (!mmu.pae_hat ? 1024 : ((ht)->ht_level == 2 ? 4 : 512))
 145 
 146 #define HTABLE_LAST_PAGE(ht)    ((ht)->ht_vaddr - MMU_PAGESIZE + \
 147         ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level)))
 148 
 149 #define NEXT_ENTRY_VA(va, l) ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l))
 150 




   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2014 by Delphix. All rights reserved.
  27  * Copyright 2018 Joyent, Inc.
  28  */
  29 
  30 #ifndef _VM_HTABLE_H
  31 #define _VM_HTABLE_H
  32 
  33 #ifdef  __cplusplus
  34 extern "C" {
  35 #endif
  36 
  37 #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL)
  38 #include <asm/htable.h>
  39 #endif
  40 
  41 extern void atomic_andb(uint8_t *addr, uint8_t value);
  42 extern void atomic_orb(uint8_t *addr, uint8_t value);
  43 extern void atomic_inc16(uint16_t *addr);
  44 extern void atomic_dec16(uint16_t *addr);

  45 
  46 /*
  47  * Each hardware page table has an htable_t describing it.
  48  *
  49  * We use a reference counter mechanism to detect when we can free an htable.
   50  * In the implementation the reference count is split into 2 separate counters:
  51  *
  52  *      ht_busy is a traditional reference count of uses of the htable pointer
  53  *
   54  *      ht_valid_cnt is a count of how many references are implied by valid PTE/PTP
  55  *               entries in the pagetable
  56  *
  57  * ht_busy is only incremented by htable_lookup() or htable_create()
  58  * while holding the appropriate hash_table mutex. While installing a new
  59  * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have
  60  * done an htable_lookup() or htable_create() but not the htable_release yet.
  61  *
  62  * htable_release(), while holding the mutex, can know that if
  63  * busy == 1 and valid_cnt == 0, the htable can be free'd.
  64  *


  68 struct htable {
  69         struct htable   *ht_next;       /* forward link for hash table */
  70         struct hat      *ht_hat;        /* hat this mapping comes from */
  71         uintptr_t       ht_vaddr;       /* virt addr at start of this table */
  72         int8_t          ht_level;       /* page table level: 0=4K, 1=2M, ... */
  73         uint8_t         ht_flags;       /* see below */
  74         int16_t         ht_busy;        /* implements locking protocol */
  75         int16_t         ht_valid_cnt;   /* # of valid entries in this table */
  76         uint32_t        ht_lock_cnt;    /* # of locked entries in this table */
  77                                         /* never used for kernel hat */
  78         pfn_t           ht_pfn;         /* pfn of page of the pagetable */
  79         struct htable   *ht_prev;       /* backward link for hash table */
  80         struct htable   *ht_parent;     /* htable that points to this htable */
  81         struct htable   *ht_shares;     /* for HTABLE_SHARED_PFN only */
  82 };
  83 typedef struct htable htable_t;
  84 
  85 /*
  86  * Flags values for htable ht_flags field:
  87  *
  88  * HTABLE_COPIED - This is the top level htable of a HAT being used with per-CPU
  89  *      pagetables.
  90  *
  91  * HTABLE_SHARED_PFN - this htable had its PFN assigned from sharing another
  92  *      htable. Used by hat_share() for ISM.
  93  */
  94 #define HTABLE_COPIED           (0x01)
  95 #define HTABLE_SHARED_PFN       (0x02)
  96 
  97 /*
  98  * The htable hash table hashing function.  The 28 is so that high
   99  * order bits are included in the hash index to skew the wrap
 100  * around of addresses. Even though the hash buckets are stored per
 101  * hat we include the value of hat pointer in the hash function so
  102  * that the secondary hash for the htable mutex winds up being different in
 103  * every address space.
 104  */
 105 #define HTABLE_HASH(hat, va, lvl)                                       \
 106         ((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) +         \
 107         ((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1))
 108 
 109 /*
 110  * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info. For more
 111  * information on its use and members, see uts/i86pc/vm/hat_i86.c.
 112  */
 113 struct hat_cpu_info {
 114         kmutex_t hci_mutex;             /* mutex to ensure sequential usage */
 115 #if defined(__amd64)
 116         pfn_t   hci_pcp_l3pfn;          /* pfn of hci_pcp_l3ptes */
 117         pfn_t   hci_pcp_l2pfn;          /* pfn of hci_pcp_l2ptes */
 118         x86pte_t *hci_pcp_l3ptes;       /* PCP Level==3 pagetable (top) */
 119         x86pte_t *hci_pcp_l2ptes;       /* PCP Level==2 pagetable */
 120         struct hat *hci_user_hat;       /* CPU specific HAT */
 121         pfn_t   hci_user_l3pfn;         /* pfn of hci_user_l3ptes */
 122         x86pte_t *hci_user_l3ptes;      /* PCP User L3 pagetable */
 123 #endif  /* __amd64 */
 124 };
 125 
 126 
 127 /*
 128  * Compute the last page aligned VA mapped by an htable.
 129  *
 130  * Given a va and a level, compute the virtual address of the start of the
 131  * next page at that level.
 132  *
 133  * XX64 - The check for the VA hole needs to be better generalized.
 134  */
 135 #if defined(__amd64)
 136 #define HTABLE_NUM_PTES(ht)     (((ht)->ht_flags & HTABLE_COPIED) ? \
 137         (((ht)->ht_level == mmu.max_level) ? 512 : 4) : 512)
 138 
 139 #define HTABLE_LAST_PAGE(ht)                                            \
 140         ((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\
 141         ((ht)->ht_vaddr - MMU_PAGESIZE +                             \
 142         ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level))))
 143 
 144 #define NEXT_ENTRY_VA(va, l)    \
 145         ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l) == mmu.hole_start ?   \
 146         mmu.hole_end : (va & LEVEL_MASK(l)) + LEVEL_SIZE(l))
 147 
 148 #elif defined(__i386)
 149 
 150 #define HTABLE_NUM_PTES(ht)     \
 151         (!mmu.pae_hat ? 1024 : ((ht)->ht_level == 2 ? 4 : 512))
 152 
 153 #define HTABLE_LAST_PAGE(ht)    ((ht)->ht_vaddr - MMU_PAGESIZE + \
 154         ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level)))
 155 
 156 #define NEXT_ENTRY_VA(va, l) ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l))
 157