/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2018 Joyent, Inc.
 */

/*
 * HAT interfaces used by the kernel debugger to interact with the VM system.
 * These interfaces are invoked when the world is stopped.  As such, no
 * blocking operations may be performed.
 */

#include <sys/cpuvar.h>
#include <sys/kdi_impl.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/mman.h>
#include <sys/bootconf.h>
#include <sys/cmn_err.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#if defined(__xpv)
#include <sys/hypervisor.h>
#endif
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <sys/machsystm.h>

/*
 * The debugger needs direct access to a single page table entry (PTE)
 * in order to implement vtop and physical reads/writes.
 */
static uintptr_t hat_kdi_page = 0;	/* vaddr for physical page accesses */
static uint_t use_kbm = 1;
uint_t hat_kdi_use_pae;			/* if 0, use x86pte32_t for pte type */

#if !defined(__xpv)
static x86pte_t *hat_kdi_pte = NULL;	/* vaddr of pte for hat_kdi_page */
#endif
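
/*
 * Note: with PAE (and on amd64) each PTE is a 64-bit x86pte_t; without PAE
 * a PTE is a 32-bit x86pte32_t.  hat_kdi_use_pae selects which width the
 * direct PTE stores in kdi_prw() below must use.
 */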

/*
 * Get the address for remapping physical pages during boot
 */
void
hat_boot_kdi_init(void)
{
	hat_kdi_page = (uintptr_t)kbm_push(0);	/* first call gets address... */
}

/*
 * Switch to using a page in the kernel's va range for physical memory access.
 * We need to allocate a virtual page, then permanently map in the page that
 * contains the PTE to it.
 */
void
hat_kdi_init(void)
{
	/*LINTED:set but not used in function*/
	htable_t *ht __unused;

	/*
	 * Get a kernel page VA to use for phys mem access. Then make sure
	 * the VA has a page table.
	 */
	hat_kdi_use_pae = mmu.pae_hat;
	hat_kdi_page = (uintptr_t)vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
	ht = htable_create(kas.a_hat, hat_kdi_page, 0, NULL);
	use_kbm = 0;

#ifndef __xpv
	/*
	 * Get an address at which to put the pagetable and devload it.
	 */
	hat_kdi_pte = vmem_xalloc(heap_arena, MMU_PAGESIZE, MMU_PAGESIZE, 0,
	    0, NULL, NULL, VM_SLEEP);
	hat_devload(kas.a_hat, (caddr_t)hat_kdi_pte, MMU_PAGESIZE, ht->ht_pfn,
	    PROT_READ | PROT_WRITE | HAT_NOSYNC | HAT_UNORDERED_OK,
	    HAT_LOAD | HAT_LOAD_NOCONSIST);
	hat_kdi_pte =
	    PT_INDEX_PTR(hat_kdi_pte, htable_va2entry(hat_kdi_page, ht));

	HTABLE_INC(ht->ht_valid_cnt);
	htable_release(ht);
#endif
}
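
/*
 * Illustrative only (hypothetical values): after hat_kdi_init(), hat_kdi_pte
 * points directly at the level 0 PTE that maps hat_kdi_page.  For example,
 * with 8-byte PAE PTEs, if hat_kdi_page's level 0 table index were 37, then
 * hat_kdi_pte would point at byte offset 37 * 8 == 296 into the pagetable
 * page devloaded above.  kdi_prw() can then remap hat_kdi_page simply by
 * storing a PTE through this pointer, with no hat calls at debug time.
 */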

#ifdef __xpv

/*
 * translate a physical address to a machine address
 */
static uint64_t
kdi_ptom(uint64_t pa)
{
	extern pfn_t *mfn_list;
	ulong_t mfn = mfn_list[mmu_btop(pa)];

	return (pfn_to_pa(mfn) | (pa & MMU_PAGEOFFSET));
}

/*
 * Translate a machine address to a physical address.  This is like
 * mfn_to_pfn(), but we can't use ontrap() from kmdb.  Instead we let
 * the fault happen and kmdb deals with it.
 */
static uint64_t
kdi_mtop(uint64_t ma)
{
	pfn_t pfn;
	mfn_t mfn = ma >> MMU_PAGESHIFT;

	if (HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL) < mfn)
		return (ma | PFN_IS_FOREIGN_MFN);

	pfn = mfn_to_pfn_mapping[mfn];
	if (pfn >= mfn_count || pfn_to_mfn(pfn) != mfn)
		return (ma | PFN_IS_FOREIGN_MFN);
	return (pfn_to_pa(pfn) | (ma & MMU_PAGEOFFSET));
}

#else
#define	kdi_mtop(m)	(m)
#define	kdi_ptom(p)	(p)
#endif
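
/*
 * For example (hypothetical values): under the hypervisor, if the domain's
 * mfn_list mapped pfn 0x100 to mfn 0x7abc, then kdi_ptom(0x100123) would
 * return 0x7abc123, and kdi_mtop(0x7abc123) would recover 0x100123.  On
 * bare metal both translations are the identity, as the macros above show.
 */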

/*ARGSUSED*/
int
kdi_vtop(uintptr_t va, uint64_t *pap)
{
	uintptr_t vaddr = va;
	size_t	len;
	pfn_t	pfn;
	uint_t	prot;
	int	level;
	x86pte_t pte;
	int	index;

	/*
	 * if the mmu struct isn't relevant yet, we need to probe
	 * the boot loader's pagetables.
	 */
	if (!khat_running) {
		if (kbm_probe(&vaddr, &len, &pfn, &prot) == 0)
			return (ENOENT);
		if (vaddr > va)
			return (ENOENT);
		if (vaddr < va)
			pfn += mmu_btop(va - vaddr);
		*pap = pfn_to_pa(pfn) + (va & MMU_PAGEOFFSET);
		return (0);
	}

	/*
	 * We can't go through normal hat routines, so we'll use
	 * kdi_pread() to walk the page tables
	 */
#if defined(__xpv)
	*pap = pfn_to_pa(CPU->cpu_current_hat->hat_htable->ht_pfn);
#else
	*pap = getcr3_pa();
#endif
	for (level = mmu.max_level; ; --level) {
		index = (va >> LEVEL_SHIFT(level)) & (mmu.ptes_per_table - 1);
		*pap += index << mmu.pte_size_shift;
		pte = 0;
		if (kdi_pread((caddr_t)&pte, mmu.pte_size, *pap, &len) != 0)
			return (ENOENT);
		if (pte == 0)
			return (ENOENT);
		if (level > 0 && level <= mmu.max_page_level &&
		    (pte & PT_PAGESIZE)) {
			*pap = kdi_mtop(pte & PT_PADDR_LGPG);
			break;
		} else {
			*pap = kdi_mtop(pte & PT_PADDR);
			if (level == 0)
				break;
		}
	}
	*pap += va & LEVEL_OFFSET(level);
	return (0);
}
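
/*
 * A minimal usage sketch (illustrative only): to read kernel memory by
 * virtual address while the world is stopped, a debugger would chain the
 * two interfaces in this file, e.g.:
 *
 *	uint64_t pa;
 *	size_t ncopied;
 *
 *	if (kdi_vtop(va, &pa) == 0)
 *		(void) kdi_pread(buf, nbytes, pa, &ncopied);
 *
 * A caller crossing a page boundary must re-translate, since virtually
 * contiguous pages need not be physically contiguous.
 */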

static int
kdi_prw(caddr_t buf, size_t nbytes, uint64_t pa, size_t *ncopiedp, int doread)
{
	size_t	ncopied = 0;
	off_t	pgoff;
	size_t	sz;
	caddr_t	va;
	caddr_t	from;
	caddr_t	to;
	x86pte_t pte;

	/*
	 * if this is called before any initialization - fail
	 */
	if (hat_kdi_page == 0)
		return (EAGAIN);

	while (nbytes > 0) {
		/*
		 * figure out the addresses and construct a minimal PTE
		 */
		pgoff = pa & MMU_PAGEOFFSET;
		sz = MIN(nbytes, MMU_PAGESIZE - pgoff);
		va = (caddr_t)hat_kdi_page + pgoff;
		pte = kdi_ptom(mmu_ptob(mmu_btop(pa))) | PT_VALID;
		if (doread) {
			from = va;
			to = buf;
		} else {
			PTE_SET(pte, PT_WRITABLE);
			from = buf;
			to = va;
		}

		/*
		 * map the physical page
		 */
		if (use_kbm)
			(void) kbm_push(pa);
#if defined(__xpv)
		else
			(void) HYPERVISOR_update_va_mapping(
			    (uintptr_t)va, pte, UVMF_INVLPG);
#else
		else if (hat_kdi_use_pae)
			*hat_kdi_pte = pte;
		else
			*(x86pte32_t *)hat_kdi_pte = pte;
		mmu_flush_tlb_kpage(hat_kdi_page);
#endif

		bcopy(from, to, sz);

		/*
		 * erase the mapping
		 */
		if (use_kbm)
			kbm_pop();
#if defined(__xpv)
		else
			(void) HYPERVISOR_update_va_mapping(
			    (uintptr_t)va, 0, UVMF_INVLPG);
#else
		else if (hat_kdi_use_pae)
			*hat_kdi_pte = 0;
		else
			*(x86pte32_t *)hat_kdi_pte = 0;
		mmu_flush_tlb_kpage(hat_kdi_page);
#endif

		buf += sz;
		pa += sz;
		nbytes -= sz;
		ncopied += sz;
	}

	if (ncopied == 0)
		return (ENOENT);

	*ncopiedp = ncopied;
	return (0);
}
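
/*
 * Worked example of the chunking above (hypothetical values): a 6000-byte
 * read starting at pa 0x1ffc is split into three copies: the last 4 bytes
 * of the page at 0x1000, all 4096 bytes of the page at 0x2000, and the
 * leading 1900 bytes of the page at 0x3000, with hat_kdi_page remapped
 * before each bcopy().
 */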

int
kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
	return (kdi_prw(buf, nbytes, addr, ncopiedp, 1));
}

int
kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
	return (kdi_prw(buf, nbytes, addr, ncopiedp, 0));
}

#if !defined(__xpv)
/*
 * This gets used for flushing the TLB on all the slaves just prior to doing a
 * kdi_prw().  It's unclear why this was originally done, since kdi_prw() itself
 * will flush any lingering hat_kdi_page mappings, but let's presume it was a
 * good idea.
 */
void
kdi_flush_caches(void)
{
	mmu_flush_tlb(FLUSH_TLB_ALL, NULL);
}
#endif

/*
 * Return the number of bytes, relative to the beginning of a given range, that
 * are non-toxic (can be read from and written to with relative impunity).
 */
/*ARGSUSED*/
size_t
kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
{
#if defined(__amd64)
	extern uintptr_t toxic_addr;
	extern size_t	toxic_size;

	/*
	 * Check the 64 bit toxic range.
	 */
	if (toxic_addr != 0 &&
	    va + sz >= toxic_addr &&
	    va < toxic_addr + toxic_size)
		return (va < toxic_addr ? toxic_addr - va : 0);

	/*
	 * avoid the virtual address hole
	 */
	if (va + sz >= hole_start && va < hole_end)
		return (va < hole_start ? hole_start - va : 0);

	return (sz);

#elif defined(__i386)
	extern void *device_arena_contains(void *, size_t, size_t *);
	uintptr_t v;

	v = (uintptr_t)device_arena_contains((void *)va, sz, NULL);
	if (v == 0)
		return (sz);
	else if (v <= va)
		return (0);
	else
		return (v - va);

#endif	/* __i386 */
}
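
/*
 * Worked example (hypothetical values): with toxic_addr == 0x1000 and
 * toxic_size == 0x2000, kdi_range_is_nontoxic(0x800, 0x1000, 0) returns
 * 0x800, the number of bytes preceding the toxic range; a range starting
 * inside the toxic region returns 0, and a range entirely outside it
 * returns sz.
 */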