Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.


  25  */
  26 
  27 /*
  28  * HAT interfaces used by the kernel debugger to interact with the VM system.
  29  * These interfaces are invoked when the world is stopped.  As such, no blocking
  30  * operations may be performed.
  31  */
  32 
  33 #include <sys/cpuvar.h>
  34 #include <sys/kdi_impl.h>
  35 #include <sys/errno.h>
  36 #include <sys/systm.h>
  37 #include <sys/sysmacros.h>
  38 #include <sys/mman.h>
  39 #include <sys/bootconf.h>
  40 #include <sys/cmn_err.h>
  41 #include <vm/seg_kmem.h>
  42 #include <vm/hat_i86.h>
  43 #if defined(__xpv)
  44 #include <sys/hypervisor.h>


 160          * the boot loader's pagetables.
 161          */
 162         if (!khat_running) {
 163                 if (kbm_probe(&vaddr, &len, &pfn, &prot) == 0)
 164                         return (ENOENT);
 165                 if (vaddr > va)
 166                         return (ENOENT);
 167                 if (vaddr < va)
 168                         pfn += mmu_btop(va - vaddr);
 169                 *pap = pfn_to_pa(pfn) + (vaddr & MMU_PAGEOFFSET);
 170                 return (0);
 171         }
 172 
 173         /*
 174          * We can't go through normal hat routines, so we'll use
 175          * kdi_pread() to walk the page tables
 176          */
 177 #if defined(__xpv)
 178         *pap = pfn_to_pa(CPU->cpu_current_hat->hat_htable->ht_pfn);
 179 #else
 180         *pap = getcr3() & MMU_PAGEMASK;
 181 #endif
 182         for (level = mmu.max_level; ; --level) {
 183                 index = (va >> LEVEL_SHIFT(level)) & (mmu.ptes_per_table - 1);
 184                 *pap += index << mmu.pte_size_shift;
 185                 pte = 0;
 186                 if (kdi_pread((caddr_t)&pte, mmu.pte_size, *pap, &len) != 0)
 187                         return (ENOENT);
 188                 if (pte == 0)
 189                         return (ENOENT);
 190                 if (level > 0 && level <= mmu.max_page_level &&
 191                     (pte & PT_PAGESIZE)) {
 192                         *pap = kdi_mtop(pte & PT_PADDR_LGPG);
 193                         break;
 194                 } else {
 195                         *pap = kdi_mtop(pte & PT_PADDR);
 196                         if (level == 0)
 197                                 break;
 198                 }
 199         }
 200         *pap += va & LEVEL_OFFSET(level);


 232                 } else {
 233                         PTE_SET(pte, PT_WRITABLE);
 234                         from = buf;
 235                         to = va;
 236                 }
 237 
 238                 /*
 239                  * map the physical page
 240                  */
 241                 if (use_kbm)
 242                         (void) kbm_push(pa);
 243 #if defined(__xpv)
 244                 else
 245                         (void) HYPERVISOR_update_va_mapping(
 246                             (uintptr_t)va, pte, UVMF_INVLPG);
 247 #else
 248                 else if (hat_kdi_use_pae)
 249                         *hat_kdi_pte = pte;
 250                 else
 251                         *(x86pte32_t *)hat_kdi_pte = pte;
 252                 mmu_tlbflush_entry((caddr_t)hat_kdi_page);
 253 #endif
 254 
 255                 bcopy(from, to, sz);
 256 
 257                 /*
 258                  * erase the mapping
 259                  */
 260                 if (use_kbm)
 261                         kbm_pop();
 262 #if defined(__xpv)
 263                 else
 264                         (void) HYPERVISOR_update_va_mapping(
 265                             (uintptr_t)va, 0, UVMF_INVLPG);
 266 #else
 267                 else if (hat_kdi_use_pae)
 268                         *hat_kdi_pte = 0;
 269                 else
 270                         *(x86pte32_t *)hat_kdi_pte = 0;
 271                 mmu_tlbflush_entry((caddr_t)hat_kdi_page);
 272 #endif
 273 
 274                 buf += sz;
 275                 pa += sz;
 276                 nbytes -= sz;
 277                 ncopied += sz;
 278         }
 279 
 280         if (ncopied == 0)
 281                 return (ENOENT);
 282 
 283         *ncopiedp = ncopied;
 284         return (0);
 285 }
 286 
 287 int
 288 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
 289 {
             /*
              * Thin wrapper: forwards buf, nbytes, addr and ncopiedp
              * unchanged to kdi_prw() and returns its result.  The page
              * table walk earlier in this file passes a physical address
              * as addr.
              *
              * NOTE(review): the trailing 1 presumably selects the read
              * direction (memory -> buf); confirm against kdi_prw()'s
              * parameter list, which is not visible here.
              */
 290         return (kdi_prw(buf, nbytes, addr, ncopiedp, 1));
 291 }
 292 
 293 int
 294 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
 295 {
             /*
              * Thin wrapper: forwards buf, nbytes, addr and ncopiedp
              * unchanged to kdi_prw() and returns its result.
              *
              * NOTE(review): the trailing 0 presumably selects the write
              * direction (buf -> memory); confirm against kdi_prw()'s
              * parameter list, which is not visible here.
              */
 296         return (kdi_prw(buf, nbytes, addr, ncopiedp, 0));
 297 }
 298 













 299 
 300 /*
 301  * Return the number of bytes, relative to the beginning of a given range, that
 302  * are non-toxic (can be read from and written to with relative impunity).
 303  */
 304 /*ARGSUSED*/
 305 size_t
 306 kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
 307 {
 308 #if defined(__amd64)
 309         extern uintptr_t toxic_addr;
 310         extern size_t   toxic_size;
 311 
 312         /*
 313          * Check 64 bit toxic range.
 314          */
 315         if (toxic_addr != 0 &&
 316             va + sz >= toxic_addr &&
 317             va < toxic_addr + toxic_size)
 318                 return (va < toxic_addr ? toxic_addr - va : 0);




   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  *
  26  * Copyright 2018 Joyent, Inc.
  27  */
  28 
  29 /*
  30  * HAT interfaces used by the kernel debugger to interact with the VM system.
  31  * These interfaces are invoked when the world is stopped.  As such, no blocking
  32  * operations may be performed.
  33  */
  34 
  35 #include <sys/cpuvar.h>
  36 #include <sys/kdi_impl.h>
  37 #include <sys/errno.h>
  38 #include <sys/systm.h>
  39 #include <sys/sysmacros.h>
  40 #include <sys/mman.h>
  41 #include <sys/bootconf.h>
  42 #include <sys/cmn_err.h>
  43 #include <vm/seg_kmem.h>
  44 #include <vm/hat_i86.h>
  45 #if defined(__xpv)
  46 #include <sys/hypervisor.h>


 162          * the boot loader's pagetables.
 163          */
 164         if (!khat_running) {
 165                 if (kbm_probe(&vaddr, &len, &pfn, &prot) == 0)
 166                         return (ENOENT);
 167                 if (vaddr > va)
 168                         return (ENOENT);
 169                 if (vaddr < va)
 170                         pfn += mmu_btop(va - vaddr);
 171                 *pap = pfn_to_pa(pfn) + (vaddr & MMU_PAGEOFFSET);
 172                 return (0);
 173         }
 174 
 175         /*
 176          * We can't go through normal hat routines, so we'll use
 177          * kdi_pread() to walk the page tables
 178          */
 179 #if defined(__xpv)
 180         *pap = pfn_to_pa(CPU->cpu_current_hat->hat_htable->ht_pfn);
 181 #else
 182         *pap = getcr3_pa();
 183 #endif
 184         for (level = mmu.max_level; ; --level) {
 185                 index = (va >> LEVEL_SHIFT(level)) & (mmu.ptes_per_table - 1);
 186                 *pap += index << mmu.pte_size_shift;
 187                 pte = 0;
 188                 if (kdi_pread((caddr_t)&pte, mmu.pte_size, *pap, &len) != 0)
 189                         return (ENOENT);
 190                 if (pte == 0)
 191                         return (ENOENT);
 192                 if (level > 0 && level <= mmu.max_page_level &&
 193                     (pte & PT_PAGESIZE)) {
 194                         *pap = kdi_mtop(pte & PT_PADDR_LGPG);
 195                         break;
 196                 } else {
 197                         *pap = kdi_mtop(pte & PT_PADDR);
 198                         if (level == 0)
 199                                 break;
 200                 }
 201         }
 202         *pap += va & LEVEL_OFFSET(level);


 234                 } else {
 235                         PTE_SET(pte, PT_WRITABLE);
 236                         from = buf;
 237                         to = va;
 238                 }
 239 
 240                 /*
 241                  * map the physical page
 242                  */
 243                 if (use_kbm)
 244                         (void) kbm_push(pa);
 245 #if defined(__xpv)
 246                 else
 247                         (void) HYPERVISOR_update_va_mapping(
 248                             (uintptr_t)va, pte, UVMF_INVLPG);
 249 #else
 250                 else if (hat_kdi_use_pae)
 251                         *hat_kdi_pte = pte;
 252                 else
 253                         *(x86pte32_t *)hat_kdi_pte = pte;
 254                 mmu_flush_tlb_kpage(hat_kdi_page);
 255 #endif
 256 
 257                 bcopy(from, to, sz);
 258 
 259                 /*
 260                  * erase the mapping
 261                  */
 262                 if (use_kbm)
 263                         kbm_pop();
 264 #if defined(__xpv)
 265                 else
 266                         (void) HYPERVISOR_update_va_mapping(
 267                             (uintptr_t)va, 0, UVMF_INVLPG);
 268 #else
 269                 else if (hat_kdi_use_pae)
 270                         *hat_kdi_pte = 0;
 271                 else
 272                         *(x86pte32_t *)hat_kdi_pte = 0;
 273                 mmu_flush_tlb_kpage(hat_kdi_page);
 274 #endif
 275 
 276                 buf += sz;
 277                 pa += sz;
 278                 nbytes -= sz;
 279                 ncopied += sz;
 280         }
 281 
 282         if (ncopied == 0)
 283                 return (ENOENT);
 284 
 285         *ncopiedp = ncopied;
 286         return (0);
 287 }
 288 
 289 int
 290 kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
 291 {
             /*
              * Thin wrapper: forwards buf, nbytes, addr and ncopiedp
              * unchanged to kdi_prw() and returns its result.  The page
              * table walk earlier in this file passes a physical address
              * as addr.
              *
              * NOTE(review): the trailing 1 presumably selects the read
              * direction (memory -> buf); confirm against kdi_prw()'s
              * parameter list, which is not visible here.
              */
 292         return (kdi_prw(buf, nbytes, addr, ncopiedp, 1));
 293 }
 294 
 295 int
 296 kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
 297 {
             /*
              * Thin wrapper: forwards buf, nbytes, addr and ncopiedp
              * unchanged to kdi_prw() and returns its result.
              *
              * NOTE(review): the trailing 0 presumably selects the write
              * direction (buf -> memory); confirm against kdi_prw()'s
              * parameter list, which is not visible here.
              */
 298         return (kdi_prw(buf, nbytes, addr, ncopiedp, 0));
 299 }
 300 
 301 #if !defined(__xpv)
 302 /*
 303  * This gets used for flushing the TLB on all the slaves just prior to doing a
 304  * kdi_prw().  It's unclear why this was originally done, since kdi_prw() itself
 305  * will flush any lingering hat_kdi_page mappings, but let's presume it was a
 306  * good idea.
 307  */
 308 void
 309 kdi_flush_caches(void)
 310 {
             /*
              * Request a FLUSH_TLB_ALL flush with no address argument.
              * This function is only compiled when __xpv is not defined.
              *
              * NOTE(review): mmu_flush_tlb() is defined elsewhere; confirm
              * it performs no blocking operations, as this file's
              * interfaces are invoked while the world is stopped.
              */
 311         mmu_flush_tlb(FLUSH_TLB_ALL, NULL);
 312 }
 313 #endif /* !__xpv */
 314 
 315 /*
 316  * Return the number of bytes, relative to the beginning of a given range, that
 317  * are non-toxic (can be read from and written to with relative impunity).
 318  */
 319 /*ARGSUSED*/
 320 size_t
 321 kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
 322 {
 323 #if defined(__amd64)
 324         extern uintptr_t toxic_addr;
 325         extern size_t   toxic_size;
 326 
 327         /*
 328          * Check 64 bit toxic range.
 329          */
 330         if (toxic_addr != 0 &&
 331             va + sz >= toxic_addr &&
 332             va < toxic_addr + toxic_size)
 333                 return (va < toxic_addr ? toxic_addr - va : 0);