Print this page
8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/cmd/mdb/i86pc/modules/unix/i86mmu.c
          +++ new/usr/src/cmd/mdb/i86pc/modules/unix/i86mmu.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
       24 + *
       25 + * Copyright 2018 Joyent, Inc.
  24   26   */
  25   27  
  26   28  /*
  27   29   * This part of the file contains the mdb support for dcmds:
  28   30   *      ::memseg_list
  29   31   * and walkers for:
  30   32   *      memseg - a memseg list walker for ::memseg_list
  31   33   *
  32   34   */
  33   35  
↓ open down ↓ 5 lines elided ↑ open up ↑
  39   41  #include <sys/hypervisor.h>
  40   42  #endif
  41   43  #include <vm/as.h>
  42   44  
  43   45  #include <mdb/mdb_modapi.h>
  44   46  #include <mdb/mdb_target.h>
  45   47  
  46   48  #include <vm/page.h>
  47   49  #include <vm/hat_i86.h>
  48   50  
       /*
        * VA_SIGN_BIT is bit 47 of a virtual address.
        *
        * VA_SIGN_EXTEND() flips that bit and then subtracts it again.  A value
        * with bit 47 clear comes back unchanged; a value with bit 47 set has 2^48
        * subtracted from it, which (modulo the operand width) turns on every bit
        * above bit 47 when those bits were clear on input.  The argument is
        * evaluated exactly once.
        */
       #define VA_SIGN_BIT             (1UL << 47)
       #define VA_SIGN_EXTEND(va)      (((va) ^ VA_SIGN_BIT) - VA_SIGN_BIT)
       53 +
  49   54  struct pfn2pp {
  50   55          pfn_t pfn;
  51   56          page_t *pp;
  52   57  };
  53   58  
  54   59  static int do_va2pa(uintptr_t, struct as *, int, physaddr_t *, pfn_t *);
  55   60  static void init_mmu(void);
  56   61  
  57   62  int
  58   63  platform_vtop(uintptr_t addr, struct as *asp, physaddr_t *pap)
↓ open down ↓ 332 lines elided ↑ open up ↑
 391  396  pte2mfn(x86pte_t pte, uint_t level)
 392  397  {
 393  398          pfn_t mfn;
 394  399          if (level > 0 && (pte & PT_PAGESIZE))
 395  400                  mfn = mmu_btop(pte & PT_PADDR_LGPG);
 396  401          else
 397  402                  mfn = mmu_btop(pte & PT_PADDR);
 398  403          return (mfn);
 399  404  }
 400  405  
 401      -/*
 402      - * Print a PTE in more human friendly way. The PTE is assumed to be in
 403      - * a level 0 page table, unless -l specifies another level.
 404      - *
 405      - * The PTE value can be specified as the -p option, since on a 32 bit kernel
 406      - * with PAE running it's larger than a uintptr_t.
 407      - */
 408  406  static int
 409  407  do_pte_dcmd(int level, uint64_t pte)
 410  408  {
 411  409          static char *attr[] = {
 412  410              "wrback", "wrthru", "uncached", "uncached",
 413  411              "wrback", "wrthru", "wrcombine", "uncached"};
 414  412          int pat_index = 0;
 415  413          pfn_t mfn;
 416  414  
 417      -        mdb_printf("pte=%llr: ", pte);
 418      -        if (PTE_GET(pte, mmu.pt_nx))
 419      -                mdb_printf("noexec ");
      415 +        mdb_printf("pte=0x%llr: ", pte);
 420  416  
 421  417          mfn = pte2mfn(pte, level);
 422  418          mdb_printf("%s=0x%lr ", is_xpv ? "mfn" : "pfn", mfn);
 423  419  
      420 +        if (PTE_GET(pte, mmu.pt_nx))
      421 +                mdb_printf("noexec ");
      422 +
 424  423          if (PTE_GET(pte, PT_NOCONSIST))
 425  424                  mdb_printf("noconsist ");
 426  425  
 427  426          if (PTE_GET(pte, PT_NOSYNC))
 428  427                  mdb_printf("nosync ");
 429  428  
 430  429          if (PTE_GET(pte, mmu.pt_global))
 431  430                  mdb_printf("global ");
 432  431  
 433  432          if (level > 0 && PTE_GET(pte, PT_PAGESIZE))
↓ open down ↓ 35 lines elided ↑ open up ↑
 469  468          if (PTE_GET(pte, PT_VALID) == 0)
 470  469                  mdb_printf(" !VALID ");
 471  470  
 472  471          mdb_printf("\n");
 473  472          return (DCMD_OK);
 474  473  }
 475  474  
 476  475  /*
 477  476   * Print a PTE in more human friendly way. The PTE is assumed to be in
 478  477   * a level 0 page table, unless -l specifies another level.
 479      - *
 480      - * The PTE value can be specified as the -p option, since on a 32 bit kernel
 481      - * with PAE running it's larger than a uintptr_t.
 482  478   */
 483  479  /*ARGSUSED*/
 484  480  int
 485  481  pte_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 486  482  {
 487      -        int level = 0;
 488      -        uint64_t pte = 0;
 489      -        char *level_str = NULL;
 490      -        char *pte_str = NULL;
      483 +        uint64_t level = 0;
 491  484  
 492  485          init_mmu();
 493  486  
 494  487          if (mmu.num_level == 0)
 495  488                  return (DCMD_ERR);
 496  489  
      490 +        if ((flags & DCMD_ADDRSPEC) == 0)
      491 +                return (DCMD_USAGE);
      492 +
 497  493          if (mdb_getopts(argc, argv,
 498      -            'p', MDB_OPT_STR, &pte_str,
 499      -            'l', MDB_OPT_STR, &level_str) != argc)
      494 +            'l', MDB_OPT_UINT64, &level) != argc)
 500  495                  return (DCMD_USAGE);
 501  496  
 502      -        /*
 503      -         * parse the PTE to decode, if it's 0, we don't do anything
 504      -         */
 505      -        if (pte_str != NULL) {
 506      -                pte = mdb_strtoull(pte_str);
 507      -        } else {
 508      -                if ((flags & DCMD_ADDRSPEC) == 0)
 509      -                        return (DCMD_USAGE);
 510      -                pte = addr;
      497 +        if (level > mmu.max_level) {
      498 +                mdb_warn("invalid level %lu\n", level);
      499 +                return (DCMD_ERR);
 511  500          }
 512      -        if (pte == 0)
      501 +
      502 +        if (addr == 0)
 513  503                  return (DCMD_OK);
 514  504  
 515      -        /*
 516      -         * parse the level if supplied
 517      -         */
 518      -        if (level_str != NULL) {
 519      -                level = mdb_strtoull(level_str);
 520      -                if (level < 0 || level > mmu.max_level)
 521      -                        return (DCMD_ERR);
 522      -        }
 523      -
 524      -        return (do_pte_dcmd(level, pte));
      505 +        return (do_pte_dcmd((int)level, addr));
 525  506  }
 526  507  
 527  508  static size_t
 528  509  va2entry(htable_t *htable, uintptr_t addr)
 529  510  {
 530  511          size_t entry = (addr - htable->ht_vaddr);
 531  512  
 532  513          entry >>= mmu.level_shift[htable->ht_level];
 533  514          return (entry & HTABLE_NUM_PTES(htable) - 1);
 534  515  }
 535  516  
 536  517  static x86pte_t
 537  518  get_pte(hat_t *hat, htable_t *htable, uintptr_t addr)
 538  519  {
 539  520          x86pte_t buf;
 540      -        x86pte32_t *pte32 = (x86pte32_t *)&buf;
 541      -        size_t len;
 542  521  
 543      -        if (htable->ht_flags & HTABLE_VLP) {
 544      -                uintptr_t ptr = (uintptr_t)hat->hat_vlp_ptes;
      522 +        if (htable->ht_flags & HTABLE_COPIED) {
      523 +                uintptr_t ptr = (uintptr_t)hat->hat_copied_ptes;
 545  524                  ptr += va2entry(htable, addr) << mmu.pte_size_shift;
 546      -                len = mdb_vread(&buf, mmu.pte_size, ptr);
 547      -        } else {
 548      -                paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn);
 549      -                paddr += va2entry(htable, addr) << mmu.pte_size_shift;
 550      -                len = mdb_pread(&buf, mmu.pte_size, paddr);
      525 +                return (*(x86pte_t *)ptr);
 551  526          }
 552  527  
 553      -        if (len != mmu.pte_size)
 554      -                return (0);
      528 +        paddr_t paddr = mmu_ptob((paddr_t)htable->ht_pfn);
      529 +        paddr += va2entry(htable, addr) << mmu.pte_size_shift;
 555  530  
 556      -        if (mmu.pte_size == sizeof (x86pte_t))
      531 +        if ((mdb_pread(&buf, mmu.pte_size, paddr)) == mmu.pte_size)
 557  532                  return (buf);
 558      -        return (*pte32);
      533 +
      534 +        return (0);
 559  535  }
 560  536  
 561  537  static int
 562  538  do_va2pa(uintptr_t addr, struct as *asp, int print_level, physaddr_t *pap,
 563  539      pfn_t *mfnp)
 564  540  {
 565  541          struct as as;
 566  542          struct hat *hatp;
 567  543          struct hat hat;
 568  544          htable_t *ht;
↓ open down ↓ 45 lines elided ↑ open up ↑
 614  590                                          return (DCMD_ERR);
 615  591                                  }
 616  592  
 617  593                                  if (htable.ht_vaddr != base ||
 618  594                                      htable.ht_level != level)
 619  595                                          continue;
 620  596  
 621  597                                  pte = get_pte(&hat, &htable, addr);
 622  598  
 623  599                                  if (print_level) {
 624      -                                        mdb_printf("\tlevel=%d htable=%p "
 625      -                                            "pte=%llr\n", level, ht, pte);
      600 +                                        mdb_printf("\tlevel=%d htable=0x%p "
      601 +                                            "pte=0x%llr\n", level, ht, pte);
 626  602                                  }
 627  603  
 628  604                                  if (!PTE_ISVALID(pte)) {
 629  605                                          mdb_printf("Address %p is unmapped.\n",
 630  606                                              addr);
 631  607                                          return (DCMD_ERR);
 632  608                                  }
 633  609  
 634  610                                  if (found)
 635  611                                          continue;
↓ open down ↓ 82 lines elided ↑ open up ↑
 718  694  {
 719  695          struct hat *hatp;
 720  696          struct hat hat;
 721  697          htable_t *ht;
 722  698          htable_t htable;
 723  699          uintptr_t base;
 724  700          int h;
 725  701          int level;
 726  702          int entry;
 727  703          x86pte_t pte;
 728      -        x86pte_t buf;
 729      -        x86pte32_t *pte32 = (x86pte32_t *)&buf;
 730  704          physaddr_t paddr;
 731  705          size_t len;
 732  706  
 733  707          /*
 734  708           * The hats are kept in a list with khat at the head.
 735  709           */
 736  710          for (hatp = khat; hatp != NULL; hatp = hat.hat_next) {
 737  711                  /*
 738  712                   * read the hat and its hash table
 739  713                   */
↓ open down ↓ 49 lines elided ↑ open up ↑
 789  763                                          base = htable.ht_vaddr + entry *
 790  764                                              mmu.level_size[level];
 791  765  
 792  766                                          /*
 793  767                                           * only report kernel addresses once
 794  768                                           */
 795  769                                          if (hatp != khat &&
 796  770                                              base >= kernelbase)
 797  771                                                  continue;
 798  772  
 799      -                                        len = mdb_pread(&buf, mmu.pte_size,
      773 +                                        len = mdb_pread(&pte, mmu.pte_size,
 800  774                                              paddr + entry * mmu.pte_size);
 801  775                                          if (len != mmu.pte_size)
 802  776                                                  return (DCMD_ERR);
 803      -                                        if (mmu.pte_size == sizeof (x86pte_t))
 804      -                                                pte = buf;
 805      -                                        else
 806      -                                                pte = *pte32;
 807  777  
 808  778                                          if ((pte & PT_VALID) == 0)
 809  779                                                  continue;
 810  780                                          if (level == 0 || !(pte & PT_PAGESIZE))
 811  781                                                  pte &= PT_PADDR;
 812  782                                          else
 813  783                                                  pte &= PT_PADDR_LGPG;
 814  784                                          if (mmu_btop(mdb_ma_to_pa(pte)) != pfn)
 815  785                                                  continue;
 816  786                                          mdb_printf("hat=%p maps addr=%p\n",
↓ open down ↓ 30 lines elided ↑ open up ↑
 847  817                  return (DCMD_USAGE);
 848  818  
 849  819          pfn = (pfn_t)addr;
 850  820          if (mflag)
 851  821                  pfn = mdb_mfn_to_pfn(pfn);
 852  822  
 853  823          return (do_report_maps(pfn));
 854  824  }
 855  825  
 856  826  static int
 857      -do_ptable_dcmd(pfn_t pfn)
      827 +do_ptable_dcmd(pfn_t pfn, uint64_t level)
 858  828  {
 859  829          struct hat *hatp;
 860  830          struct hat hat;
 861  831          htable_t *ht;
 862  832          htable_t htable;
 863  833          uintptr_t base;
 864  834          int h;
 865      -        int level;
 866  835          int entry;
 867  836          uintptr_t pagesize;
 868  837          x86pte_t pte;
 869  838          x86pte_t buf;
 870      -        x86pte32_t *pte32 = (x86pte32_t *)&buf;
 871  839          physaddr_t paddr;
 872  840          size_t len;
 873  841  
 874  842          /*
 875  843           * The hats are kept in a list with khat at the head.
 876  844           */
 877  845          for (hatp = khat; hatp != NULL; hatp = hat.hat_next) {
 878  846                  /*
 879  847                   * read the hat and its hash table
 880  848                   */
↓ open down ↓ 24 lines elided ↑ open up ↑
 905  873                                   */
 906  874                                  if (htable.ht_pfn == pfn)
 907  875                                          goto found_it;
 908  876                          }
 909  877                  }
 910  878          }
 911  879  
 912  880  found_it:
 913  881          if (htable.ht_pfn == pfn) {
 914  882                  mdb_printf("htable=%p\n", ht);
 915      -                level = htable.ht_level;
      883 +                if (level == (uint64_t)-1) {
      884 +                        level = htable.ht_level;
      885 +                } else if (htable.ht_level != level) {
      886 +                        mdb_warn("htable has level %d but forcing level %lu\n",
      887 +                            htable.ht_level, level);
      888 +                }
 916  889                  base = htable.ht_vaddr;
 917  890                  pagesize = mmu.level_size[level];
 918  891          } else {
 919      -                mdb_printf("Unknown pagetable - assuming level/addr 0");
 920      -                level = 0;      /* assume level == 0 for PFN */
      892 +                if (level == (uint64_t)-1)
      893 +                        level = 0;
      894 +                mdb_warn("couldn't find matching htable, using level=%lu, "
      895 +                    "base address=0x0\n", level);
 921  896                  base = 0;
 922      -                pagesize = MMU_PAGESIZE;
      897 +                pagesize = mmu.level_size[level];
 923  898          }
 924  899  
 925  900          paddr = mmu_ptob((physaddr_t)pfn);
 926  901          for (entry = 0; entry < mmu.ptes_per_table; ++entry) {
 927  902                  len = mdb_pread(&buf, mmu.pte_size,
 928  903                      paddr + entry * mmu.pte_size);
 929  904                  if (len != mmu.pte_size)
 930  905                          return (DCMD_ERR);
 931      -                if (mmu.pte_size == sizeof (x86pte_t))
 932  906                          pte = buf;
 933      -                else
 934      -                        pte = *pte32;
 935  907  
 936  908                  if (pte == 0)
 937  909                          continue;
 938  910  
 939      -                mdb_printf("[%3d] va=%p ", entry, base + entry * pagesize);
      911 +                mdb_printf("[%3d] va=0x%p ", entry,
      912 +                    VA_SIGN_EXTEND(base + entry * pagesize));
 940  913                  do_pte_dcmd(level, pte);
 941  914          }
 942  915  
 943  916  done:
 944  917          return (DCMD_OK);
 945  918  }
 946  919  
 947  920  /*
 948  921   * Dump the page table at the given PFN
 949  922   */
 950  923  /*ARGSUSED*/
 951  924  int
 952  925  ptable_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 953  926  {
 954  927          pfn_t pfn;
 955  928          uint_t mflag = 0;
      929 +        uint64_t level = (uint64_t)-1;
 956  930  
 957  931          init_mmu();
 958  932  
 959  933          if (mmu.num_level == 0)
 960  934                  return (DCMD_ERR);
 961  935  
 962  936          if ((flags & DCMD_ADDRSPEC) == 0)
 963  937                  return (DCMD_USAGE);
 964  938  
 965  939          if (mdb_getopts(argc, argv,
 966      -            'm', MDB_OPT_SETBITS, TRUE, &mflag, NULL) != argc)
      940 +            'm', MDB_OPT_SETBITS, TRUE, &mflag,
      941 +            'l', MDB_OPT_UINT64, &level, NULL) != argc)
 967  942                  return (DCMD_USAGE);
 968  943  
      944 +        if (level != (uint64_t)-1 && level > mmu.max_level) {
      945 +                mdb_warn("invalid level %lu\n", level);
      946 +                return (DCMD_ERR);
      947 +        }
      948 +
 969  949          pfn = (pfn_t)addr;
 970  950          if (mflag)
 971  951                  pfn = mdb_mfn_to_pfn(pfn);
 972  952  
 973      -        return (do_ptable_dcmd(pfn));
      953 +        return (do_ptable_dcmd(pfn, level));
 974  954  }
 975  955  
 976  956  static int
 977  957  do_htables_dcmd(hat_t *hatp)
 978  958  {
 979  959          struct hat hat;
 980  960          htable_t *ht;
 981  961          htable_t htable;
 982  962          int h;
 983  963  
↓ open down ↓ 39 lines elided ↑ open up ↑
1023 1003  
1024 1004          if (mmu.num_level == 0)
1025 1005                  return (DCMD_ERR);
1026 1006  
1027 1007          if ((flags & DCMD_ADDRSPEC) == 0)
1028 1008                  return (DCMD_USAGE);
1029 1009  
1030 1010          hat = (hat_t *)addr;
1031 1011  
1032 1012          return (do_htables_dcmd(hat));
     1013 +}
     1014 +
     1015 +static uintptr_t
     1016 +entry2va(size_t *entries)
     1017 +{
     1018 +        uintptr_t va = 0;
     1019 +
     1020 +        for (level_t l = mmu.max_level; l >= 0; l--)
     1021 +                va += entries[l] << mmu.level_shift[l];
     1022 +
     1023 +        return (VA_SIGN_EXTEND(va));
     1024 +}
     1025 +
     1026 +static void
     1027 +ptmap_report(size_t *entries, uintptr_t start,
     1028 +    boolean_t user, boolean_t writable, boolean_t wflag)
     1029 +{
     1030 +        uint64_t curva = entry2va(entries);
     1031 +
     1032 +        mdb_printf("mapped %s,%s range of %lu bytes: %a-%a\n",
     1033 +            user ? "user" : "kernel", writable ? "writable" : "read-only",
     1034 +            curva - start, start, curva - 1);
     1035 +        if (wflag && start >= kernelbase)
     1036 +                (void) mdb_call_dcmd("whatis", start, DCMD_ADDRSPEC, 0, NULL);
     1037 +}
     1038 +
     1039 +int
     1040 +ptmap_dcmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
     1041 +{
     1042 +        physaddr_t paddrs[MAX_NUM_LEVEL] = { 0, };
     1043 +        size_t entry[MAX_NUM_LEVEL] = { 0, };
     1044 +        uintptr_t start = (uintptr_t)-1;
     1045 +        boolean_t writable = B_FALSE;
     1046 +        boolean_t user = B_FALSE;
     1047 +        boolean_t wflag = B_FALSE;
     1048 +        level_t curlevel;
     1049 +
     1050 +        if ((flags & DCMD_ADDRSPEC) == 0)
     1051 +                return (DCMD_USAGE);
     1052 +
     1053 +        if (mdb_getopts(argc, argv,
     1054 +            'w', MDB_OPT_SETBITS, TRUE, &wflag, NULL) != argc)
     1055 +                return (DCMD_USAGE);
     1056 +
     1057 +        init_mmu();
     1058 +
     1059 +        if (mmu.num_level == 0)
     1060 +                return (DCMD_ERR);
     1061 +
     1062 +        curlevel = mmu.max_level;
     1063 +
     1064 +        paddrs[curlevel] = addr & MMU_PAGEMASK;
     1065 +
     1066 +        for (;;) {
     1067 +                physaddr_t pte_addr;
     1068 +                x86pte_t pte;
     1069 +
     1070 +                pte_addr = paddrs[curlevel] +
     1071 +                    (entry[curlevel] << mmu.pte_size_shift);
     1072 +
     1073 +                if (mdb_pread(&pte, sizeof (pte), pte_addr) != sizeof (pte)) {
     1074 +                        mdb_warn("couldn't read pte at %p", pte_addr);
     1075 +                        return (DCMD_ERR);
     1076 +                }
     1077 +
     1078 +                if (PTE_GET(pte, PT_VALID) == 0) {
     1079 +                        if (start != (uintptr_t)-1) {
     1080 +                                ptmap_report(entry, start,
     1081 +                                    user, writable, wflag);
     1082 +                                start = (uintptr_t)-1;
     1083 +                        }
     1084 +                } else if (curlevel == 0 || PTE_GET(pte, PT_PAGESIZE)) {
     1085 +                        if (start == (uintptr_t)-1) {
     1086 +                                start = entry2va(entry);
     1087 +                                user = PTE_GET(pte, PT_USER);
     1088 +                                writable = PTE_GET(pte, PT_WRITABLE);
     1089 +                        } else if (user != PTE_GET(pte, PT_USER) ||
     1090 +                            writable != PTE_GET(pte, PT_WRITABLE)) {
     1091 +                                ptmap_report(entry, start,
     1092 +                                    user, writable, wflag);
     1093 +                                start = entry2va(entry);
     1094 +                                user = PTE_GET(pte, PT_USER);
     1095 +                                writable = PTE_GET(pte, PT_WRITABLE);
     1096 +                        }
     1097 +                } else {
     1098 +                        /* Descend a level. */
     1099 +                        physaddr_t pa = mmu_ptob(pte2mfn(pte, curlevel));
     1100 +                        paddrs[--curlevel] = pa;
     1101 +                        entry[curlevel] = 0;
     1102 +                        continue;
     1103 +                }
     1104 +
     1105 +                while (++entry[curlevel] == mmu.ptes_per_table) {
     1106 +                        /* Ascend back up. */
     1107 +                        entry[curlevel] = 0;
     1108 +                        if (curlevel == mmu.max_level) {
     1109 +                                if (start != (uintptr_t)-1) {
     1110 +                                        ptmap_report(entry, start,
     1111 +                                            user, writable, wflag);
     1112 +                                }
     1113 +                                goto out;
     1114 +                        }
     1115 +
     1116 +                        curlevel++;
     1117 +                }
     1118 +        }
     1119 +
     1120 +out:
     1121 +        return (DCMD_OK);
1033 1122  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX