Print this page
11585 ::scalehrtime could be more precise
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>


 753 ptable_help(void)
 754 {
 755         mdb_printf(
 756             "Given a PFN holding a page table, print its contents, and\n"
 757             "the address of the corresponding htable structure.\n"
 758             "\n"
 759             "-m Interpret the PFN as an MFN (machine frame number)\n"
 760             "-l force page table level (3 is top)\n");
 761 }
 762 
 763 static void
 764 ptmap_help(void)
 765 {
 766         mdb_printf(
 767             "Report all mappings represented by the page table hierarchy\n"
 768             "rooted at the given cr3 value / physical address.\n"
 769             "\n"
 770             "-w run ::whatis on mapping start addresses\n");
 771 }
 772 




















/*
 * NSEC_SHIFT is replicated here (it is not defined in a header file),
 * but for amusement, the reader is directed to the comment that explains
 * the rationale for this particular value on x86.  Spoiler:  the value is
 * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
 * in that comment sounds too familiar, it's because your author also wrote
 * that code -- some fifteen years prior to this writing in 2011...)
 *
 * In this file the value is consumed by scalehrtime_cmd(), which shifts by
 * NSEC_SHIFT and by (32 - NSEC_SHIFT) when applying nsec_scale.  Because it
 * is a private copy, it must be kept equal to the value the target uses.
 */
#define NSEC_SHIFT 5
 782 
 783 /*ARGSUSED*/
 784 static int
 785 scalehrtime_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 786 {
 787         uint32_t nsec_scale;
 788         hrtime_t tsc = addr, hrt;
 789         unsigned int *tscp = (unsigned int *)&tsc;
 790         uintptr_t scalehrtimef;
 791         uint64_t scale;
 792         GElf_Sym sym;


 793 
 794         if (!(flags & DCMD_ADDRSPEC)) {
 795                 if (argc != 1)

 796                         return (DCMD_USAGE);
 797 
 798                 switch (argv[0].a_type) {






 799                 case MDB_TYPE_STRING:
 800                         tsc = mdb_strtoull(argv[0].a_un.a_str);
 801                         break;
 802                 case MDB_TYPE_IMMEDIATE:
 803                         tsc = argv[0].a_un.a_val;
 804                         break;
 805                 default:
 806                         return (DCMD_USAGE);
 807                 }
 808         }
 809 
 810         if (mdb_readsym(&scalehrtimef,
 811             sizeof (scalehrtimef), "scalehrtimef") == -1) {
 812                 mdb_warn("couldn't read 'scalehrtimef'");
 813                 return (DCMD_ERR);
 814         }
 815 
 816         if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
 817                 mdb_warn("couldn't find 'tsc_scalehrtime'");
 818                 return (DCMD_ERR);
 819         }
 820 
 821         if (sym.st_value != scalehrtimef) {
 822                 mdb_warn("::scalehrtime requires that scalehrtimef "
 823                     "be set to tsc_scalehrtime\n");
 824                 return (DCMD_ERR);
 825         }
 826 
 827         if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
 828                 mdb_warn("couldn't read 'nsec_scale'");
 829                 return (DCMD_ERR);
 830         }
 831 




























 832         scale = (uint64_t)nsec_scale;
 833 
 834         hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
 835         hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
 836 
 837         mdb_printf("0x%llx\n", hrt);
 838 
 839         return (DCMD_OK);
 840 }
 841 
 842 /*
 843  * The x86 feature set is implemented as a bitmap array. That bitmap array is
 844  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
 845  * macro. We have the names for each of these features in unix's text segment
 846  * so we do not have to duplicate them and instead just look them up.
 847  */
 848 /*ARGSUSED*/
 849 static int
 850 x86_featureset_cmd(uintptr_t addr, uint_t flags, int argc,
 851     const mdb_arg_t *argv)
 852 {
 853         void *fset;
 854         GElf_Sym sym;
 855         uintptr_t nptr;
 856         char name[128];
 857         int ii;


 983         { "idt", ":[-v]", "dump an IDT", idt },
 984         { "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace },
 985         { "vatopfn", ":[-a as]", "translate address to physical page",
 986             va2pfn_dcmd },
 987         { "report_maps", ":[-m]",
 988             "Given PFN, report mappings / page table usage",
 989             report_maps_dcmd, report_maps_help },
 990         { "htables", "", "Given hat_t *, lists all its htable_t * values",
 991             htables_dcmd, htables_help },
 992         { "ptable", ":[-lm]", "Given PFN, dump contents of a page table",
 993             ptable_dcmd, ptable_help },
 994         { "ptmap", ":", "Given a cr3 value, dump all mappings",
 995             ptmap_dcmd, ptmap_help },
 996         { "pte", ":[-l N]", "print human readable page table entry",
 997             pte_dcmd },
 998         { "pfntomfn", ":", "convert physical page to hypervisor machine page",
 999             pfntomfn_dcmd },
1000         { "mfntopfn", ":", "convert hypervisor machine page to physical page",
1001             mfntopfn_dcmd },
1002         { "memseg_list", ":", "show memseg list", memseg_list },
1003         { "scalehrtime", ":",
1004             "scale an unscaled high-res time", scalehrtime_cmd },
1005         { "x86_featureset", NULL, "dump the x86_featureset vector",
1006                 x86_featureset_cmd },
1007 #ifdef _KMDB
1008         { "sysregs", NULL, "dump system registers", sysregs_dcmd },
1009 #endif
1010         { NULL }
1011 };
1012 
/*
 * Walkers provided by this module.  Each entry gives the walker's name, a
 * one-line description, and its init and step callbacks, optionally followed
 * by a fini callback (mutex_owner supplies none).  The table is terminated
 * by the { NULL } entry.
 */
static const mdb_walker_t walkers[] = {
        { "ttrace", "walks trap trace buffers in reverse chronological order",
                ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
        { "mutex_owner", "walks the owner of a mutex",
                mutex_owner_init, mutex_owner_step },
        { "memseg", "walk the memseg structures",
                memseg_walk_init, memseg_walk_step, memseg_walk_fini },
        { NULL }
};
1022 
/*
 * Module information record: MDB_API_VERSION together with this file's
 * dcmds and walkers tables.
 */
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1024 


 753 ptable_help(void)
 754 {
 755         mdb_printf(
 756             "Given a PFN holding a page table, print its contents, and\n"
 757             "the address of the corresponding htable structure.\n"
 758             "\n"
 759             "-m Interpret the PFN as an MFN (machine frame number)\n"
 760             "-l force page table level (3 is top)\n");
 761 }
 762 
 763 static void
 764 ptmap_help(void)
 765 {
 766         mdb_printf(
 767             "Report all mappings represented by the page table hierarchy\n"
 768             "rooted at the given cr3 value / physical address.\n"
 769             "\n"
 770             "-w run ::whatis on mapping start addresses\n");
 771 }
 772 
/*
 * Long-form description printed as the help text for ::scalehrtime.  It
 * explains how the dcmd chooses between treating its input as an absolute
 * or a relative unscaled time, and how -a and -r override that choice.
 */
static const char *const scalehrtime_desc =
        "Scales a timestamp from ticks to nanoseconds. Unscaled timestamps\n"
        "are used as both a quick way of accumulating relative time (as for\n"
        "usage) and as a quick way of getting the absolute current time.\n"
        "These uses require slightly different scaling algorithms. By\n"
        "default, if a specified time is greater than half of the unscaled\n"
        "time at the last tick (that is, if the unscaled time represents\n"
        "more than half the time since boot), the timestamp is assumed to\n"
        "be absolute, and the scaling algorithm used mimics that which the\n"
        "kernel uses in gethrtime(). Otherwise, the timestamp is assumed to\n"
        "be relative, and the algorithm mimics scalehrtime(). This behavior\n"
        "can be overridden by forcing the unscaled time to be interpreted\n"
        "as relative (via -r) or absolute (via -a).\n";

/*
 * Help callback for ::scalehrtime: prints scalehrtime_desc.  The text is
 * passed as a "%s" argument rather than as the format string itself.
 */
static void
scalehrtime_help(void)
{
        mdb_printf("%s", scalehrtime_desc);
}
 792 
/*
 * NSEC_SHIFT is replicated here (it is not defined in a header file),
 * but for amusement, the reader is directed to the comment that explains
 * the rationale for this particular value on x86.  Spoiler:  the value is
 * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
 * in that comment sounds too familiar, it's because your author also wrote
 * that code -- some fifteen years prior to this writing in 2011...)
 *
 * In this file the value is consumed by scalehrtime_cmd(), which shifts by
 * NSEC_SHIFT and by (32 - NSEC_SHIFT) when applying nsec_scale.  Because it
 * is a private copy, it must be kept equal to the value the target uses.
 */
#define NSEC_SHIFT 5
 802 
 803 /*ARGSUSED*/
 804 static int
 805 scalehrtime_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 806 {
 807         uint32_t nsec_scale;
 808         hrtime_t tsc = addr, hrt, tsc_last, base, mult = 1;
 809         unsigned int *tscp = (unsigned int *)&tsc;
 810         uintptr_t scalehrtimef;
 811         uint64_t scale;
 812         GElf_Sym sym;
 813         int expected = !(flags & DCMD_ADDRSPEC);
 814         uint_t absolute = FALSE, relative = FALSE;
 815 
 816         if (mdb_getopts(argc, argv,
 817             'a', MDB_OPT_SETBITS, TRUE, &absolute,
 818             'r', MDB_OPT_SETBITS, TRUE, &relative, NULL) != argc - expected)
 819                 return (DCMD_USAGE);
 820 
 821         if (absolute && relative) {
 822                 mdb_warn("can't specify both -a and -r\n");
 823                 return (DCMD_USAGE);
 824         }
 825 
 826         if (expected == 1) {
 827                 switch (argv[argc - 1].a_type) {
 828                 case MDB_TYPE_STRING:
 829                         tsc = mdb_strtoull(argv[argc - 1].a_un.a_str);
 830                         break;
 831                 case MDB_TYPE_IMMEDIATE:
 832                         tsc = argv[argc - 1].a_un.a_val;
 833                         break;
 834                 default:
 835                         return (DCMD_USAGE);
 836                 }
 837         }
 838 
 839         if (mdb_readsym(&scalehrtimef,
 840             sizeof (scalehrtimef), "scalehrtimef") == -1) {
 841                 mdb_warn("couldn't read 'scalehrtimef'");
 842                 return (DCMD_ERR);
 843         }
 844 
 845         if (mdb_lookup_by_name("tsc_scalehrtime", &sym) == -1) {
 846                 mdb_warn("couldn't find 'tsc_scalehrtime'");
 847                 return (DCMD_ERR);
 848         }
 849 
 850         if (sym.st_value != scalehrtimef) {
 851                 mdb_warn("::scalehrtime requires that scalehrtimef "
 852                     "be set to tsc_scalehrtime\n");
 853                 return (DCMD_ERR);
 854         }
 855 
 856         if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
 857                 mdb_warn("couldn't read 'nsec_scale'");
 858                 return (DCMD_ERR);
 859         }
 860 
 861         if (mdb_readsym(&tsc_last, sizeof (tsc_last), "tsc_last") == -1) {
 862                 mdb_warn("couldn't read 'tsc_last'");
 863                 return (DCMD_ERR);
 864         }
 865 
 866         if (mdb_readsym(&base, sizeof (base), "tsc_hrtime_base") == -1) {
 867                 mdb_warn("couldn't read 'tsc_hrtime_base'");
 868                 return (DCMD_ERR);
 869         }
 870 
 871         /*
 872          * If our time is greater than half of tsc_last, we will take our
 873          * delta against tsc_last, convert it, and add that to (or subtract it
 874          * from) tsc_hrtime_base.  This mimics what the kernel actually does
 875          * in gethrtime() (modulo the tsc_sync_tick_delta) and gets us a much
 876          * higher precision result than trying to convert a large tsc value.
 877          */
 878         if (absolute || (tsc > (tsc_last >> 1) && !relative)) {
 879                 if (tsc > tsc_last) {
 880                         tsc = tsc - tsc_last;
 881                 } else {
 882                         tsc = tsc_last - tsc;
 883                         mult = -1;
 884                 }
 885         } else {
 886                 base = 0;
 887         }
 888 
 889         scale = (uint64_t)nsec_scale;
 890 
 891         hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
 892         hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
 893 
 894         mdb_printf("0x%llx\n", base + (hrt * mult));
 895 
 896         return (DCMD_OK);
 897 }
 898 
 899 /*
 900  * The x86 feature set is implemented as a bitmap array. That bitmap array is
 901  * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
 902  * macro. We have the names for each of these features in unix's text segment
 903  * so we do not have to duplicate them and instead just look them up.
 904  */
 905 /*ARGSUSED*/
 906 static int
 907 x86_featureset_cmd(uintptr_t addr, uint_t flags, int argc,
 908     const mdb_arg_t *argv)
 909 {
 910         void *fset;
 911         GElf_Sym sym;
 912         uintptr_t nptr;
 913         char name[128];
 914         int ii;


1040         { "idt", ":[-v]", "dump an IDT", idt },
1041         { "ttrace", "[-x] [-t kthread]", "dump trap trace buffers", ttrace },
1042         { "vatopfn", ":[-a as]", "translate address to physical page",
1043             va2pfn_dcmd },
1044         { "report_maps", ":[-m]",
1045             "Given PFN, report mappings / page table usage",
1046             report_maps_dcmd, report_maps_help },
1047         { "htables", "", "Given hat_t *, lists all its htable_t * values",
1048             htables_dcmd, htables_help },
1049         { "ptable", ":[-lm]", "Given PFN, dump contents of a page table",
1050             ptable_dcmd, ptable_help },
1051         { "ptmap", ":", "Given a cr3 value, dump all mappings",
1052             ptmap_dcmd, ptmap_help },
1053         { "pte", ":[-l N]", "print human readable page table entry",
1054             pte_dcmd },
1055         { "pfntomfn", ":", "convert physical page to hypervisor machine page",
1056             pfntomfn_dcmd },
1057         { "mfntopfn", ":", "convert hypervisor machine page to physical page",
1058             mfntopfn_dcmd },
1059         { "memseg_list", ":", "show memseg list", memseg_list },
1060         { "scalehrtime", ":[-a|-r]", "scale an unscaled high-res time",
1061             scalehrtime_cmd, scalehrtime_help },
1062         { "x86_featureset", NULL, "dump the x86_featureset vector",
1063                 x86_featureset_cmd },
1064 #ifdef _KMDB
1065         { "sysregs", NULL, "dump system registers", sysregs_dcmd },
1066 #endif
1067         { NULL }
1068 };
1069 
/*
 * Walkers provided by this module.  Each entry gives the walker's name, a
 * one-line description, and its init and step callbacks, optionally followed
 * by a fini callback (mutex_owner supplies none).  The table is terminated
 * by the { NULL } entry.
 */
static const mdb_walker_t walkers[] = {
        { "ttrace", "walks trap trace buffers in reverse chronological order",
                ttrace_walk_init, ttrace_walk_step, ttrace_walk_fini },
        { "mutex_owner", "walks the owner of a mutex",
                mutex_owner_init, mutex_owner_step },
        { "memseg", "walk the memseg structures",
                memseg_walk_init, memseg_walk_step, memseg_walk_fini },
        { NULL }
};
1079 
/*
 * Module information record: MDB_API_VERSION together with this file's
 * dcmds and walkers tables.
 */
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1081