Print this page
11585 ::scalehrtime could be more precise
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/cmd/mdb/i86pc/modules/unix/unix.c
          +++ new/usr/src/cmd/mdb/i86pc/modules/unix/unix.c
↓ open down ↓ 762 lines elided ↑ open up ↑
 763  763  static void
 764  764  ptmap_help(void)
 765  765  {
 766  766          mdb_printf(
 767  767              "Report all mappings represented by the page table hierarchy\n"
 768  768              "rooted at the given cr3 value / physical address.\n"
 769  769              "\n"
 770  770              "-w run ::whatis on mapping start addresses\n");
 771  771  }
 772  772  
      773 +static const char *const scalehrtime_desc =
      774 +        "Scales a timestamp from ticks to nanoseconds. Unscaled timestamps\n"
      775 +        "are used as both a quick way of accumulating relative time (as for\n"
      776 +        "usage) and as a quick way of getting the absolute current time.\n"
      777 +        "These uses require slightly different scaling algorithms. By\n"
      778 +        "default, if a specified time is greater than half of the unscaled\n"
      779 +        "time at the last tick (that is, if the unscaled time represents\n"
      780 +        "more than half the time since boot), the timestamp is assumed to\n"
      781 +        "be absolute, and the scaling algorithm used mimics that which the\n"
      782 +        "kernel uses in gethrtime(). Otherwise, the timestamp is assumed to\n"
      783 +        "be relative, and the algorithm mimics scalehrtime(). This behavior\n"
      784 +        "can be overridden by forcing the unscaled time to be interpreted\n"
      785 +        "as relative (via -r) or absolute (via -a).\n";
      786 +
      787 +static void
      788 +scalehrtime_help(void)
      789 +{
      790 +        mdb_printf("%s", scalehrtime_desc);
      791 +}
      792 +
 773  793  /*
 774  794   * NSEC_SHIFT is replicated here (it is not defined in a header file),
 775  795   * but for amusement, the reader is directed to the comment that explains
 776  796   * the rationale for this particular value on x86.  Spoiler:  the value is
 777  797   * selected to accommodate 60 MHz Pentiums!  (And a confession:  if the voice
 778  798   * in that comment sounds too familiar, it's because your author also wrote
 779  799   * that code -- some fifteen years prior to this writing in 2011...)
 780  800   */
 781  801  #define NSEC_SHIFT 5
 782  802  
 783  803  /*ARGSUSED*/
 784  804  static int
 785  805  scalehrtime_cmd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 786  806  {
 787  807          uint32_t nsec_scale;
 788      -        hrtime_t tsc = addr, hrt;
      808 +        hrtime_t tsc = addr, hrt, tsc_last, base, mult = 1;
 789  809          unsigned int *tscp = (unsigned int *)&tsc;
 790  810          uintptr_t scalehrtimef;
 791  811          uint64_t scale;
 792  812          GElf_Sym sym;
      813 +        int expected = !(flags & DCMD_ADDRSPEC);
      814 +        uint_t absolute = FALSE, relative = FALSE;
 793  815  
 794      -        if (!(flags & DCMD_ADDRSPEC)) {
 795      -                if (argc != 1)
 796      -                        return (DCMD_USAGE);
      816 +        if (mdb_getopts(argc, argv,
      817 +            'a', MDB_OPT_SETBITS, TRUE, &absolute,
      818 +            'r', MDB_OPT_SETBITS, TRUE, &relative, NULL) != argc - expected)
      819 +                return (DCMD_USAGE);
 797  820  
 798      -                switch (argv[0].a_type) {
      821 +        if (absolute && relative) {
      822 +                mdb_warn("can't specify both -a and -r\n");
      823 +                return (DCMD_USAGE);
      824 +        }
      825 +
      826 +        if (expected == 1) {
      827 +                switch (argv[argc - 1].a_type) {
 799  828                  case MDB_TYPE_STRING:
 800      -                        tsc = mdb_strtoull(argv[0].a_un.a_str);
      829 +                        tsc = mdb_strtoull(argv[argc - 1].a_un.a_str);
 801  830                          break;
 802  831                  case MDB_TYPE_IMMEDIATE:
 803      -                        tsc = argv[0].a_un.a_val;
      832 +                        tsc = argv[argc - 1].a_un.a_val;
 804  833                          break;
 805  834                  default:
 806  835                          return (DCMD_USAGE);
 807  836                  }
 808  837          }
 809  838  
 810  839          if (mdb_readsym(&scalehrtimef,
 811  840              sizeof (scalehrtimef), "scalehrtimef") == -1) {
 812  841                  mdb_warn("couldn't read 'scalehrtimef'");
 813  842                  return (DCMD_ERR);
↓ open down ↓ 8 lines elided ↑ open up ↑
 822  851                  mdb_warn("::scalehrtime requires that scalehrtimef "
 823  852                      "be set to tsc_scalehrtime\n");
 824  853                  return (DCMD_ERR);
 825  854          }
 826  855  
 827  856          if (mdb_readsym(&nsec_scale, sizeof (nsec_scale), "nsec_scale") == -1) {
 828  857                  mdb_warn("couldn't read 'nsec_scale'");
 829  858                  return (DCMD_ERR);
 830  859          }
 831  860  
      861 +        if (mdb_readsym(&tsc_last, sizeof (tsc_last), "tsc_last") == -1) {
      862 +                mdb_warn("couldn't read 'tsc_last'");
      863 +                return (DCMD_ERR);
      864 +        }
      865 +
      866 +        if (mdb_readsym(&base, sizeof (base), "tsc_hrtime_base") == -1) {
      867 +                mdb_warn("couldn't read 'tsc_hrtime_base'");
      868 +                return (DCMD_ERR);
      869 +        }
      870 +
      871 +        /*
      872 +         * If our time is greater than half of tsc_last, we will take our
      873 +         * delta against tsc_last, convert it, and add that to (or subtract it
      874 +         * from) tsc_hrtime_base.  This mimics what the kernel actually does
      875 +         * in gethrtime() (modulo the tsc_sync_tick_delta) and gets us a much
      876 +         * higher precision result than trying to convert a large tsc value.
      877 +         */
      878 +        if (absolute || (tsc > (tsc_last >> 1) && !relative)) {
      879 +                if (tsc > tsc_last) {
      880 +                        tsc = tsc - tsc_last;
      881 +                } else {
      882 +                        tsc = tsc_last - tsc;
      883 +                        mult = -1;
      884 +                }
      885 +        } else {
      886 +                base = 0;
      887 +        }
      888 +
 832  889          scale = (uint64_t)nsec_scale;
 833  890  
 834  891          hrt = ((uint64_t)tscp[1] * scale) << NSEC_SHIFT;
 835  892          hrt += ((uint64_t)tscp[0] * scale) >> (32 - NSEC_SHIFT);
 836  893  
 837      -        mdb_printf("0x%llx\n", hrt);
      894 +        mdb_printf("0x%llx\n", base + (hrt * mult));
 838  895  
 839  896          return (DCMD_OK);
 840  897  }
 841  898  
 842  899  /*
 843  900   * The x86 feature set is implemented as a bitmap array. That bitmap array is
 844  901   * stored across a number of uchars based on the BT_SIZEOFMAP(NUM_X86_FEATURES)
 845  902   * macro. We have the names for each of these features in unix's text segment
 846  903   * so we do not have to duplicate them and instead just look them up.
 847  904   */
↓ open down ↓ 145 lines elided ↑ open up ↑
 993 1050              ptable_dcmd, ptable_help },
 994 1051          { "ptmap", ":", "Given a cr3 value, dump all mappings",
 995 1052              ptmap_dcmd, ptmap_help },
 996 1053          { "pte", ":[-l N]", "print human readable page table entry",
 997 1054              pte_dcmd },
 998 1055          { "pfntomfn", ":", "convert physical page to hypervisor machine page",
 999 1056              pfntomfn_dcmd },
1000 1057          { "mfntopfn", ":", "convert hypervisor machine page to physical page",
1001 1058              mfntopfn_dcmd },
1002 1059          { "memseg_list", ":", "show memseg list", memseg_list },
1003      -        { "scalehrtime", ":",
1004      -            "scale an unscaled high-res time", scalehrtime_cmd },
     1060 +        { "scalehrtime", ":[-a|-r]", "scale an unscaled high-res time",
     1061 +            scalehrtime_cmd, scalehrtime_help },
1005 1062          { "x86_featureset", NULL, "dump the x86_featureset vector",
1006 1063                  x86_featureset_cmd },
1007 1064  #ifdef _KMDB
1008 1065          { "sysregs", NULL, "dump system registers", sysregs_dcmd },
1009 1066  #endif
1010 1067          { NULL }
1011 1068  };
1012 1069  
1013 1070  static const mdb_walker_t walkers[] = {
1014 1071          { "ttrace", "walks trap trace buffers in reverse chronological order",
↓ open down ↓ 21 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX