Print this page
    
9685 KPTI %cr3 handling needs fixes
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/intel/kdi/kdi_idt.c
          +++ new/usr/src/uts/intel/kdi/kdi_idt.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   *
  25   25   * Copyright 2018 Joyent, Inc.
  26   26   */
  27   27  
  28   28  /*
  29   29   * Management of KMDB's IDT, which is installed upon KMDB activation.
  30   30   *
  31   31   * Debugger activation has two flavors, which cover the cases where KMDB is
  32   32   * loaded at boot, and when it is loaded after boot.  In brief, in both cases,
  33   33   * the KDI needs to interpose upon several handlers in the IDT.  When
  34   34   * mod-loaded KMDB is deactivated, we undo the IDT interposition, restoring the
  35   35   * handlers to what they were before we started.
  36   36   *
  37   37   * We also take over the entirety of the IDT (except the double-fault
  38   38   * handler) on the active CPU when we're in kmdb so we can handle things
  39   39   * like page faults sensibly.
  40   40   *
  41   41   * Boot-loaded KMDB
  42   42   *
  43   43   * When we're first activated, we're running on boot's IDT.  We need to be able
  44   44   * to function in this world, so we'll install our handlers into boot's IDT.
  45   45   * This is a little complicated: we're using the fake cpu_t set up by
  46   46   * boot_kdi_tmpinit(), so we can't access cpu_idt directly.  Instead,
  47   47   * kdi_idt_write() notices that cpu_idt is NULL, and works around this problem.
  48   48   *
  49   49   * Later, when we're about to switch to the kernel's IDT, it'll call us via
  50   50   * kdi_idt_sync(), allowing us to add our handlers to the new IDT.  While
  51   51   * boot-loaded KMDB can't be unloaded, we still need to save the descriptors we
  52   52   * replace so we can pass traps back to the kernel as necessary.
  53   53   *
  54   54   * The last phase of boot-loaded KMDB activation occurs at non-boot CPU
  55   55   * startup.  We will be called on each non-boot CPU, thus allowing us to set up
  56   56   * any watchpoints that may have been configured on the boot CPU and interpose
  57   57   * on the given CPU's IDT.  We don't save the interposed descriptors in this
  58   58   * case -- see kdi_cpu_init() for details.
  59   59   *
  60   60   * Mod-loaded KMDB
  61   61   *
  62   62   * This style of activation is much simpler, as the CPUs are already running,
  63   63   * and are using their own copy of the kernel's IDT.  We simply interpose upon
  64   64   * each CPU's IDT.  We save the handlers we replace, both for deactivation and
  65   65   * for passing traps back to the kernel.  Note that for the hypervisors'
  66   66   * benefit, we need to xcall to the other CPUs to do this, since we need to
  67   67   * actively set the trap entries in its virtual IDT from that vcpu's context
  68   68   * rather than just modifying the IDT table from the CPU running kdi_activate().
  69   69   */
  70   70  
  71   71  #include <sys/types.h>
  72   72  #include <sys/segments.h>
  73   73  #include <sys/trap.h>
  74   74  #include <sys/cpuvar.h>
  75   75  #include <sys/reboot.h>
  76   76  #include <sys/sunddi.h>
  77   77  #include <sys/archsystm.h>
  78   78  #include <sys/kdi_impl.h>
  79   79  #include <sys/x_call.h>
  80   80  #include <ia32/sys/psw.h>
  81   81  #include <vm/hat_i86.h>
  82   82  
  83   83  #define KDI_GATE_NVECS  3
  84   84  
  85   85  #define KDI_IDT_NOSAVE  0
  86   86  #define KDI_IDT_SAVE    1
  87   87  
  88   88  #define KDI_IDT_DTYPE_KERNEL    0
  89   89  #define KDI_IDT_DTYPE_BOOT      1
  90   90  
  91   91  kdi_cpusave_t *kdi_cpusave;
  92   92  int kdi_ncpusave;
  93   93  
  94   94  static kdi_main_t kdi_kmdb_main;
  95   95  
  96   96  kdi_drreg_t kdi_drreg;
  97   97  
  98   98  #ifndef __amd64
  99   99  /* Used to track the current set of valid kernel selectors. */
 100  100  uint32_t        kdi_cs;
 101  101  uint32_t        kdi_ds;
 102  102  uint32_t        kdi_fs;
 103  103  uint32_t        kdi_gs;
 104  104  #endif
 105  105  
 106  106  uintptr_t       kdi_kernel_handler;
 107  107  
 108  108  int             kdi_trap_switch;
 109  109  
 110  110  #define KDI_MEMRANGES_MAX       2
 111  111  
 112  112  kdi_memrange_t  kdi_memranges[KDI_MEMRANGES_MAX];
 113  113  int             kdi_nmemranges;
 114  114  
 115  115  typedef void idt_hdlr_f(void);
 116  116  
 117  117  extern idt_hdlr_f kdi_trap0, kdi_trap1, kdi_int2, kdi_trap3, kdi_trap4;
 118  118  extern idt_hdlr_f kdi_trap5, kdi_trap6, kdi_trap7, kdi_trap9;
 119  119  extern idt_hdlr_f kdi_traperr10, kdi_traperr11, kdi_traperr12;
 120  120  extern idt_hdlr_f kdi_traperr13, kdi_traperr14, kdi_trap16, kdi_traperr17;
 121  121  extern idt_hdlr_f kdi_trap18, kdi_trap19, kdi_trap20, kdi_ivct32;
 122  122  extern idt_hdlr_f kdi_invaltrap;
 123  123  extern size_t kdi_ivct_size;
 124  124  
 125  125  typedef struct kdi_gate_spec {
                   /*
                    * One IDT vector that the KDI interposes on.  kgs_vec is used
                    * both as the index into kdi_idt[] and as the vector handed
                    * to kdi_idt_write(); kgs_dpl is the privilege-level argument
                    * given to set_gatesegd() when the gate is built.
                    */
 126  126          uint_t kgs_vec;
 127  127          uint_t kgs_dpl;
 128  128  } kdi_gate_spec_t;
 129  129  
 130  130  /*
 131  131   * Beware: kdi_pass_to_kernel() has unpleasant knowledge of this list.
 132  132   */
 133  133  static const kdi_gate_spec_t kdi_gate_specs[KDI_GATE_NVECS] = {
 134  134          { T_SGLSTP, TRP_KPL },
 135  135          { T_BPTFLT, TRP_UPL },
 136  136          { T_DBGENTR, TRP_KPL }
 137  137  };
 138  138  
 139  139  static gate_desc_t kdi_kgates[KDI_GATE_NVECS];
 140  140  
 141  141  extern gate_desc_t kdi_idt[NIDT];
 142  142  
 143  143  struct idt_description {
 144  144          uint_t id_low;
 145  145          uint_t id_high;
 146  146          idt_hdlr_f *id_basehdlr;
 147  147          size_t *id_incrp;
 148  148  } idt_description[] = {
 149  149          { T_ZERODIV, 0,         kdi_trap0, NULL },
 150  150          { T_SGLSTP, 0,          kdi_trap1, NULL },
 151  151          { T_NMIFLT, 0,          kdi_int2, NULL },
 152  152          { T_BPTFLT, 0,          kdi_trap3, NULL },
 153  153          { T_OVFLW, 0,           kdi_trap4, NULL },
 154  154          { T_BOUNDFLT, 0,        kdi_trap5, NULL },
 155  155          { T_ILLINST, 0,         kdi_trap6, NULL },
 156  156          { T_NOEXTFLT, 0,        kdi_trap7, NULL },
  
    | 
      ↓ open down ↓ | 
    156 lines elided | 
    
      ↑ open up ↑ | 
  
 157  157  #if !defined(__xpv)
 158  158          { T_DBLFLT, 0,          syserrtrap, NULL },
 159  159  #endif
 160  160          { T_EXTOVRFLT, 0,       kdi_trap9, NULL },
 161  161          { T_TSSFLT, 0,          kdi_traperr10, NULL },
 162  162          { T_SEGFLT, 0,          kdi_traperr11, NULL },
 163  163          { T_STKFLT, 0,          kdi_traperr12, NULL },
 164  164          { T_GPFLT, 0,           kdi_traperr13, NULL },
 165  165          { T_PGFLT, 0,           kdi_traperr14, NULL },
 166  166          { 15, 0,                kdi_invaltrap, NULL },
 167      -        { T_EXTERRFLT, 0,       kdi_trap16, NULL },
 168      -        { T_ALIGNMENT, 0,       kdi_traperr17, NULL },
      167 +        { T_EXTERRFLT, 0,       kdi_trap16, NULL },
      168 +        { T_ALIGNMENT, 0,       kdi_traperr17, NULL },
 169  169          { T_MCE, 0,             kdi_trap18, NULL },
 170  170          { T_SIMDFPE, 0,         kdi_trap19, NULL },
 171  171          { T_DBGENTR, 0,         kdi_trap20, NULL },
 172  172          { 21, 31,               kdi_invaltrap, NULL },
 173  173          { 32, 255,              kdi_ivct32, &kdi_ivct_size },
 174  174          { 0, 0, NULL },
 175  175  };
 176  176  
 177  177  void
 178  178  kdi_idt_init(selector_t sel)
 179  179  {
 180  180          struct idt_description *id;
 181  181          int i;
 182  182  
                   /*
                    * Populate kdi_idt[] from the idt_description table, using
                    * `sel' as the code selector for every gate.  Each table
                    * entry covers vectors id_low..id_high (just id_low when
                    * id_high is 0); the walk stops at the first entry whose
                    * id_basehdlr is NULL.
                    */
 183  183          for (id = idt_description; id->id_basehdlr != NULL; id++) {
 184  184                  uint_t high = id->id_high != 0 ? id->id_high : id->id_low;
 185  185                  size_t incr = id->id_incrp != NULL ? *id->id_incrp : 0;
 186  186  
 187  187  #if !defined(__xpv)
                           /*
                            * With kpti_enable set and the kernel code selector in
                            * use, the double-fault entry is pointed at
                            * tr_syserrtrap instead.  Note that this rewrites the
                            * shared table entry itself, so the change persists
                            * for any later call.
                            */
 188  188                  if (kpti_enable && sel == KCS_SEL && id->id_low == T_DBLFLT)
 189  189                          id->id_basehdlr = tr_syserrtrap;
 190  190  #endif
 191  191  
                           /*
                            * When id_incrp is set, successive vectors get handlers
                            * spaced *id_incrp bytes apart starting at id_basehdlr;
                            * otherwise incr is 0 and every vector in the range
                            * shares id_basehdlr.
                            */
 192  192                  for (i = id->id_low; i <= high; i++) {
 193  193                          caddr_t hdlr = (caddr_t)id->id_basehdlr +
 194  194                              incr * (i - id->id_low);
 195  195                          set_gatesegd(&kdi_idt[i], (void (*)())hdlr, sel,
 196  196                              SDT_SYSIGT, TRP_KPL, IST_DBG);
 197  197                  }
 198  198          }
 199  199  }
 200  200  
 201  201  static void
 202  202  kdi_idt_gates_install(selector_t sel, int saveold)
 203  203  {
 204  204          gate_desc_t gates[KDI_GATE_NVECS];
 205  205          int i;
 206  206  
                   /*
                    * Install the KDI's gates for the vectors listed in
                    * kdi_gate_specs[] into the current CPU's IDT.
                    *
                    * sel     - code selector placed in each new gate.
                    * saveold - when non-zero (KDI_IDT_SAVE), the descriptor being
                    *           replaced is first copied into kdi_kgates[] so that
                    *           kdi_idt_gates_restore() can put it back.
                    *
                    * Clear the whole scratch array; sizeof (*gates) would cover
                    * only the first of the KDI_GATE_NVECS descriptors.
                    */
 207  207          bzero(gates, sizeof (gates));
 208  208  
                   /*
                    * Build each gate from the handler offset already recorded in
                    * kdi_idt[] for that vector, but with the DPL taken from the
                    * gate spec rather than the TRP_KPL used by kdi_idt_init().
                    */
 209  209          for (i = 0; i < KDI_GATE_NVECS; i++) {
 210  210                  const kdi_gate_spec_t *gs = &kdi_gate_specs[i];
 211  211                  uintptr_t func = GATESEG_GETOFFSET(&kdi_idt[gs->kgs_vec]);
 212  212                  set_gatesegd(&gates[i], (void (*)())func, sel, SDT_SYSIGT,
 213  213                      gs->kgs_dpl, IST_DBG);
 214  214          }
 215  215  
                   /* Optionally save what is there now, then write the new gate. */
 216  216          for (i = 0; i < KDI_GATE_NVECS; i++) {
 217  217                  uint_t vec = kdi_gate_specs[i].kgs_vec;
 218  218  
 219  219                  if (saveold)
 220  220                          kdi_kgates[i] = CPU->cpu_m.mcpu_idt[vec];
 221  221  
 222  222                  kdi_idt_write(&gates[i], vec);
 223  223          }
 224  224  }
 225  225  
 226  226  static void
 227  227  kdi_idt_gates_restore(void)
 228  228  {
 229  229          int i;
 230  230  
                   /*
                    * Write each descriptor held in kdi_kgates[] back to its
                    * vector.  kdi_kgates[] is filled only by
                    * kdi_idt_gates_install() when it is called with saveold set.
                    */
 231  231          for (i = 0; i < KDI_GATE_NVECS; i++)
 232  232                  kdi_idt_write(&kdi_kgates[i], kdi_gate_specs[i].kgs_vec);
 233  233  }
 234  234  
 235  235  /*
 236  236   * Called when we switch to the kernel's IDT.  We need to interpose on the
 237  237   * kernel's IDT entries and stop using KMDBCODE_SEL.
 238  238   */
 239  239  void
 240  240  kdi_idt_sync(void)
 241  241  {
                   /* Rebuild kdi_idt[] so that every gate uses KCS_SEL. */
 242  242          kdi_idt_init(KCS_SEL);
                   /* Hook the gate vectors, saving the descriptors we replace. */
 243  243          kdi_idt_gates_install(KCS_SEL, KDI_IDT_SAVE);
 244  244  }
 245  245  
 246  246  void
 247  247  kdi_update_drreg(kdi_drreg_t *drreg)
 248  248  {
                   /* Copy the caller's structure by value into the global kdi_drreg. */
 249  249          kdi_drreg = *drreg;
 250  250  }
 251  251  
 252  252  void
 253  253  kdi_memrange_add(caddr_t base, size_t len)
 254  254  {
                   /*
                    * Append [base, base + len - 1] to kdi_memranges[].  mr is only
                    * formed here; it is not dereferenced until after the ASSERT
                    * that a free slot remains.
                    */
 255  255          kdi_memrange_t *mr = &kdi_memranges[kdi_nmemranges];
 256  256  
 257  257          ASSERT(kdi_nmemranges != KDI_MEMRANGES_MAX);
 258  258  
 259  259          mr->mr_base = base;
                   /* mr_lim is the address of the last byte, i.e. inclusive. */
 260  260          mr->mr_lim = base + len - 1;
 261  261          kdi_nmemranges++;
 262  262  }
 263  263  
 264  264  void
 265  265  kdi_idt_switch(kdi_cpusave_t *cpusave)
 266  266  {
                   /*
                    * A NULL cpusave selects the debugger's own table, kdi_idt;
                    * otherwise the table recorded in cpusave->krs_idt is loaded.
                    * In both cases the second argument is the table size in
                    * bytes minus one.
                    */
 267  267          if (cpusave == NULL)
 268  268                  kdi_idtr_set(kdi_idt, sizeof (kdi_idt) - 1);
 269  269          else
 270  270                  kdi_idtr_set(cpusave->krs_idt, (sizeof (*idt0) * NIDT) - 1);
 271  271  }
 272  272  
 273  273  /*
 274  274   * Activation for CPUs other than the boot CPU, called from that CPU's
 275  275   * mp_startup().  We saved the kernel's descriptors when we initialized the
 276  276   * boot CPU, so we don't want to do it again.  Saving the handlers from this
 277  277   * CPU's IDT would actually be dangerous with the CPU initialization method in
 278  278   * use at the time of this writing.  With that method, the startup code creates
 279  279   * the IDTs for slave CPUs by copying the one used by the boot CPU, which has
 280  280   * already been interposed upon by KMDB.  Were we to interpose again, we'd
 281  281   * replace the kernel's descriptors with our own in the save area.  By not
 282  282   * saving, but still overwriting, we'll work in the current world, and in any
 283  283   * future world where the IDT is generated from scratch.
 284  284   */
 285  285  void
 286  286  kdi_cpu_init(void)
 287  287  {
                   /* Overwrite this CPU's gate vectors without touching kdi_kgates[]. */
 288  288          kdi_idt_gates_install(KCS_SEL, KDI_IDT_NOSAVE);
 289  289          /* Load the debug registers. */
                   /* The save area is selected by indexing kdi_cpusave[] with cpu_id. */
 290  290          kdi_cpu_debug_init(&kdi_cpusave[CPU->cpu_id]);
 291  291  }
 292  292  
 293  293  /*
 294  294   * Activation for all CPUs for mod-loaded kmdb, i.e. a kmdb that wasn't
 295  295   * loaded at boot.
 296  296   */
 297  297  static int
 298  298  kdi_cpu_activate(void)
 299  299  {
                   /*
                    * Invoked through xc_call() from kdi_activate().  Installs the
                    * gates on the executing CPU, saving the replaced descriptors
                    * into the shared kdi_kgates[] array.  Always returns 0.
                    */
 300  300          kdi_idt_gates_install(KCS_SEL, KDI_IDT_SAVE);
 301  301          return (0);
 302  302  }
 303  303  
 304  304  void
 305  305  kdi_activate(kdi_main_t main, kdi_cpusave_t *cpusave, uint_t ncpusave)
 306  306  {
 307  307          int i;
 308  308          cpuset_t cpuset;
 309  309  
                   /*
                    * Record the debugger entry point (main) and the per-CPU save
                    * areas, build kdi_idt[], seed the memory-range and
                    * debug-register state, and finally hook the live IDT(s).
                    */
 310  310          CPUSET_ALL(cpuset);
 311  311  
 312  312          kdi_cpusave = cpusave;
 313  313          kdi_ncpusave = ncpusave;
 314  314  
 315  315          kdi_kmdb_main = main;
 316  316  
                   /*
                    * Give each save area its index as CPU id and point its crumb
                    * cursor (pointer and index) at the last crumb slot.
                    */
 317  317          for (i = 0; i < kdi_ncpusave; i++) {
 318  318                  kdi_cpusave[i].krs_cpu_id = i;
 319  319  
 320  320                  kdi_cpusave[i].krs_curcrumb =
 321  321                      &kdi_cpusave[i].krs_crumbs[KDI_NCRUMBS - 1];
 322  322                  kdi_cpusave[i].krs_curcrumbidx = KDI_NCRUMBS - 1;
 323  323          }
 324  324  
                   /*
                    * With RB_KMDB set in boothowto the table is built with
                    * KMDBCODE_SEL; otherwise the kernel code selector is used.
                    */
 325  325          if (boothowto & RB_KMDB)
 326  326                  kdi_idt_init(KMDBCODE_SEL);
 327  327          else
 328  328                  kdi_idt_init(KCS_SEL);
 329  329  
 330  330          /* The initial selector set.  Updated by the debugger-entry code */
 331  331  #ifndef __amd64
 332  332          kdi_cs = B32CODE_SEL;
 333  333          kdi_ds = kdi_fs = kdi_gs = B32DATA_SEL;
 334  334  #endif
 335  335  
                   /*
                    * Start with a single memory range covering
                    * [kdi_segdebugbase, kdi_segdebugbase + kdi_segdebugsize - 1].
                    */
 336  336          kdi_memranges[0].mr_base = kdi_segdebugbase;
 337  337          kdi_memranges[0].mr_lim = kdi_segdebugbase + kdi_segdebugsize - 1;
 338  338          kdi_nmemranges = 1;
 339  339  
 340  340          kdi_drreg.dr_ctl = KDIREG_DRCTL_RESERVED;
 341  341          kdi_drreg.dr_stat = KDIREG_DRSTAT_RESERVED;
 342  342  
                   /*
                    * RB_KMDB case: install on the current CPU only, without
                    * saving the old descriptors.  Otherwise run kdi_cpu_activate()
                    * via xc_call() on the set built by CPUSET_ALL, which installs
                    * and saves on each of those CPUs.
                    */
 343  343          if (boothowto & RB_KMDB) {
 344  344                  kdi_idt_gates_install(KMDBCODE_SEL, KDI_IDT_NOSAVE);
 345  345          } else {
 346  346                  xc_call(0, 0, 0, CPUSET2BV(cpuset),
 347  347                      (xc_func_t)kdi_cpu_activate);
 348  348          }
 349  349  }
 350  350  
 351  351  static int
 352  352  kdi_cpu_deactivate(void)
 353  353  {
                   /*
                    * Invoked through xc_call() from kdi_deactivate().  Puts the
                    * descriptors saved in kdi_kgates[] back into the executing
                    * CPU's IDT.  Always returns 0.
                    */
 354  354          kdi_idt_gates_restore();
 355  355          return (0);
 356  356  }
 357  357  
 358  358  void
  
    | 
      ↓ open down ↓ | 
    180 lines elided | 
    
      ↑ open up ↑ | 
  
 359  359  kdi_deactivate(void)
 360  360  {
 361  361          cpuset_t cpuset;
 362  362          CPUSET_ALL(cpuset);
 363  363  
 364  364          xc_call(0, 0, 0, CPUSET2BV(cpuset), (xc_func_t)kdi_cpu_deactivate);
 365  365          kdi_nmemranges = 0;
 366  366  }
 367  367  
 368  368  /*
 369      - * We receive all breakpoints and single step traps.  Some of them,
 370      - * including those from userland and those induced by DTrace providers,
 371      - * are intended for the kernel, and must be processed there.  We adopt
 372      - * this ours-until-proven-otherwise position due to the painful
 373      - * consequences of sending the kernel an unexpected breakpoint or
 374      - * single step.  Unless someone can prove to us that the kernel is
 375      - * prepared to handle the trap, we'll assume there's a problem and will
 376      - * give the user a chance to debug it.
      369 + * We receive all breakpoints and single step traps.  Some of them, including
      370 + * those from userland and those induced by DTrace providers, are intended for
      371 + * the kernel, and must be processed there.  We adopt this
      372 + * ours-until-proven-otherwise position due to the painful consequences of
      373 + * sending the kernel an unexpected breakpoint or single step.  Unless someone
      374 + * can prove to us that the kernel is prepared to handle the trap, we'll assume
      375 + * there's a problem and will give the user a chance to debug it.
      376 + *
      377 + * If we return 2, then the calling code should restore the trap-time %cr3: that
      378 + * is, it really is a kernel-originated trap.
 377  379   */
 378  380  int
 379  381  kdi_trap_pass(kdi_cpusave_t *cpusave)
 380  382  {
 381  383          greg_t tt = cpusave->krs_gregs[KDIREG_TRAPNO];
 382  384          greg_t pc = cpusave->krs_gregs[KDIREG_PC];
 383  385          greg_t cs = cpusave->krs_gregs[KDIREG_CS];
 384  386  
                   /*
                    * The decision is made purely from the saved trap number, PC
                    * and %cs.  NOTE(review): going by the function name and the
                    * block comment above, 0 presumably means "keep the trap for
                    * the debugger" and non-zero means "pass it to the kernel" --
                    * confirm against the assembly caller.
                    */

                   /* Any trap taken with a user-mode %cs yields 1, whatever its type. */
 385  387          if (USERMODE(cs))
 386  388                  return (1);
 387  389  
                   /* Only breakpoint and single-step traps are candidates below. */
 388  390          if (tt != T_BPTFLT && tt != T_SGLSTP)
 389  391                  return (0);
 390  392  
                   /* A kernel-mode breakpoint while DTrace reports itself active. */
 391  393          if (tt == T_BPTFLT && kdi_dtrace_get_state() ==
 392  394              KDI_DTSTATE_DTRACE_ACTIVE)
 393      -                return (1);
      395 +                return (2);
 394  396  
 395  397          /*
 396  398           * See the comments in the kernel's T_SGLSTP handler for why we need to
 397  399           * do this.
 398  400           */
                   /*
                    * A single-step trap whose PC is exactly one of the sysenter
                    * entry points yields 1.  The tr_* entry points are compared
                    * only in non-__xpv builds.
                    */
 399  401  #if !defined(__xpv)
 400  402          if (tt == T_SGLSTP &&
 401  403              (pc == (greg_t)sys_sysenter || pc == (greg_t)brand_sys_sysenter ||
 402  404              pc == (greg_t)tr_sys_sysenter ||
 403  405              pc == (greg_t)tr_brand_sys_sysenter)) {
 404  406  #else
 405  407          if (tt == T_SGLSTP &&
 406  408              (pc == (greg_t)sys_sysenter || pc == (greg_t)brand_sys_sysenter)) {
 407  409  #endif
 408  410                  return (1);
 409  411          }
 410  412  
                   /* Every remaining breakpoint or single-step trap yields 0. */
 411  413          return (0);
 412  414  }
 413  415  
 414  416  /*
 415  417   * State has been saved, and all CPUs are on the CPU-specific stacks.  All
 416  418   * CPUs enter here, and head off into the debugger proper.
 417  419   */
 418  420  void
 419  421  kdi_debugger_entry(kdi_cpusave_t *cpusave)
 420  422  {
 421  423          /*
 422  424           * BPTFLT gives us control with %eip set to the instruction *after*
 423  425           * the int 3.  Back it off, so we're looking at the instruction that
 424  426           * triggered the fault.
 425  427           */
 426  428          if (cpusave->krs_gregs[KDIREG_TRAPNO] == T_BPTFLT)
 427  429                  cpusave->krs_gregs[KDIREG_PC]--;
 428  430  
                   /* kdi_kmdb_main is the entry point recorded by kdi_activate(). */
 429  431          kdi_kmdb_main(cpusave);
 430  432  }
  
    | 
      ↓ open down ↓ | 
    27 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX