8956 Implement KPTI
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
9210 remove KMDB branch debugging support
9211 ::crregs could do with cr2/cr3 support
9209 ::ttrace should be able to filter by thread
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Yuri Pankov <yuripv@yuripv.net>
9207 kdi_idt: Cast GATESEG_GETOFFSET through uintptr_t
Reviewed by: Yuri Pankov <yuripv@yuripv.net>

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Management of KMDB's IDT, which is installed upon KMDB activation.
  28  *
  29  * Debugger activation has two flavors, which cover the cases where KMDB is
  30  * loaded at boot, and when it is loaded after boot.  In brief, in both cases,
  31  * the KDI needs to interpose upon several handlers in the IDT.  When
  32  * mod-loaded KMDB is deactivated, we undo the IDT interposition, restoring the
  33  * handlers to what they were before we started.
  34  *
  35  * We also take over the entirety of the IDT (except the double-fault
  36  * handler) on the active CPU when we're in kmdb so we can handle things
  37  * like page faults sensibly.
  38  *
  39  * Boot-loaded KMDB
  40  *
  41  * When we're first activated, we're running on boot's IDT.  We need to be able
  42  * to function in this world, so we'll install our handlers into boot's IDT.
  43  * This is a little complicated: we're using the fake cpu_t set up by
  44  * boot_kdi_tmpinit(), so we can't access cpu_idt directly.  Instead,
  45  * kdi_idt_write() notices that cpu_idt is NULL, and works around this problem.
  46  *
  47  * Later, when we're about to switch to the kernel's IDT, it'll call us via
  48  * kdi_idt_sync(), allowing us to add our handlers to the new IDT.  While
  49  * boot-loaded KMDB can't be unloaded, we still need to save the descriptors we
  50  * replace so we can pass traps back to the kernel as necessary.
  51  *
  52  * The last phase of boot-loaded KMDB activation occurs at non-boot CPU
  53  * startup.  We will be called on each non-boot CPU, thus allowing us to set up
  54  * any watchpoints that may have been configured on the boot CPU and interpose
  55  * on the given CPU's IDT.  We don't save the interposed descriptors in this
  56  * case -- see kdi_cpu_init() for details.
  57  *
  58  * Mod-loaded KMDB
  59  *
  60  * This style of activation is much simpler, as the CPUs are already running,
  61  * and are using their own copy of the kernel's IDT.  We simply interpose upon
  62  * each CPU's IDT.  We save the handlers we replace, both for deactivation and
  63  * for passing traps back to the kernel.  Note that for the hypervisor's
  64  * benefit, we need to xcall the other CPUs to do this, since we must set
  65  * each CPU's virtual IDT entries from that vcpu's own context rather than
  66  * just modifying the IDT table from the CPU running kdi_activate().
  67  */
  68 
  69 #include <sys/types.h>
  70 #include <sys/segments.h>
  71 #include <sys/trap.h>
  72 #include <sys/cpuvar.h>
  73 #include <sys/reboot.h>
  74 #include <sys/sunddi.h>
  75 #include <sys/archsystm.h>
  76 #include <sys/kdi_impl.h>
  77 #include <sys/x_call.h>
  78 #include <ia32/sys/psw.h>
  79 
  80 #define KDI_GATE_NVECS  3
  81 
  82 #define KDI_IDT_NOSAVE  0
  83 #define KDI_IDT_SAVE    1
  84 
  85 #define KDI_IDT_DTYPE_KERNEL    0
  86 #define KDI_IDT_DTYPE_BOOT      1
  87 
  88 kdi_cpusave_t *kdi_cpusave;
  89 int kdi_ncpusave;
  90 
  91 static kdi_main_t kdi_kmdb_main;
  92 
  93 kdi_drreg_t kdi_drreg;
  94 
  95 #ifndef __amd64
  96 /* Used to track the current set of valid kernel selectors. */
  97 uint32_t        kdi_cs;
  98 uint32_t        kdi_ds;
  99 uint32_t        kdi_fs;
 100 uint32_t        kdi_gs;
 101 #endif
 102 
 103 uint_t          kdi_msr_wrexit_msr;
 104 uint64_t        *kdi_msr_wrexit_valp;
 105 
 106 uintptr_t       kdi_kernel_handler;
 107 
 108 int             kdi_trap_switch;
 109 
 110 #define KDI_MEMRANGES_MAX       2
 111 
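      /*
       * Memory ranges of interest to the debugger: entry 0 is set up in
       * kdi_activate() to cover the debugger segment itself, and further
       * ranges can be registered with kdi_memrange_add().
       */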
 112 kdi_memrange_t  kdi_memranges[KDI_MEMRANGES_MAX];
 113 int             kdi_nmemranges;
 114 
 115 typedef void idt_hdlr_f(void);
 116 
 117 extern idt_hdlr_f kdi_trap0, kdi_trap1, kdi_int2, kdi_trap3, kdi_trap4;
 118 extern idt_hdlr_f kdi_trap5, kdi_trap6, kdi_trap7, kdi_trap9;
 119 extern idt_hdlr_f kdi_traperr10, kdi_traperr11, kdi_traperr12;
 120 extern idt_hdlr_f kdi_traperr13, kdi_traperr14, kdi_trap16, kdi_trap17;
 121 extern idt_hdlr_f kdi_trap18, kdi_trap19, kdi_trap20, kdi_ivct32;
 122 extern idt_hdlr_f kdi_invaltrap;
 123 extern size_t kdi_ivct_size;
 124 extern char kdi_slave_entry_patch;
 125 
 126 typedef struct kdi_gate_spec {
 127         uint_t kgs_vec;
 128         uint_t kgs_dpl;
 129 } kdi_gate_spec_t;
 130 
 131 /*
 132  * Beware: kdi_pass_to_kernel() has unpleasant knowledge of this list.
 133  */
 134 static const kdi_gate_spec_t kdi_gate_specs[KDI_GATE_NVECS] = {
 135         { T_SGLSTP, TRP_KPL },
 136         { T_BPTFLT, TRP_UPL },
 137         { T_DBGENTR, TRP_KPL }
 138 };
 139 
 140 static gate_desc_t kdi_kgates[KDI_GATE_NVECS];
 141 
 142 gate_desc_t kdi_idt[NIDT];
 143 
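      /*
       * Each entry describes the handlers for a range of IDT vectors
       * [id_low, id_high] (id_high == 0 means a single vector).  When
       * id_incrp is non-NULL, successive vectors in the range get copies of
       * id_basehdlr spaced *id_incrp bytes apart, as with the common
       * interrupt vectors 32-255.
       */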
 144 struct idt_description {
 145         uint_t id_low;
 146         uint_t id_high;
 147         idt_hdlr_f *id_basehdlr;
 148         size_t *id_incrp;
 149 } idt_description[] = {
 150         { T_ZERODIV, 0,         kdi_trap0, NULL },
 151         { T_SGLSTP, 0,          kdi_trap1, NULL },
 152         { T_NMIFLT, 0,          kdi_int2, NULL },
 153         { T_BPTFLT, 0,          kdi_trap3, NULL },
 154         { T_OVFLW, 0,           kdi_trap4, NULL },
 155         { T_BOUNDFLT, 0,        kdi_trap5, NULL },
 156         { T_ILLINST, 0,         kdi_trap6, NULL },
 157         { T_NOEXTFLT, 0,        kdi_trap7, NULL },
 158 #if !defined(__xpv)
 159         { T_DBLFLT, 0,          syserrtrap, NULL },
 160 #endif
 161         { T_EXTOVRFLT, 0,       kdi_trap9, NULL },
 162         { T_TSSFLT, 0,          kdi_traperr10, NULL },
 163         { T_SEGFLT, 0,          kdi_traperr11, NULL },
 164         { T_STKFLT, 0,          kdi_traperr12, NULL },
 165         { T_GPFLT, 0,           kdi_traperr13, NULL },
 166         { T_PGFLT, 0,           kdi_traperr14, NULL },
 167         { 15, 0,                kdi_invaltrap, NULL },
 168         { T_EXTERRFLT, 0,       kdi_trap16, NULL },
 169         { T_ALIGNMENT, 0,       kdi_trap17, NULL },
 170         { T_MCE, 0,             kdi_trap18, NULL },
 171         { T_SIMDFPE, 0,         kdi_trap19, NULL },
 172         { T_DBGENTR, 0,         kdi_trap20, NULL },
 173         { 21, 31,               kdi_invaltrap, NULL },
 174         { 32, 255,              kdi_ivct32, &kdi_ivct_size },
 175         { 0, 0, NULL },
 176 };
 177 
 178 void
 179 kdi_idt_init(selector_t sel)
 180 {
 181         struct idt_description *id;
 182         int i;
 183 
 184         for (id = idt_description; id->id_basehdlr != NULL; id++) {
 185                 uint_t high = id->id_high != 0 ? id->id_high : id->id_low;
  186                 size_t incr = id->id_incrp != NULL ? *id->id_incrp : 0;
  187 
 188                 for (i = id->id_low; i <= high; i++) {
 189                         caddr_t hdlr = (caddr_t)id->id_basehdlr +
 190                             incr * (i - id->id_low);
 191                         set_gatesegd(&kdi_idt[i], (void (*)())hdlr, sel,
 192                             SDT_SYSIGT, TRP_KPL, i);
 193                 }
 194         }
 195 }
 196 
 197 /*
 198  * Patch caller-provided code into the debugger's IDT handlers.  This code is
 199  * used to save MSRs that must be saved before the first branch.  All handlers
 200  * are essentially the same, and end with a branch to kdi_cmnint.  To save the
 201  * MSR, we need to patch in before the branch.  The handlers have the following
 202  * structure: KDI_MSR_PATCHOFF bytes of code, KDI_MSR_PATCHSZ bytes of
 203  * patchable space, followed by more code.
 204  */
 205 void
 206 kdi_idt_patch(caddr_t code, size_t sz)
 207 {
 208         int i;
 209 
 210         ASSERT(sz <= KDI_MSR_PATCHSZ);
 211 
 212         for (i = 0; i < sizeof (kdi_idt) / sizeof (struct gate_desc); i++) {
 213                 gate_desc_t *gd;
 214                 uchar_t *patch;
 215 
 216                 if (i == T_DBLFLT)
 217                         continue;       /* uses kernel's handler */
 218 
 219                 gd = &kdi_idt[i];
 220                 patch = (uchar_t *)GATESEG_GETOFFSET(gd) + KDI_MSR_PATCHOFF;
 221 
 222                 /*
 223                  * We can't ASSERT that there's a nop here, because this may be
 224                  * a debugger restart.  In that case, we're copying the new
 225                  * patch point over the old one.
 226                  */
 227                 /* FIXME: dtrace fbt ... */
 228                 bcopy(code, patch, sz);
 229 
 230                 /* Fill the rest with nops to be sure */
 231                 while (sz < KDI_MSR_PATCHSZ)
 232                         patch[sz++] = 0x90; /* nop */
 233         }
 234 }
 235 
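      /*
       * Interpose on the gates listed in kdi_gate_specs: build descriptors
       * pointing at the corresponding kdi_idt handlers, optionally save the
       * descriptors currently installed on this CPU (so traps can later be
       * passed back to the kernel or restored at deactivation), and write
       * ours in their place.
       */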
 236 static void
 237 kdi_idt_gates_install(selector_t sel, int saveold)
 238 {
 239         gate_desc_t gates[KDI_GATE_NVECS];
 240         int i;
 241 
  242         bzero(gates, sizeof (gates));
 243 
 244         for (i = 0; i < KDI_GATE_NVECS; i++) {
 245                 const kdi_gate_spec_t *gs = &kdi_gate_specs[i];
 246                 uintptr_t func = GATESEG_GETOFFSET(&kdi_idt[gs->kgs_vec]);
 247                 set_gatesegd(&gates[i], (void (*)())func, sel, SDT_SYSIGT,
 248                     gs->kgs_dpl, gs->kgs_vec);
 249         }
 250 
 251         for (i = 0; i < KDI_GATE_NVECS; i++) {
 252                 uint_t vec = kdi_gate_specs[i].kgs_vec;
 253 
 254                 if (saveold)
 255                         kdi_kgates[i] = CPU->cpu_m.mcpu_idt[vec];
 256 
 257                 kdi_idt_write(&gates[i], vec);
 258         }
 259 }
 260 
 261 static void
 262 kdi_idt_gates_restore(void)
 263 {
 264         int i;
 265 
 266         for (i = 0; i < KDI_GATE_NVECS; i++)
 267                 kdi_idt_write(&kdi_kgates[i], kdi_gate_specs[i].kgs_vec);
 268 }
 269 
 270 /*
 271  * Called when we switch to the kernel's IDT.  We need to interpose on the
 272  * kernel's IDT entries and stop using KMDBCODE_SEL.
 273  */
 274 void
 275 kdi_idt_sync(void)
 276 {
 277         kdi_idt_init(KCS_SEL);
 278         kdi_idt_gates_install(KCS_SEL, KDI_IDT_SAVE);
 279 }
 280 
 281 /*
 282  * On some processors, we'll need to clear a certain MSR before proceeding into
 283  * the debugger.  Complicating matters, this MSR must be cleared before we take
 284  * any branches.  We have patch points in every trap handler, which will cover
 285  * all entry paths for master CPUs.  We also have a patch point in the slave
 286  * entry code.
 287  */
 288 static void
 289 kdi_msr_add_clrentry(uint_t msr)
 290 {
 291 #ifdef __amd64
 292         uchar_t code[] = {
 293                 0x51, 0x50, 0x52,               /* pushq %rcx, %rax, %rdx */
 294                 0xb9, 0x00, 0x00, 0x00, 0x00,   /* movl $MSRNUM, %ecx */
 295                 0x31, 0xc0,                     /* clr %eax */
 296                 0x31, 0xd2,                     /* clr %edx */
 297                 0x0f, 0x30,                     /* wrmsr */
 298                 0x5a, 0x58, 0x59                /* popq %rdx, %rax, %rcx */
 299         };
 300         uchar_t *patch = &code[4];
 301 #else
 302         uchar_t code[] = {
 303                 0x60,                           /* pushal */
 304                 0xb9, 0x00, 0x00, 0x00, 0x00,   /* movl $MSRNUM, %ecx */
 305                 0x31, 0xc0,                     /* clr %eax */
 306                 0x31, 0xd2,                     /* clr %edx */
 307                 0x0f, 0x30,                     /* wrmsr */
 308                 0x61                            /* popal */
 309         };
 310         uchar_t *patch = &code[2];
 311 #endif
 312 
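              /*
               * Splice the MSR number into the immediate operand of the
               * movl above, then patch the resulting sequence into every
               * debugger trap handler and into the slave-entry path.
               */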
 313         bcopy(&msr, patch, sizeof (uint32_t));
 314 
 315         kdi_idt_patch((caddr_t)code, sizeof (code));
 316 
 317         bcopy(code, &kdi_slave_entry_patch, sizeof (code));
 318 }
 319 
 320 static void
 321 kdi_msr_add_wrexit(uint_t msr, uint64_t *valp)
 322 {
 323         kdi_msr_wrexit_msr = msr;
 324         kdi_msr_wrexit_valp = valp;
 325 }
 326 
 327 void
 328 kdi_set_debug_msrs(kdi_msr_t *msrs)
 329 {
 330         int nmsrs, i;
 331 
 332         ASSERT(kdi_cpusave[0].krs_msr == NULL);
 333 
 334         /* Look in CPU0's MSRs for any special MSRs. */
 335         for (nmsrs = 0; msrs[nmsrs].msr_num != 0; nmsrs++) {
 336                 switch (msrs[nmsrs].msr_type) {
 337                 case KDI_MSR_CLEARENTRY:
 338                         kdi_msr_add_clrentry(msrs[nmsrs].msr_num);
 339                         break;
 340 
 341                 case KDI_MSR_WRITEDELAY:
 342                         kdi_msr_add_wrexit(msrs[nmsrs].msr_num,
 343                             msrs[nmsrs].kdi_msr_valp);
 344                         break;
 345                 }
 346         }
 347 
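              /*
               * Count the terminating entry too, so that each CPU's slice
               * of the MSR array assigned below covers the complete,
               * zero-terminated list.
               */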
 348         nmsrs++;
 349 
 350         for (i = 0; i < kdi_ncpusave; i++)
 351                 kdi_cpusave[i].krs_msr = &msrs[nmsrs * i];
 352 }
 353 
 354 void
 355 kdi_update_drreg(kdi_drreg_t *drreg)
 356 {
 357         kdi_drreg = *drreg;
 358 }
 359 
 360 void
 361 kdi_memrange_add(caddr_t base, size_t len)
 362 {
 363         kdi_memrange_t *mr = &kdi_memranges[kdi_nmemranges];
 364 
 365         ASSERT(kdi_nmemranges != KDI_MEMRANGES_MAX);
 366 
 367         mr->mr_base = base;
 368         mr->mr_lim = base + len - 1;
 369         kdi_nmemranges++;
 370 }
 371 
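      /*
       * Switch the IDTR between the debugger's own IDT (cpusave == NULL)
       * and the kernel IDT recorded in the given CPU's save area.
       */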
 372 void
 373 kdi_idt_switch(kdi_cpusave_t *cpusave)
 374 {
 375         if (cpusave == NULL)
 376                 kdi_idtr_set(kdi_idt, sizeof (kdi_idt) - 1);
 377         else
 378                 kdi_idtr_set(cpusave->krs_idt, (sizeof (*idt0) * NIDT) - 1);
 379 }
 380 
 381 /*
 382  * Activation for CPUs other than the boot CPU, called from that CPU's
 383  * mp_startup().  We saved the kernel's descriptors when we initialized the
 384  * boot CPU, so we don't want to do it again.  Saving the handlers from this
 385  * CPU's IDT would actually be dangerous with the CPU initialization method in
 386  * use at the time of this writing.  With that method, the startup code creates
 387  * the IDTs for slave CPUs by copying the one used by the boot CPU, which has
 388  * already been interposed upon by KMDB.  Were we to interpose again, we'd
 389  * replace the kernel's descriptors with our own in the save area.  By not
 390  * saving, but still overwriting, we'll work in the current world, and in any
 391  * future world where the IDT is generated from scratch.
 392  */
 393 void
 394 kdi_cpu_init(void)
 395 {
 396         kdi_idt_gates_install(KCS_SEL, KDI_IDT_NOSAVE);
 397         /* Load the debug registers and MSRs */
 398         kdi_cpu_debug_init(&kdi_cpusave[CPU->cpu_id]);
 399 }
 400 
 401 /*
 402  * Activation for all CPUs for mod-loaded kmdb, i.e. a kmdb that wasn't
 403  * loaded at boot.
 404  */
 405 static int
 406 kdi_cpu_activate(void)
 407 {
 408         kdi_idt_gates_install(KCS_SEL, KDI_IDT_SAVE);
 409         return (0);
 410 }
 411 
 412 void
 413 kdi_activate(kdi_main_t main, kdi_cpusave_t *cpusave, uint_t ncpusave)
 414 {
 415         int i;
 416         cpuset_t cpuset;
 417 
 418         CPUSET_ALL(cpuset);
 419 
 420         kdi_cpusave = cpusave;
 421         kdi_ncpusave = ncpusave;
 422 
 423         kdi_kmdb_main = main;
 424 
 425         for (i = 0; i < kdi_ncpusave; i++) {
 426                 kdi_cpusave[i].krs_cpu_id = i;
 427 
 428                 kdi_cpusave[i].krs_curcrumb =
 429                     &kdi_cpusave[i].krs_crumbs[KDI_NCRUMBS - 1];
 430                 kdi_cpusave[i].krs_curcrumbidx = KDI_NCRUMBS - 1;
 431         }
 432 
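              /*
               * Boot-loaded kmdb cannot yet use the kernel's code selector,
               * so its gates initially reference KMDBCODE_SEL; kdi_idt_sync()
               * rebuilds them with KCS_SEL once the kernel's IDT takes over.
               */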
 433         if (boothowto & RB_KMDB)
 434                 kdi_idt_init(KMDBCODE_SEL);
 435         else
 436                 kdi_idt_init(KCS_SEL);
 437 
 438         /* The initial selector set.  Updated by the debugger-entry code */
 439 #ifndef __amd64
 440         kdi_cs = B32CODE_SEL;
 441         kdi_ds = kdi_fs = kdi_gs = B32DATA_SEL;
 442 #endif
 443 
 444         kdi_memranges[0].mr_base = kdi_segdebugbase;
 445         kdi_memranges[0].mr_lim = kdi_segdebugbase + kdi_segdebugsize - 1;
 446         kdi_nmemranges = 1;
 447 
 448         kdi_drreg.dr_ctl = KDIREG_DRCTL_RESERVED;
 449         kdi_drreg.dr_stat = KDIREG_DRSTAT_RESERVED;
 450 
 451         kdi_msr_wrexit_msr = 0;
 452         kdi_msr_wrexit_valp = NULL;
 453 
 454         if (boothowto & RB_KMDB) {
 455                 kdi_idt_gates_install(KMDBCODE_SEL, KDI_IDT_NOSAVE);
 456         } else {
 457                 xc_call(0, 0, 0, CPUSET2BV(cpuset),
 458                     (xc_func_t)kdi_cpu_activate);
 459         }
 460 }
 461 
 462 static int
 463 kdi_cpu_deactivate(void)
 464 {
 465         kdi_idt_gates_restore();
 466         return (0);
 467 }
 468 
 469 void
 470 kdi_deactivate(void)
 471 {
 472         cpuset_t cpuset;
 473         CPUSET_ALL(cpuset);
 474 
 475         xc_call(0, 0, 0, CPUSET2BV(cpuset), (xc_func_t)kdi_cpu_deactivate);
 476         kdi_nmemranges = 0;
 477 }
 478 
 479 /*
 480  * We receive all breakpoints and single step traps.  Some of them,
 481  * including those from userland and those induced by DTrace providers,
 482  * are intended for the kernel, and must be processed there.  We adopt
 483  * this ours-until-proven-otherwise position due to the painful
 484  * consequences of sending the kernel an unexpected breakpoint or
 485  * single step.  Unless someone can prove to us that the kernel is
 486  * prepared to handle the trap, we'll assume there's a problem and will
 487  * give the user a chance to debug it.
 488  */
 489 int
 490 kdi_trap_pass(kdi_cpusave_t *cpusave)
 491 {
 492         greg_t tt = cpusave->krs_gregs[KDIREG_TRAPNO];
 493         greg_t pc = cpusave->krs_gregs[KDIREG_PC];
 494         greg_t cs = cpusave->krs_gregs[KDIREG_CS];
 495 
 496         if (USERMODE(cs))
 497                 return (1);
 498 
 499         if (tt != T_BPTFLT && tt != T_SGLSTP)
 500                 return (0);
 501 
 502         if (tt == T_BPTFLT && kdi_dtrace_get_state() ==
 503             KDI_DTSTATE_DTRACE_ACTIVE)
 504                 return (1);
 505 
 506         /*
 507          * See the comments in the kernel's T_SGLSTP handler for why we need to
 508          * do this.
  509          */
 510         if (tt == T_SGLSTP &&
  511             (pc == (greg_t)sys_sysenter || pc == (greg_t)brand_sys_sysenter))
  512                 return (1);
 513 
 514         return (0);
 515 }
 516 
 517 /*
 518  * State has been saved, and all CPUs are on the CPU-specific stacks.  All
 519  * CPUs enter here, and head off into the debugger proper.
 520  */
 521 void
 522 kdi_debugger_entry(kdi_cpusave_t *cpusave)
 523 {
 524         /*
 525          * BPTFLT gives us control with %eip set to the instruction *after*
 526          * the int 3.  Back it off, so we're looking at the instruction that
 527          * triggered the fault.
 528          */
 529         if (cpusave->krs_gregs[KDIREG_TRAPNO] == T_BPTFLT)
 530                 cpusave->krs_gregs[KDIREG_PC]--;
 531 
 532         kdi_kmdb_main(cpusave);
 533 }
--- EOF ---