8956 Implement KPTI Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com> Reviewed by: Robert Mustacchi <rm@joyent.com> 9210 remove KMDB branch debugging support 9211 ::crregs could do with cr2/cr3 support 9209 ::ttrace should be able to filter by thread Reviewed by: Patrick Mooney <patrick.mooney@joyent.com> Reviewed by: Yuri Pankov <yuripv@yuripv.net> 9207 kdi_idt: Cast GATESEG_GETOFFSET through uintptr_t Reviewed by: Yuri Pankov <yuripv@yuripv.net>
1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Management of KMDB's IDT, which is installed upon KMDB activation. 28 * 29 * Debugger activation has two flavors, which cover the cases where KMDB is 30 * loaded at boot, and when it is loaded after boot. In brief, in both cases, 31 * the KDI needs to interpose upon several handlers in the IDT. When 32 * mod-loaded KMDB is deactivated, we undo the IDT interposition, restoring the 33 * handlers to what they were before we started. 34 * 35 * We also take over the entirety of IDT (except the double-fault handler) on 36 * the active CPU when we're in kmdb so we can handle things like page faults 37 * sensibly. 38 * 39 * Boot-loaded KMDB 40 * 41 * When we're first activated, we're running on boot's IDT. We need to be able 42 * to function in this world, so we'll install our handlers into boot's IDT. 43 * This is a little complicated: we're using the fake cpu_t set up by 44 * boot_kdi_tmpinit(), so we can't access cpu_idt directly. Instead, 45 * kdi_idt_write() notices that cpu_idt is NULL, and works around this problem. 46 * 47 * Later, when we're about to switch to the kernel's IDT, it'll call us via 48 * kdi_idt_sync(), allowing us to add our handlers to the new IDT. While 49 * boot-loaded KMDB can't be unloaded, we still need to save the descriptors we 50 * replace so we can pass traps back to the kernel as necessary. 51 * 52 * The last phase of boot-loaded KMDB activation occurs at non-boot CPU 53 * startup. We will be called on each non-boot CPU, thus allowing us to set up 54 * any watchpoints that may have been configured on the boot CPU and interpose 55 * on the given CPU's IDT. We don't save the interposed descriptors in this 56 * case -- see kdi_cpu_init() for details. 57 * 58 * Mod-loaded KMDB 59 * 60 * This style of activation is much simpler, as the CPUs are already running, 61 * and are using their own copy of the kernel's IDT. We simply interpose upon 62 * each CPU's IDT. We save the handlers we replace, both for deactivation and 63 * for passing traps back to the kernel. Note that for the hypervisors' 64 * benefit, we need to xcall to the other CPUs to do this, since we need to 65 * actively set the trap entries in its virtual IDT from that vcpu's context 66 * rather than just modifying the IDT table from the CPU running kdi_activate(). 67 */ 68 69 #include <sys/types.h> 70 #include <sys/segments.h> 71 #include <sys/trap.h> 72 #include <sys/cpuvar.h> 73 #include <sys/reboot.h> 74 #include <sys/sunddi.h> 75 #include <sys/archsystm.h> 76 #include <sys/kdi_impl.h> 77 #include <sys/x_call.h> 78 #include <ia32/sys/psw.h> 79 80 #define KDI_GATE_NVECS 3 81 82 #define KDI_IDT_NOSAVE 0 83 #define KDI_IDT_SAVE 1 84 85 #define KDI_IDT_DTYPE_KERNEL 0 86 #define KDI_IDT_DTYPE_BOOT 1 87 88 kdi_cpusave_t *kdi_cpusave; 89 int kdi_ncpusave; 90 91 static kdi_main_t kdi_kmdb_main; 92 93 kdi_drreg_t kdi_drreg; 94 95 #ifndef __amd64 96 /* Used to track the current set of valid kernel selectors. */ 97 uint32_t kdi_cs; 98 uint32_t kdi_ds; 99 uint32_t kdi_fs; 100 uint32_t kdi_gs; 101 #endif 102 103 uint_t kdi_msr_wrexit_msr; 104 uint64_t *kdi_msr_wrexit_valp; 105 106 uintptr_t kdi_kernel_handler; 107 108 int kdi_trap_switch; 109 110 #define KDI_MEMRANGES_MAX 2 111 112 kdi_memrange_t kdi_memranges[KDI_MEMRANGES_MAX]; 113 int kdi_nmemranges; 114 115 typedef void idt_hdlr_f(void); 116 117 extern idt_hdlr_f kdi_trap0, kdi_trap1, kdi_int2, kdi_trap3, kdi_trap4; 118 extern idt_hdlr_f kdi_trap5, kdi_trap6, kdi_trap7, kdi_trap9; 119 extern idt_hdlr_f kdi_traperr10, kdi_traperr11, kdi_traperr12; 120 extern idt_hdlr_f kdi_traperr13, kdi_traperr14, kdi_trap16, kdi_trap17; 121 extern idt_hdlr_f kdi_trap18, kdi_trap19, kdi_trap20, kdi_ivct32; 122 extern idt_hdlr_f kdi_invaltrap; 123 extern size_t kdi_ivct_size; 124 extern char kdi_slave_entry_patch; 125 126 typedef struct kdi_gate_spec { 127 uint_t kgs_vec; 128 uint_t kgs_dpl; 129 } kdi_gate_spec_t; 130 131 /* 132 * Beware: kdi_pass_to_kernel() has unpleasant knowledge of this list. 133 */ 134 static const kdi_gate_spec_t kdi_gate_specs[KDI_GATE_NVECS] = { 135 { T_SGLSTP, TRP_KPL }, 136 { T_BPTFLT, TRP_UPL }, 137 { T_DBGENTR, TRP_KPL } 138 }; 139 140 static gate_desc_t kdi_kgates[KDI_GATE_NVECS]; 141 142 gate_desc_t kdi_idt[NIDT]; 143 144 struct idt_description { 145 uint_t id_low; 146 uint_t id_high; 147 idt_hdlr_f *id_basehdlr; 148 size_t *id_incrp; 149 } idt_description[] = { 150 { T_ZERODIV, 0, kdi_trap0, NULL }, 151 { T_SGLSTP, 0, kdi_trap1, NULL }, 152 { T_NMIFLT, 0, kdi_int2, NULL }, 153 { T_BPTFLT, 0, kdi_trap3, NULL }, 154 { T_OVFLW, 0, kdi_trap4, NULL }, 155 { T_BOUNDFLT, 0, kdi_trap5, NULL }, 156 { T_ILLINST, 0, kdi_trap6, NULL }, 157 { T_NOEXTFLT, 0, kdi_trap7, NULL }, 158 #if !defined(__xpv) 159 { T_DBLFLT, 0, syserrtrap, NULL }, 160 #endif 161 { T_EXTOVRFLT, 0, kdi_trap9, NULL }, 162 { T_TSSFLT, 0, kdi_traperr10, NULL }, 163 { T_SEGFLT, 0, kdi_traperr11, NULL }, 164 { T_STKFLT, 0, kdi_traperr12, NULL }, 165 { T_GPFLT, 0, kdi_traperr13, NULL }, 166 { T_PGFLT, 0, kdi_traperr14, NULL }, 167 { 15, 0, kdi_invaltrap, NULL }, 168 { T_EXTERRFLT, 0, kdi_trap16, NULL }, 169 { T_ALIGNMENT, 0, kdi_trap17, NULL }, 170 { T_MCE, 0, kdi_trap18, NULL }, 171 { T_SIMDFPE, 0, kdi_trap19, NULL }, 172 { T_DBGENTR, 0, kdi_trap20, NULL }, 173 { 21, 31, kdi_invaltrap, NULL }, 174 { 32, 255, kdi_ivct32, &kdi_ivct_size }, 175 { 0, 0, NULL }, 176 }; 177 178 void 179 kdi_idt_init(selector_t sel) 180 { 181 struct idt_description *id; 182 int i; 183 184 for (id = idt_description; id->id_basehdlr != NULL; id++) { 185 uint_t high = id->id_high != 0 ? id->id_high : id->id_low; 186 size_t incr = id->id_incrp != NULL ? *id->id_incrp : 0; 187 188 for (i = id->id_low; i <= high; i++) { 189 caddr_t hdlr = (caddr_t)id->id_basehdlr + 190 incr * (i - id->id_low); 191 set_gatesegd(&kdi_idt[i], (void (*)())hdlr, sel, 192 SDT_SYSIGT, TRP_KPL, i); 193 } 194 } 195 } 196 197 /* 198 * Patch caller-provided code into the debugger's IDT handlers. This code is 199 * used to save MSRs that must be saved before the first branch. All handlers 200 * are essentially the same, and end with a branch to kdi_cmnint. To save the 201 * MSR, we need to patch in before the branch. The handlers have the following 202 * structure: KDI_MSR_PATCHOFF bytes of code, KDI_MSR_PATCHSZ bytes of 203 * patchable space, followed by more code. 204 */ 205 void 206 kdi_idt_patch(caddr_t code, size_t sz) 207 { 208 int i; 209 210 ASSERT(sz <= KDI_MSR_PATCHSZ); 211 212 for (i = 0; i < sizeof (kdi_idt) / sizeof (struct gate_desc); i++) { 213 gate_desc_t *gd; 214 uchar_t *patch; 215 216 if (i == T_DBLFLT) 217 continue; /* uses kernel's handler */ 218 219 gd = &kdi_idt[i]; 220 patch = (uchar_t *)GATESEG_GETOFFSET(gd) + KDI_MSR_PATCHOFF; 221 222 /* 223 * We can't ASSERT that there's a nop here, because this may be 224 * a debugger restart. In that case, we're copying the new 225 * patch point over the old one. 226 */ 227 /* FIXME: dtrace fbt ... */ 228 bcopy(code, patch, sz); 229 230 /* Fill the rest with nops to be sure */ 231 while (sz < KDI_MSR_PATCHSZ) 232 patch[sz++] = 0x90; /* nop */ 233 } 234 } 235 236 static void 237 kdi_idt_gates_install(selector_t sel, int saveold) 238 { 239 gate_desc_t gates[KDI_GATE_NVECS]; 240 int i; 241 242 bzero(gates, sizeof (*gates)); 243 244 for (i = 0; i < KDI_GATE_NVECS; i++) { 245 const kdi_gate_spec_t *gs = &kdi_gate_specs[i]; 246 uintptr_t func = GATESEG_GETOFFSET(&kdi_idt[gs->kgs_vec]); 247 set_gatesegd(&gates[i], (void (*)())func, sel, SDT_SYSIGT, 248 gs->kgs_dpl, gs->kgs_vec); 249 } 250 251 for (i = 0; i < KDI_GATE_NVECS; i++) { 252 uint_t vec = kdi_gate_specs[i].kgs_vec; 253 254 if (saveold) 255 kdi_kgates[i] = CPU->cpu_m.mcpu_idt[vec]; 256 257 kdi_idt_write(&gates[i], vec); 258 } 259 } 260 261 static void 262 kdi_idt_gates_restore(void) 263 { 264 int i; 265 266 for (i = 0; i < KDI_GATE_NVECS; i++) 267 kdi_idt_write(&kdi_kgates[i], kdi_gate_specs[i].kgs_vec); 268 } 269 270 /* 271 * Called when we switch to the kernel's IDT. We need to interpose on the 272 * kernel's IDT entries and stop using KMDBCODE_SEL. 273 */ 274 void 275 kdi_idt_sync(void) 276 { 277 kdi_idt_init(KCS_SEL); 278 kdi_idt_gates_install(KCS_SEL, KDI_IDT_SAVE); 279 } 280 281 /* 282 * On some processors, we'll need to clear a certain MSR before proceeding into 283 * the debugger. Complicating matters, this MSR must be cleared before we take 284 * any branches. We have patch points in every trap handler, which will cover 285 * all entry paths for master CPUs. We also have a patch point in the slave 286 * entry code. 287 */ 288 static void 289 kdi_msr_add_clrentry(uint_t msr) 290 { 291 #ifdef __amd64 292 uchar_t code[] = { 293 0x51, 0x50, 0x52, /* pushq %rcx, %rax, %rdx */ 294 0xb9, 0x00, 0x00, 0x00, 0x00, /* movl $MSRNUM, %ecx */ 295 0x31, 0xc0, /* clr %eax */ 296 0x31, 0xd2, /* clr %edx */ 297 0x0f, 0x30, /* wrmsr */ 298 0x5a, 0x58, 0x59 /* popq %rdx, %rax, %rcx */ 299 }; 300 uchar_t *patch = &code[4]; 301 #else 302 uchar_t code[] = { 303 0x60, /* pushal */ 304 0xb9, 0x00, 0x00, 0x00, 0x00, /* movl $MSRNUM, %ecx */ 305 0x31, 0xc0, /* clr %eax */ 306 0x31, 0xd2, /* clr %edx */ 307 0x0f, 0x30, /* wrmsr */ 308 0x61 /* popal */ 309 }; 310 uchar_t *patch = &code[2]; 311 #endif 312 313 bcopy(&msr, patch, sizeof (uint32_t)); 314 315 kdi_idt_patch((caddr_t)code, sizeof (code)); 316 317 bcopy(code, &kdi_slave_entry_patch, sizeof (code)); 318 } 319 320 static void 321 kdi_msr_add_wrexit(uint_t msr, uint64_t *valp) 322 { 323 kdi_msr_wrexit_msr = msr; 324 kdi_msr_wrexit_valp = valp; 325 } 326 327 void 328 kdi_set_debug_msrs(kdi_msr_t *msrs) 329 { 330 int nmsrs, i; 331 332 ASSERT(kdi_cpusave[0].krs_msr == NULL); 333 334 /* Look in CPU0's MSRs for any special MSRs. */ 335 for (nmsrs = 0; msrs[nmsrs].msr_num != 0; nmsrs++) { 336 switch (msrs[nmsrs].msr_type) { 337 case KDI_MSR_CLEARENTRY: 338 kdi_msr_add_clrentry(msrs[nmsrs].msr_num); 339 break; 340 341 case KDI_MSR_WRITEDELAY: 342 kdi_msr_add_wrexit(msrs[nmsrs].msr_num, 343 msrs[nmsrs].kdi_msr_valp); 344 break; 345 } 346 } 347 348 nmsrs++; 349 350 for (i = 0; i < kdi_ncpusave; i++) 351 kdi_cpusave[i].krs_msr = &msrs[nmsrs * i]; 352 } 353 354 void 355 kdi_update_drreg(kdi_drreg_t *drreg) 356 { 357 kdi_drreg = *drreg; 358 } 359 360 void 361 kdi_memrange_add(caddr_t base, size_t len) 362 { 363 kdi_memrange_t *mr = &kdi_memranges[kdi_nmemranges]; 364 365 ASSERT(kdi_nmemranges != KDI_MEMRANGES_MAX); 366 367 mr->mr_base = base; 368 mr->mr_lim = base + len - 1; 369 kdi_nmemranges++; 370 } 371 372 void 373 kdi_idt_switch(kdi_cpusave_t *cpusave) 374 { 375 if (cpusave == NULL) 376 kdi_idtr_set(kdi_idt, sizeof (kdi_idt) - 1); 377 else 378 kdi_idtr_set(cpusave->krs_idt, (sizeof (*idt0) * NIDT) - 1); 379 } 380 381 /* 382 * Activation for CPUs other than the boot CPU, called from that CPU's 383 * mp_startup(). We saved the kernel's descriptors when we initialized the 384 * boot CPU, so we don't want to do it again. Saving the handlers from this 385 * CPU's IDT would actually be dangerous with the CPU initialization method in 386 * use at the time of this writing. With that method, the startup code creates 387 * the IDTs for slave CPUs by copying the one used by the boot CPU, which has 388 * already been interposed upon by KMDB. Were we to interpose again, we'd 389 * replace the kernel's descriptors with our own in the save area. By not 390 * saving, but still overwriting, we'll work in the current world, and in any 391 * future world where the IDT is generated from scratch. 392 */ 393 void 394 kdi_cpu_init(void) 395 { 396 kdi_idt_gates_install(KCS_SEL, KDI_IDT_NOSAVE); 397 /* Load the debug registers and MSRs */ 398 kdi_cpu_debug_init(&kdi_cpusave[CPU->cpu_id]); 399 } 400 401 /* 402 * Activation for all CPUs for mod-loaded kmdb, i.e. a kmdb that wasn't 403 * loaded at boot. 404 */ 405 static int 406 kdi_cpu_activate(void) 407 { 408 kdi_idt_gates_install(KCS_SEL, KDI_IDT_SAVE); 409 return (0); 410 } 411 412 void 413 kdi_activate(kdi_main_t main, kdi_cpusave_t *cpusave, uint_t ncpusave) 414 { 415 int i; 416 cpuset_t cpuset; 417 418 CPUSET_ALL(cpuset); 419 420 kdi_cpusave = cpusave; 421 kdi_ncpusave = ncpusave; 422 423 kdi_kmdb_main = main; 424 425 for (i = 0; i < kdi_ncpusave; i++) { 426 kdi_cpusave[i].krs_cpu_id = i; 427 428 kdi_cpusave[i].krs_curcrumb = 429 &kdi_cpusave[i].krs_crumbs[KDI_NCRUMBS - 1]; 430 kdi_cpusave[i].krs_curcrumbidx = KDI_NCRUMBS - 1; 431 } 432 433 if (boothowto & RB_KMDB) 434 kdi_idt_init(KMDBCODE_SEL); 435 else 436 kdi_idt_init(KCS_SEL); 437 438 /* The initial selector set. Updated by the debugger-entry code */ 439 #ifndef __amd64 440 kdi_cs = B32CODE_SEL; 441 kdi_ds = kdi_fs = kdi_gs = B32DATA_SEL; 442 #endif 443 444 kdi_memranges[0].mr_base = kdi_segdebugbase; 445 kdi_memranges[0].mr_lim = kdi_segdebugbase + kdi_segdebugsize - 1; 446 kdi_nmemranges = 1; 447 448 kdi_drreg.dr_ctl = KDIREG_DRCTL_RESERVED; 449 kdi_drreg.dr_stat = KDIREG_DRSTAT_RESERVED; 450 451 kdi_msr_wrexit_msr = 0; 452 kdi_msr_wrexit_valp = NULL; 453 454 if (boothowto & RB_KMDB) { 455 kdi_idt_gates_install(KMDBCODE_SEL, KDI_IDT_NOSAVE); 456 } else { 457 xc_call(0, 0, 0, CPUSET2BV(cpuset), 458 (xc_func_t)kdi_cpu_activate); 459 } 460 } 461 462 static int 463 kdi_cpu_deactivate(void) 464 { 465 kdi_idt_gates_restore(); 466 return (0); 467 } 468 469 void 470 kdi_deactivate(void) 471 { 472 cpuset_t cpuset; 473 CPUSET_ALL(cpuset); 474 475 xc_call(0, 0, 0, CPUSET2BV(cpuset), (xc_func_t)kdi_cpu_deactivate); 476 kdi_nmemranges = 0; 477 } 478 479 /* 480 * We receive all breakpoints and single step traps. Some of them, 481 * including those from userland and those induced by DTrace providers, 482 * are intended for the kernel, and must be processed there. We adopt 483 * this ours-until-proven-otherwise position due to the painful 484 * consequences of sending the kernel an unexpected breakpoint or 485 * single step. Unless someone can prove to us that the kernel is 486 * prepared to handle the trap, we'll assume there's a problem and will 487 * give the user a chance to debug it. 488 */ 489 int 490 kdi_trap_pass(kdi_cpusave_t *cpusave) 491 { 492 greg_t tt = cpusave->krs_gregs[KDIREG_TRAPNO]; 493 greg_t pc = cpusave->krs_gregs[KDIREG_PC]; 494 greg_t cs = cpusave->krs_gregs[KDIREG_CS]; 495 496 if (USERMODE(cs)) 497 return (1); 498 499 if (tt != T_BPTFLT && tt != T_SGLSTP) 500 return (0); 501 502 if (tt == T_BPTFLT && kdi_dtrace_get_state() == 503 KDI_DTSTATE_DTRACE_ACTIVE) 504 return (1); 505 506 /* 507 * See the comments in the kernel's T_SGLSTP handler for why we need to 508 * do this. 509 */ 510 if (tt == T_SGLSTP && 511 (pc == (greg_t)sys_sysenter || pc == (greg_t)brand_sys_sysenter)) 512 return (1); 513 514 return (0); 515 } 516 517 /* 518 * State has been saved, and all CPUs are on the CPU-specific stacks. All 519 * CPUs enter here, and head off into the debugger proper. 520 */ 521 void 522 kdi_debugger_entry(kdi_cpusave_t *cpusave) 523 { 524 /* 525 * BPTFLT gives us control with %eip set to the instruction *after* 526 * the int 3. Back it off, so we're looking at the instruction that 527 * triggered the fault. 528 */ 529 if (cpusave->krs_gregs[KDIREG_TRAPNO] == T_BPTFLT) 530 cpusave->krs_gregs[KDIREG_PC]--; 531 532 kdi_kmdb_main(cpusave); 533 } --- EOF ---