15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
28 /* All Rights Reserved */
29 /* */
30 /* Copyright (c) 1987, 1988 Microsoft Corporation */
31 /* All Rights Reserved */
32 /* */
33
34 /*
35 * Copyright 2017 Joyent, Inc.
36 */
37
38 #include <sys/types.h>
39 #include <sys/sysmacros.h>
40 #include <sys/param.h>
41 #include <sys/signal.h>
42 #include <sys/systm.h>
43 #include <sys/user.h>
44 #include <sys/proc.h>
45 #include <sys/disp.h>
46 #include <sys/class.h>
47 #include <sys/core.h>
48 #include <sys/syscall.h>
49 #include <sys/cpuvar.h>
50 #include <sys/vm.h>
51 #include <sys/sysinfo.h>
52 #include <sys/fault.h>
53 #include <sys/stack.h>
54 #include <sys/psw.h>
55 #include <sys/regset.h>
463 */
464 void
465 trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
466 {
467 kthread_t *ct = curthread;
468 enum seg_rw rw;
469 unsigned type;
470 proc_t *p = ttoproc(ct);
471 klwp_t *lwp = ttolwp(ct);
472 uintptr_t lofault;
473 label_t *onfault;
474 faultcode_t pagefault(), res, errcode;
475 enum fault_type fault_type;
476 k_siginfo_t siginfo;
477 uint_t fault = 0;
478 int mstate;
479 int sicode = 0;
480 int watchcode;
481 int watchpage;
482 caddr_t vaddr;
483 int singlestep_twiddle;
484 size_t sz;
485 int ta;
486 #ifdef __amd64
487 uchar_t instr;
488 #endif
489
490 ASSERT_STACK_ALIGNED();
491
492 type = rp->r_trapno;
493 CPU_STATS_ADDQ(CPU, sys, trap, 1);
494 ASSERT(ct->t_schedflag & TS_DONT_SWAP);
495
496 if (type == T_PGFLT) {
497
498 errcode = rp->r_err;
499 if (errcode & PF_ERR_WRITE)
500 rw = S_WRITE;
501 else if ((caddr_t)rp->r_pc == addr ||
502 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC)))
503 rw = S_EXEC;
1074
1075 sti(); /* The SIMD exception comes in via cmninttrap */
1076 break;
1077
1078 case T_BPTFLT: /* breakpoint trap */
1079 /*
1080 * Kernel breakpoint traps should only happen when kmdb is
1081 * active, and even then, it'll have interposed on the IDT, so
1082 * control won't get here. If it does, we've hit a breakpoint
1083 * without the debugger, which is very strange, and very
1084 * fatal.
1085 */
1086 if (tudebug && tudebugbpt)
1087 showregs(type, rp, (caddr_t)0);
1088
1089 (void) die(type, rp, addr, cpuid);
1090 break;
1091
1092 case T_SGLSTP: /* single step/hw breakpoint exception */
1093
1094 /* Now evaluate how we got here */
1095 if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
1096 /*
1097 * i386 single-steps even through lcalls which
1098 * change the privilege level. So we take a trap at
1099 * the first instruction in privileged mode.
1100 *
1101 * Set a flag to indicate that upon completion of
1102 * the system call, deal with the single-step trap.
1103 *
1104 * The same thing happens for sysenter, too.
1105 */
1106 singlestep_twiddle = 0;
1107 if (rp->r_pc == (uintptr_t)sys_sysenter ||
1108 rp->r_pc == (uintptr_t)brand_sys_sysenter) {
1109 singlestep_twiddle = 1;
1110 #if defined(__amd64)
1111 /*
1112 * Since we are already on the kernel's
1113 * %gs, on 64-bit systems the sysenter case
1114 * needs to adjust the pc to avoid
1115 * executing the swapgs instruction at the
1116 * top of the handler.
1117 */
1118 if (rp->r_pc == (uintptr_t)sys_sysenter)
1119 rp->r_pc = (uintptr_t)
1120 _sys_sysenter_post_swapgs;
1121 else
1122 rp->r_pc = (uintptr_t)
1123 _brand_sys_sysenter_post_swapgs;
1124 #endif
1125 }
1126 #if defined(__i386)
1127 else if (rp->r_pc == (uintptr_t)sys_call ||
1128 rp->r_pc == (uintptr_t)brand_sys_call) {
1129 singlestep_twiddle = 1;
1130 }
1131 #endif
1132 else {
1133 /* not on sysenter/syscall; uregs available */
1134 if (tudebug && tudebugbpt)
1135 showregs(type, rp, (caddr_t)0);
1136 }
1137 if (singlestep_twiddle) {
1138 rp->r_ps &= ~PS_T; /* turn off trace */
1139 lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
1140 ct->t_post_sys = 1;
1141 aston(curthread);
1142 goto cleanup;
1143 }
1144 }
1145 /* XXX - needs review on debugger interface? */
1146 if (boothowto & RB_DEBUG)
1147 debug_enter((char *)NULL);
1148 else
1149 (void) die(type, rp, addr, cpuid);
1150 break;
1151
1152 case T_NMIFLT: /* NMI interrupt */
1153 printf("Unexpected NMI in system mode\n");
1154 goto cleanup;
1155
1156 case T_NMIFLT + USER: /* NMI interrupt */
1157 printf("Unexpected NMI in user mode\n");
1158 break;
1159
1160 case T_GPFLT: /* general protection violation */
1161 /*
1162 * Any #GP that occurs during an on_trap .. no_trap bracket
1163 * with OT_DATA_ACCESS or OT_SEGMENT_ACCESS protection,
1164 * or in a on_fault .. no_fault bracket, is forgiven
1165 * and we trampoline. This protection is given regardless
1721 } else if (addr) {
1722 printf("addr=0x%lx\n", (uintptr_t)addr);
1723 }
1724
1725 printf("pid=%d, pc=0x%lx, sp=0x%lx, eflags=0x%lx\n",
1726 (ttoproc(curthread) && ttoproc(curthread)->p_pidp) ?
1727 ttoproc(curthread)->p_pid : 0, rp->r_pc, rp->r_sp, rp->r_ps);
1728
1729 #if defined(__lint)
1730 /*
1731 * this clause can be deleted when lint bug 4870403 is fixed
1732 * (lint thinks that bit 32 is illegal in a %b format string)
1733 */
1734 printf("cr0: %x cr4: %b\n",
1735 (uint_t)getcr0(), (uint_t)getcr4(), FMT_CR4);
1736 #else
1737 printf("cr0: %b cr4: %b\n",
1738 (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4);
1739 #endif /* __lint */
1740
1741 printf("cr2: %lx", getcr2());
1742 #if !defined(__xpv)
1743 printf("cr3: %lx", getcr3());
1744 #if defined(__amd64)
1745 printf("cr8: %lx\n", getcr8());
1746 #endif
1747 #endif
1748 printf("\n");
1749
1750 dumpregs(rp);
1751 splx(s);
1752 }
1753
1754 static void
1755 dumpregs(struct regs *rp)
1756 {
1757 #if defined(__amd64)
1758 const char fmt[] = "\t%3s: %16lx %3s: %16lx %3s: %16lx\n";
1759
1760 printf(fmt, "rdi", rp->r_rdi, "rsi", rp->r_rsi, "rdx", rp->r_rdx);
1761 printf(fmt, "rcx", rp->r_rcx, " r8", rp->r_r8, " r9", rp->r_r9);
1762 printf(fmt, "rax", rp->r_rax, "rbx", rp->r_rbx, "rbp", rp->r_rbp);
1763 printf(fmt, "r10", rp->r_r10, "r11", rp->r_r11, "r12", rp->r_r12);
1829 static int
1830 instr_is_segregs_pop(caddr_t pc)
1831 {
1832 static const uint8_t movw_0_esp_gs[4] = { 0x8e, 0x6c, 0x24, 0x0 };
1833 static const uint8_t movw_4_esp_fs[4] = { 0x8e, 0x64, 0x24, 0x4 };
1834 static const uint8_t movw_8_esp_es[4] = { 0x8e, 0x44, 0x24, 0x8 };
1835 static const uint8_t movw_c_esp_ds[4] = { 0x8e, 0x5c, 0x24, 0xc };
1836
1837 if (bcmp(pc, movw_0_esp_gs, sizeof (movw_0_esp_gs)) == 0 ||
1838 bcmp(pc, movw_4_esp_fs, sizeof (movw_4_esp_fs)) == 0 ||
1839 bcmp(pc, movw_8_esp_es, sizeof (movw_8_esp_es)) == 0 ||
1840 bcmp(pc, movw_c_esp_ds, sizeof (movw_c_esp_ds)) == 0)
1841 return (1);
1842
1843 return (0);
1844 }
1845
1846 #endif /* __i386 */
1847
1848 /*
1849 * Test to see if the instruction is part of _sys_rtt.
1850 *
1851 * Again on the hypervisor if we try to IRET to user land with a bad code
1852 * or stack selector we will get vectored through xen_failsafe_callback.
1853 * In which case we assume we got here via _sys_rtt since we only allow
1854 * IRET to user land to take place in _sys_rtt.
1855 */
1856 static int
1857 instr_is_sys_rtt(caddr_t pc)
1858 {
1859 extern void _sys_rtt(), _sys_rtt_end();
1860
1861 if ((uintptr_t)pc < (uintptr_t)_sys_rtt ||
1862 (uintptr_t)pc > (uintptr_t)_sys_rtt_end)
1863 return (0);
1864
1865 return (1);
1866 }
1867
1868 /*
1869 * Handle #gp faults in kernel mode.
1870 *
1871 * One legitimate way this can happen is if we attempt to update segment
1872 * registers to naughty values on the way out of the kernel.
1873 *
1874 * This can happen in a couple of ways: someone - either accidentally or
1875 * on purpose - creates (setcontext(2), lwp_create(2)) or modifies
1876 * (signal(2)) a ucontext that contains silly segment register values.
1877 * Or someone - either accidentally or on purpose - modifies the prgregset_t
1878 * of a subject process via /proc to contain silly segment register values.
1879 *
1880 * (The unfortunate part is that we can end up discovering the bad segment
|
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
28 /* All Rights Reserved */
29 /* */
30 /* Copyright (c) 1987, 1988 Microsoft Corporation */
31 /* All Rights Reserved */
32 /* */
33
34 /*
35 * Copyright 2018 Joyent, Inc.
36 */
37
38 #include <sys/types.h>
39 #include <sys/sysmacros.h>
40 #include <sys/param.h>
41 #include <sys/signal.h>
42 #include <sys/systm.h>
43 #include <sys/user.h>
44 #include <sys/proc.h>
45 #include <sys/disp.h>
46 #include <sys/class.h>
47 #include <sys/core.h>
48 #include <sys/syscall.h>
49 #include <sys/cpuvar.h>
50 #include <sys/vm.h>
51 #include <sys/sysinfo.h>
52 #include <sys/fault.h>
53 #include <sys/stack.h>
54 #include <sys/psw.h>
55 #include <sys/regset.h>
463 */
464 void
465 trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
466 {
467 kthread_t *ct = curthread;
468 enum seg_rw rw;
469 unsigned type;
470 proc_t *p = ttoproc(ct);
471 klwp_t *lwp = ttolwp(ct);
472 uintptr_t lofault;
473 label_t *onfault;
474 faultcode_t pagefault(), res, errcode;
475 enum fault_type fault_type;
476 k_siginfo_t siginfo;
477 uint_t fault = 0;
478 int mstate;
479 int sicode = 0;
480 int watchcode;
481 int watchpage;
482 caddr_t vaddr;
483 size_t sz;
484 int ta;
485 #ifdef __amd64
486 uchar_t instr;
487 #endif
488
489 ASSERT_STACK_ALIGNED();
490
491 type = rp->r_trapno;
492 CPU_STATS_ADDQ(CPU, sys, trap, 1);
493 ASSERT(ct->t_schedflag & TS_DONT_SWAP);
494
495 if (type == T_PGFLT) {
496
497 errcode = rp->r_err;
498 if (errcode & PF_ERR_WRITE)
499 rw = S_WRITE;
500 else if ((caddr_t)rp->r_pc == addr ||
501 (mmu.pt_nx != 0 && (errcode & PF_ERR_EXEC)))
502 rw = S_EXEC;
1073
1074 sti(); /* The SIMD exception comes in via cmninttrap */
1075 break;
1076
1077 case T_BPTFLT: /* breakpoint trap */
1078 /*
1079 * Kernel breakpoint traps should only happen when kmdb is
1080 * active, and even then, it'll have interposed on the IDT, so
1081 * control won't get here. If it does, we've hit a breakpoint
1082 * without the debugger, which is very strange, and very
1083 * fatal.
1084 */
1085 if (tudebug && tudebugbpt)
1086 showregs(type, rp, (caddr_t)0);
1087
1088 (void) die(type, rp, addr, cpuid);
1089 break;
1090
1091 case T_SGLSTP: /* single step/hw breakpoint exception */
1092
1093 #if !defined(__xpv)
1094 /*
1095 * We'd never normally get here, as kmdb handles its own single
1096 * step traps. There is one nasty exception though, as
1097 * described in more detail in sys_sysenter(). Note that
1098 * checking for all four locations covers both the KPTI and the
1099 * non-KPTI cases correctly: the former will never be found at
1100 * (brand_)sys_sysenter, and vice versa.
1101 */
1102 if (lwp != NULL && (lwp->lwp_pcb.pcb_drstat & DR_SINGLESTEP)) {
1103 if (rp->r_pc == (greg_t)brand_sys_sysenter ||
1104 rp->r_pc == (greg_t)sys_sysenter ||
1105 rp->r_pc == (greg_t)tr_brand_sys_sysenter ||
1106 rp->r_pc == (greg_t)tr_sys_sysenter) {
1107
1108 rp->r_pc += 0x3; /* sizeof (swapgs) */
1109
1110 rp->r_ps &= ~PS_T; /* turn off trace */
1111 lwp->lwp_pcb.pcb_flags |= DEBUG_PENDING;
1112 ct->t_post_sys = 1;
1113 aston(curthread);
1114 goto cleanup;
1115 } else {
1116 if (tudebug && tudebugbpt)
1117 showregs(type, rp, (caddr_t)0);
1118 }
1119 }
1120 #endif /* !__xpv */
1121
1122 if (boothowto & RB_DEBUG)
1123 debug_enter((char *)NULL);
1124 else
1125 (void) die(type, rp, addr, cpuid);
1126 break;
1127
1128 case T_NMIFLT: /* NMI interrupt */
1129 printf("Unexpected NMI in system mode\n");
1130 goto cleanup;
1131
1132 case T_NMIFLT + USER: /* NMI interrupt */
1133 printf("Unexpected NMI in user mode\n");
1134 break;
1135
1136 case T_GPFLT: /* general protection violation */
1137 /*
1138 * Any #GP that occurs during an on_trap .. no_trap bracket
1139 * with OT_DATA_ACCESS or OT_SEGMENT_ACCESS protection,
1140 * or in a on_fault .. no_fault bracket, is forgiven
1141 * and we trampoline. This protection is given regardless
1697 } else if (addr) {
1698 printf("addr=0x%lx\n", (uintptr_t)addr);
1699 }
1700
1701 printf("pid=%d, pc=0x%lx, sp=0x%lx, eflags=0x%lx\n",
1702 (ttoproc(curthread) && ttoproc(curthread)->p_pidp) ?
1703 ttoproc(curthread)->p_pid : 0, rp->r_pc, rp->r_sp, rp->r_ps);
1704
1705 #if defined(__lint)
1706 /*
1707 * this clause can be deleted when lint bug 4870403 is fixed
1708 * (lint thinks that bit 32 is illegal in a %b format string)
1709 */
1710 printf("cr0: %x cr4: %b\n",
1711 (uint_t)getcr0(), (uint_t)getcr4(), FMT_CR4);
1712 #else
1713 printf("cr0: %b cr4: %b\n",
1714 (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4);
1715 #endif /* __lint */
1716
1717 printf("cr2: %lx ", getcr2());
1718 #if !defined(__xpv)
1719 printf("cr3: %lx ", getcr3());
1720 #if defined(__amd64)
1721 printf("cr8: %lx\n", getcr8());
1722 #endif
1723 #endif
1724 printf("\n");
1725
1726 dumpregs(rp);
1727 splx(s);
1728 }
1729
1730 static void
1731 dumpregs(struct regs *rp)
1732 {
1733 #if defined(__amd64)
1734 const char fmt[] = "\t%3s: %16lx %3s: %16lx %3s: %16lx\n";
1735
1736 printf(fmt, "rdi", rp->r_rdi, "rsi", rp->r_rsi, "rdx", rp->r_rdx);
1737 printf(fmt, "rcx", rp->r_rcx, " r8", rp->r_r8, " r9", rp->r_r9);
1738 printf(fmt, "rax", rp->r_rax, "rbx", rp->r_rbx, "rbp", rp->r_rbp);
1739 printf(fmt, "r10", rp->r_r10, "r11", rp->r_r11, "r12", rp->r_r12);
1805 static int
1806 instr_is_segregs_pop(caddr_t pc)
1807 {
1808 static const uint8_t movw_0_esp_gs[4] = { 0x8e, 0x6c, 0x24, 0x0 };
1809 static const uint8_t movw_4_esp_fs[4] = { 0x8e, 0x64, 0x24, 0x4 };
1810 static const uint8_t movw_8_esp_es[4] = { 0x8e, 0x44, 0x24, 0x8 };
1811 static const uint8_t movw_c_esp_ds[4] = { 0x8e, 0x5c, 0x24, 0xc };
1812
1813 if (bcmp(pc, movw_0_esp_gs, sizeof (movw_0_esp_gs)) == 0 ||
1814 bcmp(pc, movw_4_esp_fs, sizeof (movw_4_esp_fs)) == 0 ||
1815 bcmp(pc, movw_8_esp_es, sizeof (movw_8_esp_es)) == 0 ||
1816 bcmp(pc, movw_c_esp_ds, sizeof (movw_c_esp_ds)) == 0)
1817 return (1);
1818
1819 return (0);
1820 }
1821
1822 #endif /* __i386 */
1823
1824 /*
1825 * Test to see if the instruction is part of _sys_rtt (or the KPTI trampolines
1826 * which are used by _sys_rtt).
1827 *
1828 * Again on the hypervisor if we try to IRET to user land with a bad code
1829 * or stack selector we will get vectored through xen_failsafe_callback.
1830 * In which case we assume we got here via _sys_rtt since we only allow
1831 * IRET to user land to take place in _sys_rtt.
1832 */
1833 static int
1834 instr_is_sys_rtt(caddr_t pc)
1835 {
1836 extern void _sys_rtt(), _sys_rtt_end();
1837
1838 #if !defined(__xpv)
1839 extern void tr_sysc_ret_start(), tr_sysc_ret_end();
1840 extern void tr_intr_ret_start(), tr_intr_ret_end();
1841
1842 if ((uintptr_t)pc >= (uintptr_t)tr_sysc_ret_start &&
1843 (uintptr_t)pc <= (uintptr_t)tr_sysc_ret_end)
1844 return (1);
1845
1846 if ((uintptr_t)pc >= (uintptr_t)tr_intr_ret_start &&
1847 (uintptr_t)pc <= (uintptr_t)tr_intr_ret_end)
1848 return (1);
1849 #endif
1850
1851 if ((uintptr_t)pc < (uintptr_t)_sys_rtt ||
1852 (uintptr_t)pc > (uintptr_t)_sys_rtt_end)
1853 return (0);
1854
1855 return (1);
1856 }
1857
1858 /*
1859 * Handle #gp faults in kernel mode.
1860 *
1861 * One legitimate way this can happen is if we attempt to update segment
1862 * registers to naughty values on the way out of the kernel.
1863 *
1864 * This can happen in a couple of ways: someone - either accidentally or
1865 * on purpose - creates (setcontext(2), lwp_create(2)) or modifies
1866 * (signal(2)) a ucontext that contains silly segment register values.
1867 * Or someone - either accidentally or on purpose - modifies the prgregset_t
1868 * of a subject process via /proc to contain silly segment register values.
1869 *
1870 * (The unfortunate part is that we can end up discovering the bad segment
|