11787 Kernel needs to be built with retpolines
11788 Kernel needs to generally use RSB stuffing
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: John Levon <john.levon@joyent.com>


 182 #define BRAND_CALLBACK(callback_id, push_userland_ret)                      \
 183         movq    %rsp, %gs:CPU_RTMP_RSP  /* save the stack pointer       */ ;\
 184         movq    %r15, %gs:CPU_RTMP_R15  /* save %r15                    */ ;\
 185         movq    %gs:CPU_THREAD, %r15    /* load the thread pointer      */ ;\
 186         movq    T_STACK(%r15), %rsp     /* switch to the kernel stack   */ ;\
 187         subq    $16, %rsp               /* save space for 2 pointers    */ ;\
 188         pushq   %r14                    /* save %r14                    */ ;\
 189         movq    %gs:CPU_RTMP_RSP, %r14                                     ;\
 190         movq    %r14, 8(%rsp)           /* stash the user stack pointer */ ;\
 191         popq    %r14                    /* restore %r14                 */ ;\
 192         movq    T_LWP(%r15), %r15       /* load the lwp pointer         */ ;\
 193         pushq   %r15                    /* push the lwp pointer         */ ;\
 194         movq    LWP_PROCP(%r15), %r15   /* load the proc pointer        */ ;\
 195         movq    P_BRAND(%r15), %r15     /* load the brand pointer       */ ;\
 196         movq    B_MACHOPS(%r15), %r15   /* load the machops pointer     */ ;\
 197         movq    _CONST(_MUL(callback_id, CPTRSIZE))(%r15), %r15            ;\
 198         cmpq    $0, %r15                                                   ;\
 199         je      1f                                                         ;\
 200         movq    %r15, 16(%rsp)          /* save the callback pointer    */ ;\
 201         push_userland_ret               /* push the return address      */ ;\
 202         call    *24(%rsp)               /* call callback                */ ;\

 203 1:      movq    %gs:CPU_RTMP_R15, %r15  /* restore %r15                 */ ;\
 204         movq    %gs:CPU_RTMP_RSP, %rsp  /* restore the stack pointer    */
 205 
 206 #define MSTATE_TRANSITION(from, to)             \
 207         movl    $from, %edi;                    \
 208         movl    $to, %esi;                      \
 209         call    syscall_mstate
 210 
 211 /*
 212  * Check to see if a simple (direct) return is possible i.e.
 213  *
 214  *      if (t->t_post_sys_ast | syscalltrace |
 215  *          lwp->lwp_pcb.pcb_rupdate == 1)
 216  *              do full version ;
 217  *
 218  * Preconditions:
 219  * -    t is curthread
 220  * Postconditions:
 221  * -    condition code NE is set if post-sys is too complex
 222  * -    rtmp is zeroed if it isn't (we rely on this!)


 558 
 559         movw    %ax, T_SYSNUM(%r15)
 560         movzbl  T_PRE_SYS(%r15), %ebx
 561         ORL_SYSCALLTRACE(%ebx)
 562         testl   %ebx, %ebx
 563         jne     _syscall_pre
 564 
 565 _syscall_invoke:
 566         movq    REGOFF_RDI(%rbp), %rdi
 567         movq    REGOFF_RSI(%rbp), %rsi
 568         movq    REGOFF_RDX(%rbp), %rdx
 569         movq    REGOFF_RCX(%rbp), %rcx
 570         movq    REGOFF_R8(%rbp), %r8
 571         movq    REGOFF_R9(%rbp), %r9
 572 
 573         cmpl    $NSYSCALL, %eax
 574         jae     _syscall_ill
 575         shll    $SYSENT_SIZE_SHIFT, %eax
 576         leaq    sysent(%rax), %rbx
 577 
 578         call    *SY_CALLC(%rbx)

 579 
 580         movq    %rax, %r12
 581         movq    %rdx, %r13
 582 
 583         /*
 584          * If the handler returns two ints, then we need to split the
 585          * 64-bit return value into two 32-bit values.
 586          */
 587         testw   $SE_32RVAL2, SY_FLAGS(%rbx)
 588         je      5f
 589         movq    %r12, %r13
 590         shrq    $32, %r13       /* upper 32-bits into %edx */
 591         movl    %r12d, %r12d    /* lower 32-bits into %eax */
 592 5:
 593         /*
 594          * Optimistically assume that there's no post-syscall
 595          * work to do.  (This is to avoid having to call syscall_mstate()
 596          * with interrupts disabled)
 597          */
 598         MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)


 634 
 635 
 636         SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
 637 
 638         movq    %r12, REGOFF_RAX(%rsp)
 639         movq    %r13, REGOFF_RDX(%rsp)
 640 
 641         /*
 642          * Clobber %r11 as we check CR0.TS.
 643          */
 644         ASSERT_CR0TS_ZERO(%r11)
 645 
 646         /*
 647          * Unlike other cases, because we need to restore the user stack pointer
 648          * before exiting the kernel we must clear the microarch state before
 649          * getting here. This should be safe because it means that the only
 650          * values on the bus after this are based on the user's registers and
 651          * potentially the addresses where we stored them. Given the constraints
 652          * of sysret, that's how it has to be.
 653          */
 654         call    *x86_md_clear
 655 
 656         /*
 657          * To get back to userland, we need the return %rip in %rcx and
 658          * the return %rfl in %r11d.  The sysretq instruction also arranges
 659          * to fix up %cs and %ss; everything else is our responsibility.
 660          */
 661         movq    REGOFF_RDI(%rsp), %rdi
 662         movq    REGOFF_RSI(%rsp), %rsi
 663         movq    REGOFF_RDX(%rsp), %rdx
 664         /* %rcx used to restore %rip value */
 665 
 666         movq    REGOFF_R8(%rsp), %r8
 667         movq    REGOFF_R9(%rsp), %r9
 668         movq    REGOFF_RAX(%rsp), %rax
 669         movq    REGOFF_RBX(%rsp), %rbx
 670 
 671         movq    REGOFF_RBP(%rsp), %rbp
 672         movq    REGOFF_R10(%rsp), %r10
 673         /* %r11 used to restore %rfl value */
 674         movq    REGOFF_R12(%rsp), %r12


 885          * forcibly fetching 6 arguments from the user stack under lofault
 886          * protection, reverting to copyin_args only when watchpoints
 887          * are in effect.
 888          *
 889          * (If we do this, make sure that exec and libthread leave
 890          * enough space at the top of the stack to ensure that we'll
 891          * never do a fetch from an invalid page.)
 892          *
 893          * Lots of ideas here, but they won't really help with bringup B-)
 894          * Correctness can't wait, performance can wait a little longer ..
 895          */
 896 
 897         movq    %rax, %rbx
 898         movl    0(%rsp), %edi
 899         movl    8(%rsp), %esi
 900         movl    0x10(%rsp), %edx
 901         movl    0x18(%rsp), %ecx
 902         movl    0x20(%rsp), %r8d
 903         movl    0x28(%rsp), %r9d
 904 
 905         call    *SY_CALLC(%rbx)

 906 
 907         movq    %rbp, %rsp      /* pop the args */
 908 
 909         /*
 910          * amd64 syscall handlers -always- return a 64-bit value in %rax.
 911          * On the 32-bit kernel, they always return that value in %eax:%edx
 912          * as required by the 32-bit ABI.
 913          *
 914          * Simulate the same behaviour by unconditionally splitting the
 915          * return value in the same way.
 916          */
 917         movq    %rax, %r13
 918         shrq    $32, %r13       /* upper 32-bits into %edx */
 919         movl    %eax, %r12d     /* lower 32-bits into %eax */
 920 
 921         /*
 922          * Optimistically assume that there's no post-syscall
 923          * work to do.  (This is to avoid having to call syscall_mstate()
 924          * with interrupts disabled)
 925          */


 932          * registers without us noticing before we return to userland.
 933          */
 934         CLI(%r14)
 935         CHECK_POSTSYS_NE(%r15, %r14, %ebx)
 936         jne     _full_syscall_postsys32
 937         SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
 938 
 939         /*
 940          * Clobber %r11 as we check CR0.TS.
 941          */
 942         ASSERT_CR0TS_ZERO(%r11)
 943 
 944         /*
 945          * Unlike other cases, because we need to restore the user stack pointer
 946          * before exiting the kernel we must clear the microarch state before
 947          * getting here. This should be safe because it means that the only
 948          * values on the bus after this are based on the user's registers and
 949          * potentially the addresses where we stored them. Given the constraints
 950          * of sysret, that's how it has to be.
 951          */
 952         call    *x86_md_clear
 953 
 954         /*
 955          * To get back to userland, we need to put the return %rip in %rcx and
 956          * the return %rfl in %r11d.  The sysret instruction also arranges
 957          * to fix up %cs and %ss; everything else is our responsibility.
 958          */
 959 
 960         movl    %r12d, %eax                     /* %eax: rval1 */
 961         movl    REGOFF_RBX(%rsp), %ebx
 962         /* %ecx used for return pointer */
 963         movl    %r13d, %edx                     /* %edx: rval2 */
 964         movl    REGOFF_RBP(%rsp), %ebp
 965         movl    REGOFF_RSI(%rsp), %esi
 966         movl    REGOFF_RDI(%rsp), %edi
 967 
 968         movl    REGOFF_RFL(%rsp), %r11d         /* %r11 -> eflags */
 969         movl    REGOFF_RIP(%rsp), %ecx          /* %ecx -> %eip */
 970         movl    REGOFF_RSP(%rsp), %esp
 971 
 972         ASSERT_UPCALL_MASK_IS_SET


1172          */
1173         subq    $SYS_DROP, %rsp
1174         movb    $LWP_SYS, LWP_STATE(%r14)
1175         movq    %r15, %rdi
1176         movq    %rsp, %rsi
1177         call    syscall_entry
1178 
1179         /*
1180          * Fetch the arguments copied onto the kernel stack and put
1181          * them in the right registers to invoke a C-style syscall handler.
1182          * %rax contains the handler address.
1183          */
1184         movq    %rax, %rbx
1185         movl    0(%rsp), %edi
1186         movl    8(%rsp), %esi
1187         movl    0x10(%rsp), %edx
1188         movl    0x18(%rsp), %ecx
1189         movl    0x20(%rsp), %r8d
1190         movl    0x28(%rsp), %r9d
1191 
1192         call    *SY_CALLC(%rbx)

1193 
1194         movq    %rbp, %rsp      /* pop the args */
1195 
1196         /*
1197          * amd64 syscall handlers -always- return a 64-bit value in %rax.
1198  * On the 32-bit kernel, they always return that value in %eax:%edx
1199          * as required by the 32-bit ABI.
1200          *
1201          * Simulate the same behaviour by unconditionally splitting the
1202          * return value in the same way.
1203          */
1204         movq    %rax, %r13
1205         shrq    $32, %r13       /* upper 32-bits into %edx */
1206         movl    %eax, %r12d     /* lower 32-bits into %eax */
1207 
1208         /*
1209          * Optimistically assume that there's no post-syscall
1210          * work to do.  (This is to avoid having to call syscall_mstate()
1211          * with interrupts disabled)
1212          */


1240         /*
1241          * Clobber %r11 as we check CR0.TS.
1242          */
1243         ASSERT_CR0TS_ZERO(%r11)
1244 
1245         /*
1246          * (There's no point in loading up %edx because the sysexit
1247          * mechanism smashes it.)
1248          */
1249         movl    %r12d, %eax
1250         movl    REGOFF_RBX(%rsp), %ebx
1251         movl    REGOFF_RBP(%rsp), %ebp
1252         movl    REGOFF_RSI(%rsp), %esi
1253         movl    REGOFF_RDI(%rsp), %edi
1254 
1255         movl    REGOFF_RIP(%rsp), %edx  /* sysexit: %edx -> %eip */
1256         pushq   REGOFF_RFL(%rsp)
1257         popfq
1258         movl    REGOFF_RSP(%rsp), %ecx  /* sysexit: %ecx -> %esp */
1259         ALTENTRY(sys_sysenter_swapgs_sysexit)
1260         call    *x86_md_clear
1261         jmp     tr_sysexit
1262         SET_SIZE(sys_sysenter_swapgs_sysexit)
1263         SET_SIZE(sys_sysenter)
1264         SET_SIZE(_sys_sysenter_post_swapgs)
1265         SET_SIZE(brand_sys_sysenter)
1266 
1267 #endif  /* __lint */
1268 
1269 /*
1270  * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1271  * the generic i386 libc to do system calls. We do a small amount of setup
1272  * before jumping into the existing sys_syscall32 path.
1273  */
1274 #if defined(__lint)
1275 
1276 /*ARGSUSED*/
1277 void
1278 sys_syscall_int()
1279 {}
1280 


1297         movq    T_STACK(%r15), %rsp
1298         movl    %eax, %eax
1299         /*
1300          * Set t_post_sys on this thread to force ourselves out via the slow
1301          * path. It might be possible at some later date to optimize this out
1302          * and use a faster return mechanism.
1303          */
1304         movb    $1, T_POST_SYS(%r15)
1305         CLEAN_CS
1306         jmp     _syscall32_save
1307         /*
1308          * There should be no instructions between this label and SWAPGS/IRET
1309          * or we could end up breaking branded zone support. See the usage of
1310          * this label in lx_brand_int80_callback and sn1_brand_int91_callback
1311          * for examples.
1312          *
1313          * We want to swapgs to maintain the invariant that all entries into
1314          * tr_iret_user are done on the user gsbase.
1315          */
1316         ALTENTRY(sys_sysint_swapgs_iret)
1317         call    *x86_md_clear
1318         SWAPGS
1319         jmp     tr_iret_user
1320         /*NOTREACHED*/
1321         SET_SIZE(sys_sysint_swapgs_iret)
1322         SET_SIZE(sys_syscall_int)
1323         SET_SIZE(brand_sys_syscall_int)
1324 
1325 #endif  /* __lint */
1326 
1327 /*
1328  * Legacy 32-bit applications and old libc implementations do lcalls;
1329  * we should never get here because the LDT entry containing the syscall
1330  * segment descriptor has the "segment present" bit cleared, which means
1331  * we end up processing those system calls in trap() via a not-present trap.
1332  *
1333  * We do it this way because a call gate unhelpfully does -nothing- to the
1334  * interrupt flag bit, so an interrupt can run us just after the lcall
1335  * completes, but just before the swapgs takes effect.   Thus the INTR_PUSH and
1336  * INTR_POP paths would have to be slightly more complex to dance around
1337  * this problem, and end up depending explicitly on the first




 182 #define BRAND_CALLBACK(callback_id, push_userland_ret)                      \
 183         movq    %rsp, %gs:CPU_RTMP_RSP  /* save the stack pointer       */ ;\
 184         movq    %r15, %gs:CPU_RTMP_R15  /* save %r15                    */ ;\
 185         movq    %gs:CPU_THREAD, %r15    /* load the thread pointer      */ ;\
 186         movq    T_STACK(%r15), %rsp     /* switch to the kernel stack   */ ;\
 187         subq    $16, %rsp               /* save space for 2 pointers    */ ;\
 188         pushq   %r14                    /* save %r14                    */ ;\
 189         movq    %gs:CPU_RTMP_RSP, %r14                                     ;\
 190         movq    %r14, 8(%rsp)           /* stash the user stack pointer */ ;\
 191         popq    %r14                    /* restore %r14                 */ ;\
 192         movq    T_LWP(%r15), %r15       /* load the lwp pointer         */ ;\
 193         pushq   %r15                    /* push the lwp pointer         */ ;\
 194         movq    LWP_PROCP(%r15), %r15   /* load the proc pointer        */ ;\
 195         movq    P_BRAND(%r15), %r15     /* load the brand pointer       */ ;\
 196         movq    B_MACHOPS(%r15), %r15   /* load the machops pointer     */ ;\
 197         movq    _CONST(_MUL(callback_id, CPTRSIZE))(%r15), %r15            ;\
 198         cmpq    $0, %r15                                                   ;\
 199         je      1f                                                         ;\
 200         movq    %r15, 16(%rsp)          /* save the callback pointer    */ ;\
 201         push_userland_ret               /* push the return address      */ ;\
 202         movq    24(%rsp), %r15          /* load callback pointer        */ ;\
 203         INDIRECT_CALL_REG(r15)          /* call callback                */ ;\
 204 1:      movq    %gs:CPU_RTMP_R15, %r15  /* restore %r15                 */ ;\
 205         movq    %gs:CPU_RTMP_RSP, %rsp  /* restore the stack pointer    */
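The pattern above recurs throughout this change: the old macro invoked the callback through a memory operand (call *24(%rsp)), which a retpoline cannot cover, so the pointer is first reloaded into %r15 and the call is made through INDIRECT_CALL_REG. As a hedged sketch only (the thunk actually emitted by INDIRECT_CALL_REG in this build may differ in name and detail), a conventional retpoline thunk for a register-indirect call looks like this:

        /* illustrative retpoline thunk, not necessarily the one this build emits */
        __x86_indirect_thunk_r15:
                call    2f              /* pushes 1f; the RSB predicts the ret below returns there */
        1:      pause                   /* speculation lands here and spins harmlessly */
                lfence
                jmp     1b
        2:      movq    %r15, (%rsp)    /* replace the saved return address with the real target */
                ret                     /* architecturally transfers to *%r15 */

The effect is that the indirect branch is resolved by a ret whose speculation is pinned to the capture loop, rather than by the indirect branch predictor that Spectre v2 poisons.
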
 206 
 207 #define MSTATE_TRANSITION(from, to)             \
 208         movl    $from, %edi;                    \
 209         movl    $to, %esi;                      \
 210         call    syscall_mstate
 211 
 212 /*
 213  * Check to see if a simple (direct) return is possible i.e.
 214  *
 215  *      if (t->t_post_sys_ast | syscalltrace |
 216  *          lwp->lwp_pcb.pcb_rupdate == 1)
 217  *              do full version ;
 218  *
 219  * Preconditions:
 220  * -    t is curthread
 221  * Postconditions:
 222  * -    condition code NE is set if post-sys is too complex
 223  * -    rtmp is zeroed if it isn't (we rely on this!)


 559 
 560         movw    %ax, T_SYSNUM(%r15)
 561         movzbl  T_PRE_SYS(%r15), %ebx
 562         ORL_SYSCALLTRACE(%ebx)
 563         testl   %ebx, %ebx
 564         jne     _syscall_pre
 565 
 566 _syscall_invoke:
 567         movq    REGOFF_RDI(%rbp), %rdi
 568         movq    REGOFF_RSI(%rbp), %rsi
 569         movq    REGOFF_RDX(%rbp), %rdx
 570         movq    REGOFF_RCX(%rbp), %rcx
 571         movq    REGOFF_R8(%rbp), %r8
 572         movq    REGOFF_R9(%rbp), %r9
 573 
 574         cmpl    $NSYSCALL, %eax
 575         jae     _syscall_ill
 576         shll    $SYSENT_SIZE_SHIFT, %eax
 577         leaq    sysent(%rax), %rbx
 578 
 579         movq    SY_CALLC(%rbx), %rax
 580         INDIRECT_CALL_REG(rax)
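For reference, a hedged sketch of what the lookup and retpoline-safe dispatch above compute, assuming SYSENT_SIZE_SHIFT is log2 of the size of a sysent entry:

        /*
         * %rbx = sysent + (syscall# << SYSENT_SIZE_SHIFT)   == &sysent[syscall#]
         * %rax = SY_CALLC(%rbx)                             == sysent[syscall#].sy_callc
         * INDIRECT_CALL_REG(rax)                            == (*sy_callc)(%rdi, ..., %r9)
         */

The old form, call *SY_CALLC(%rbx), was a memory-indirect call; loading the handler into %rax first is what allows the call to go through a retpoline thunk.
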
 581 
 582         movq    %rax, %r12
 583         movq    %rdx, %r13
 584 
 585         /*
 586          * If the handler returns two ints, then we need to split the
 587          * 64-bit return value into two 32-bit values.
 588          */
 589         testw   $SE_32RVAL2, SY_FLAGS(%rbx)
 590         je      5f
 591         movq    %r12, %r13
 592         shrq    $32, %r13       /* upper 32-bits into %edx */
 593         movl    %r12d, %r12d    /* lower 32-bits into %eax */
 594 5:
 595         /*
 596          * Optimistically assume that there's no post-syscall
 597          * work to do.  (This is to avoid having to call syscall_mstate()
 598          * with interrupts disabled)
 599          */
 600         MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)


 636 
 637 
 638         SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
 639 
 640         movq    %r12, REGOFF_RAX(%rsp)
 641         movq    %r13, REGOFF_RDX(%rsp)
 642 
 643         /*
 644          * Clobber %r11 as we check CR0.TS.
 645          */
 646         ASSERT_CR0TS_ZERO(%r11)
 647 
 648         /*
 649          * Unlike other cases, because we need to restore the user stack pointer
 650          * before exiting the kernel we must clear the microarch state before
 651          * getting here. This should be safe because it means that the only
 652          * values on the bus after this are based on the user's registers and
 653          * potentially the addresses where we stored them. Given the constraints
 654          * of sysret, that's how it has to be.
 655          */
 656         call    x86_md_clear
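Relative to the old call *x86_md_clear, this is now a direct call, removing one more indirect branch from the return-to-user path. As a hedged sketch only (the routine actually reached here is selected at boot and may be a no-op on unaffected CPUs), a VERW-based microarchitectural flush of the kind such a routine is expected to perform looks like:

        /* illustrative VERW flush; not necessarily the body of x86_md_clear */
        md_clear_sketch:
                subq    $8, %rsp
                movw    %ds, (%rsp)     /* VERW just needs any valid selector */
                verw    (%rsp)          /* with updated microcode, flushes CPU buffers */
                addq    $8, %rsp
                ret
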
 657 
 658         /*
 659          * To get back to userland, we need the return %rip in %rcx and
 660          * the return %rfl in %r11d.  The sysretq instruction also arranges
 661          * to fix up %cs and %ss; everything else is our responsibility.
 662          */
 663         movq    REGOFF_RDI(%rsp), %rdi
 664         movq    REGOFF_RSI(%rsp), %rsi
 665         movq    REGOFF_RDX(%rsp), %rdx
 666         /* %rcx used to restore %rip value */
 667 
 668         movq    REGOFF_R8(%rsp), %r8
 669         movq    REGOFF_R9(%rsp), %r9
 670         movq    REGOFF_RAX(%rsp), %rax
 671         movq    REGOFF_RBX(%rsp), %rbx
 672 
 673         movq    REGOFF_RBP(%rsp), %rbp
 674         movq    REGOFF_R10(%rsp), %r10
 675         /* %r11 used to restore %rfl value */
 676         movq    REGOFF_R12(%rsp), %r12


 887          * forcibly fetching 6 arguments from the user stack under lofault
 888          * protection, reverting to copyin_args only when watchpoints
 889          * are in effect.
 890          *
 891          * (If we do this, make sure that exec and libthread leave
 892          * enough space at the top of the stack to ensure that we'll
 893          * never do a fetch from an invalid page.)
 894          *
 895          * Lots of ideas here, but they won't really help with bringup B-)
 896          * Correctness can't wait, performance can wait a little longer ..
 897          */
 898 
 899         movq    %rax, %rbx
 900         movl    0(%rsp), %edi
 901         movl    8(%rsp), %esi
 902         movl    0x10(%rsp), %edx
 903         movl    0x18(%rsp), %ecx
 904         movl    0x20(%rsp), %r8d
 905         movl    0x28(%rsp), %r9d
 906 
 907         movq    SY_CALLC(%rbx), %rax
 908         INDIRECT_CALL_REG(rax)
 909 
 910         movq    %rbp, %rsp      /* pop the args */
 911 
 912         /*
 913          * amd64 syscall handlers -always- return a 64-bit value in %rax.
 914          * On the 32-bit kernel, they always return that value in %eax:%edx
 915          * as required by the 32-bit ABI.
 916          *
 917          * Simulate the same behaviour by unconditionally splitting the
 918          * return value in the same way.
 919          */
 920         movq    %rax, %r13
 921         shrq    $32, %r13       /* upper 32-bits into %edx */
 922         movl    %eax, %r12d     /* lower 32-bits into %eax */
 923 
 924         /*
 925          * Optimistically assume that there's no post-syscall
 926          * work to do.  (This is to avoid having to call syscall_mstate()
 927          * with interrupts disabled)
 928          */


 935          * registers without us noticing before we return to userland.
 936          */
 937         CLI(%r14)
 938         CHECK_POSTSYS_NE(%r15, %r14, %ebx)
 939         jne     _full_syscall_postsys32
 940         SIMPLE_SYSCALL_POSTSYS(%r15, %r14, %bx)
 941 
 942         /*
 943          * Clobber %r11 as we check CR0.TS.
 944          */
 945         ASSERT_CR0TS_ZERO(%r11)
 946 
 947         /*
 948          * Unlike other cases, because we need to restore the user stack pointer
 949          * before exiting the kernel we must clear the microarch state before
 950          * getting here. This should be safe because it means that the only
 951          * values on the bus after this are based on the user's registers and
 952          * potentially the addresses where we stored them. Given the constraints
 953          * of sysret, that's how it has to be.
 954          */
 955         call    x86_md_clear
 956 
 957         /*
 958          * To get back to userland, we need to put the return %rip in %rcx and
 959          * the return %rfl in %r11d.  The sysret instruction also arranges
 960          * to fix up %cs and %ss; everything else is our responsibility.
 961          */
 962 
 963         movl    %r12d, %eax                     /* %eax: rval1 */
 964         movl    REGOFF_RBX(%rsp), %ebx
 965         /* %ecx used for return pointer */
 966         movl    %r13d, %edx                     /* %edx: rval2 */
 967         movl    REGOFF_RBP(%rsp), %ebp
 968         movl    REGOFF_RSI(%rsp), %esi
 969         movl    REGOFF_RDI(%rsp), %edi
 970 
 971         movl    REGOFF_RFL(%rsp), %r11d         /* %r11 -> eflags */
 972         movl    REGOFF_RIP(%rsp), %ecx          /* %ecx -> %eip */
 973         movl    REGOFF_RSP(%rsp), %esp
 974 
 975         ASSERT_UPCALL_MASK_IS_SET


1175          */
1176         subq    $SYS_DROP, %rsp
1177         movb    $LWP_SYS, LWP_STATE(%r14)
1178         movq    %r15, %rdi
1179         movq    %rsp, %rsi
1180         call    syscall_entry
1181 
1182         /*
1183          * Fetch the arguments copied onto the kernel stack and put
1184          * them in the right registers to invoke a C-style syscall handler.
1185          * %rax contains the handler address.
1186          */
1187         movq    %rax, %rbx
1188         movl    0(%rsp), %edi
1189         movl    8(%rsp), %esi
1190         movl    0x10(%rsp), %edx
1191         movl    0x18(%rsp), %ecx
1192         movl    0x20(%rsp), %r8d
1193         movl    0x28(%rsp), %r9d
1194 
1195         movq    SY_CALLC(%rbx), %rax
1196         INDIRECT_CALL_REG(rax)
1197 
1198         movq    %rbp, %rsp      /* pop the args */
1199 
1200         /*
1201          * amd64 syscall handlers -always- return a 64-bit value in %rax.
1202  * On the 32-bit kernel, they always return that value in %eax:%edx
1203          * as required by the 32-bit ABI.
1204          *
1205          * Simulate the same behaviour by unconditionally splitting the
1206          * return value in the same way.
1207          */
1208         movq    %rax, %r13
1209         shrq    $32, %r13       /* upper 32-bits into %edx */
1210         movl    %eax, %r12d     /* lower 32-bits into %eax */
1211 
1212         /*
1213          * Optimistically assume that there's no post-syscall
1214          * work to do.  (This is to avoid having to call syscall_mstate()
1215          * with interrupts disabled)
1216          */


1244         /*
1245          * Clobber %r11 as we check CR0.TS.
1246          */
1247         ASSERT_CR0TS_ZERO(%r11)
1248 
1249         /*
1250          * (There's no point in loading up %edx because the sysexit
1251          * mechanism smashes it.)
1252          */
1253         movl    %r12d, %eax
1254         movl    REGOFF_RBX(%rsp), %ebx
1255         movl    REGOFF_RBP(%rsp), %ebp
1256         movl    REGOFF_RSI(%rsp), %esi
1257         movl    REGOFF_RDI(%rsp), %edi
1258 
1259         movl    REGOFF_RIP(%rsp), %edx  /* sysexit: %edx -> %eip */
1260         pushq   REGOFF_RFL(%rsp)
1261         popfq
1262         movl    REGOFF_RSP(%rsp), %ecx  /* sysexit: %ecx -> %esp */
1263         ALTENTRY(sys_sysenter_swapgs_sysexit)
1264         call    x86_md_clear
1265         jmp     tr_sysexit
1266         SET_SIZE(sys_sysenter_swapgs_sysexit)
1267         SET_SIZE(sys_sysenter)
1268         SET_SIZE(_sys_sysenter_post_swapgs)
1269         SET_SIZE(brand_sys_sysenter)
1270 
1271 #endif  /* __lint */
1272 
1273 /*
1274  * This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
1275  * the generic i386 libc to do system calls. We do a small amount of setup
1276  * before jumping into the existing sys_syscall32 path.
1277  */
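As a hedged illustration of the caller's side (assuming T_SYSCALLINT is the usual 0x91 software-interrupt vector, with the syscall number in %eax and arguments on the user stack per the i386 convention), a 32-bit program reaches this gate with something like:

        /* illustrative 32-bit userland call sequence; vector and details assumed */
                movl    $SYS_getpid, %eax       /* syscall number in %eax */
                int     $T_SYSCALLINT           /* enter the kernel via the gate above */
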
1278 #if defined(__lint)
1279 
1280 /*ARGSUSED*/
1281 void
1282 sys_syscall_int()
1283 {}
1284 


1301         movq    T_STACK(%r15), %rsp
1302         movl    %eax, %eax
1303         /*
1304          * Set t_post_sys on this thread to force ourselves out via the slow
1305          * path. It might be possible at some later date to optimize this out
1306          * and use a faster return mechanism.
1307          */
1308         movb    $1, T_POST_SYS(%r15)
1309         CLEAN_CS
1310         jmp     _syscall32_save
1311         /*
1312          * There should be no instructions between this label and SWAPGS/IRET
1313          * or we could end up breaking branded zone support. See the usage of
1314          * this label in lx_brand_int80_callback and sn1_brand_int91_callback
1315          * for examples.
1316          *
1317          * We want to swapgs to maintain the invariant that all entries into
1318          * tr_iret_user are done on the user gsbase.
1319          */
1320         ALTENTRY(sys_sysint_swapgs_iret)
1321         call    x86_md_clear
1322         SWAPGS
1323         jmp     tr_iret_user
1324         /*NOTREACHED*/
1325         SET_SIZE(sys_sysint_swapgs_iret)
1326         SET_SIZE(sys_syscall_int)
1327         SET_SIZE(brand_sys_syscall_int)
1328 
1329 #endif  /* __lint */
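The return paths above hand off to tr_iret_user (and, earlier, tr_sysexit). The companion change, 11788, concerns return-stack-buffer stuffing: refilling the RSB with harmless entries so that stale, potentially attacker-primed return predictions are not consumed. Purely as a sketch of the conventional technique, with no claim about where this kernel actually performs it:

        /*
         * Illustrative RSB-stuffing sequence, written inline rather than as a
         * callable routine (a trailing ret would consume one of the freshly
         * stuffed entries).  The count and placement used by 11788 may differ.
         */
                movl    $16, %ecx               /* 16 iterations x 2 calls = 32 RSB entries */
        1:      call    3f                      /* pushes 2f; the RSB entry points at the trap below */
        2:      pause                           /* only mispredicted returns ever land here */
                lfence
                jmp     2b
        3:      call    5f                      /* second RSB entry per iteration */
        4:      pause
                lfence
                jmp     4b
        5:      decl    %ecx
                jnz     1b
                addq    $(32 * 8), %rsp         /* discard the 32 stacked return addresses */
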
1330 
1331 /*
1332  * Legacy 32-bit applications and old libc implementations do lcalls;
1333  * we should never get here because the LDT entry containing the syscall
1334  * segment descriptor has the "segment present" bit cleared, which means
1335  * we end up processing those system calls in trap() via a not-present trap.
1336  *
1337  * We do it this way because a call gate unhelpfully does -nothing- to the
1338  * interrupt flag bit, so an interrupt can run us just after the lcall
1339  * completes, but just before the swapgs takes effect.   Thus the INTR_PUSH and
1340  * INTR_POP paths would have to be slightly more complex to dance around
1341  * this problem, and end up depending explicitly on the first