/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T       */
/*        All Rights Reserved                                   */

/*      Copyright (c) 1987, 1988 Microsoft Corporation          */
/*        All Rights Reserved                                   */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/psw.h>
#include <sys/x86_archext.h>
#include <sys/machbrand.h>
#include <sys/privregs.h>

#if defined(__lint)

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>

#else   /* __lint */

#include <sys/segments.h>
#include <sys/pcb.h>
#include <sys/trap.h>
#include <sys/ftrace.h>
#include <sys/traptrace.h>
#include <sys/clock.h>
#include <sys/panic.h>
#include "assym.h"

#endif  /* __lint */

/*
 * We implement two flavours of system call entry points:
 *
 * -    {int,lcall}/iret        (i386)
 * -    sysenter/sysexit        (Pentium II and beyond)
 *
 * The basic pattern used in the handlers is to check whether we can
 * do the fast (simple) version of the system call; if we can't, we fall
 * back to various C routines that handle corner cases and debugging.
 *
 * To reduce the amount of assembler replication, yet keep the system call
 * implementations vaguely comprehensible, the common code in the body
 * of the handlers is broken up into a set of preprocessor definitions
 * below.
 */

/*
 * When SYSCALLTRACE is defined, we sneak an extra
 * predicate into a couple of tests.
 */
#if defined(SYSCALLTRACE)
#define ORL_SYSCALLTRACE(r32)   \
        orl     syscalltrace, r32
#else
#define ORL_SYSCALLTRACE(r32)
#endif

/*
 * This check is false whenever we want to go fast, i.e.
 *
 *      if (code >= NSYSCALL ||
 *          t->t_pre_sys || (t->t_proc_flag & TP_WATCHPT) != 0)
 *              do full version
 * #ifdef SYSCALLTRACE
 *      if (syscalltrace)
 *              do full version
 * #endif
 *
 * Preconditions:
 * -    t       curthread
 * -    code    contains the syscall number
 * Postconditions:
 * -    %ecx and %edi are smashed
 * -    condition code flag ZF is cleared if pre-sys is too complex
 */
#define CHECK_PRESYS_NE(t, code)                \
        movzbl  T_PRE_SYS(t), %edi;             \
        movzwl  T_PROC_FLAG(t), %ecx;           \
        andl    $TP_WATCHPT, %ecx;              \
        orl     %ecx, %edi;                     \
        cmpl    $NSYSCALL, code;                \
        setae   %cl;                            \
        movzbl  %cl, %ecx;                      \
        orl     %ecx, %edi;                     \
        ORL_SYSCALLTRACE(%edi)
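
/*
 * (The setae above materializes the unsigned comparison code >= NSYSCALL
 * as a 0/1 byte in %cl, so it can be or'ed into the same predicate as
 * the pre-sys and watchpoint flags; a single jne in the caller then
 * tests all three conditions at once.)
 */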

/*
 * Check if a brand_mach_ops callback is defined for the specified callback_id
 * type.  If so, invoke it with the user's %gs value loaded and the following
 * data on the stack:
 *         --------------------------------------
 *         | user's %ss                         |
 *    |    | user's %esp                        |
 *    |    | EFLAGS register                    |
 *    |    | user's %cs                         |
 *    |    | user's %eip (user return address)  |
 *    |    | 'scratch space'                    |
 *    |    | user's %ebx                        |
 *    |    | user's %gs selector                |
 *    v    | lwp pointer                        |
 *         | callback wrapper return addr       |
 *         --------------------------------------
 *
 * If the brand code returns, we assume that we are meant to execute the
 * normal system call path.
 *
 * The interface to the brand callbacks on the 32-bit kernel assumes %ebx
 * is available as a scratch register within the callback.  If the callback
 * returns within the kernel then this macro will restore %ebx.  If the
 * callback is going to return directly to userland then it should restore
 * %ebx before returning to userland.
 */
#define BRAND_CALLBACK(callback_id)                                         \
        subl    $4, %esp                /* save some scratch space      */ ;\
        pushl   %ebx                    /* save %ebx to use for scratch */ ;\
        pushl   %gs                     /* save the user %gs            */ ;\
        movl    $KGS_SEL, %ebx                                             ;\
        movw    %bx, %gs                /* switch to the kernel's %gs   */ ;\
        movl    %gs:CPU_THREAD, %ebx    /* load the thread pointer      */ ;\
        movl    T_LWP(%ebx), %ebx       /* load the lwp pointer         */ ;\
        pushl   %ebx                    /* push the lwp pointer         */ ;\
        movl    LWP_PROCP(%ebx), %ebx   /* load the proc pointer        */ ;\
        movl    P_BRAND(%ebx), %ebx     /* load the brand pointer       */ ;\
        movl    B_MACHOPS(%ebx), %ebx   /* load the machops pointer     */ ;\
        movl    _CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx            ;\
        cmpl    $0, %ebx                                                   ;\
        je      1f                                                         ;\
        movl    %ebx, 12(%esp)          /* save callback to scratch     */ ;\
        movl    4(%esp), %ebx           /* grab the user %gs            */ ;\
        movw    %bx, %gs                /* restore the user %gs         */ ;\
        call    *12(%esp)               /* call callback in scratch     */ ;\
1:      movl    4(%esp), %ebx           /* restore user %gs (re-do if   */ ;\
        movw    %bx, %gs                /* branch due to no callback)   */ ;\
        movl    8(%esp), %ebx           /* restore user's %ebx          */ ;\
        addl    $16, %esp               /* restore stack ptr            */
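
/*
 * In C terms, the callback lookup performed above is roughly the
 * following sketch (a paraphrase built from the offsets the macro uses;
 * the machops structure is treated as an array of code pointers indexed
 * by callback_id):
 *
 *      lwp_t *lwp = curthread->t_lwp;
 *      void (**cbs)(void) =
 *          (void (**)(void))lwp->lwp_procp->p_brand->b_machops;
 *      if (cbs[callback_id] != NULL)
 *              cbs[callback_id]();
 */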

#define MSTATE_TRANSITION(from, to)             \
        pushl   $to;                            \
        pushl   $from;                          \
        call    syscall_mstate;                 \
        addl    $0x8, %esp
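
/*
 * i.e. the C call syscall_mstate(from, to): the two arguments are
 * pushed right to left per the 32-bit C calling convention, and the
 * final addl pops them again after the call returns.
 */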

/*
 * aka CPU_STATS_ADDQ(CPU, sys.syscall, 1)
 * This must be called with interrupts or preemption disabled.
 */
#define CPU_STATS_SYS_SYSCALL_INC                       \
        addl    $1, %gs:CPU_STATS_SYS_SYSCALL;          \
        adcl    $0, %gs:CPU_STATS_SYS_SYSCALL+4;
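
/*
 * The addl/adcl pair is the 32-bit idiom for bumping a 64-bit counter
 * in memory: addl increments the low word and adcl propagates any
 * carry into the high word, i.e. the C statement
 *
 *      cpu_stats.sys.syscall++;        / a uint64_t, reached via %gs
 *
 * (field naming illustrative, per the CPU_STATS_ADDQ note above).
 */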

#if !defined(__lint)

/*
 * ASSERT(lwptoregs(lwp) == rp);
 *
 * this may seem obvious, but very odd things happen if this
 * assertion is false
 *
 * Preconditions:
 *      -none-
 * Postconditions (if assertion is true):
 *      %esi and %edi are smashed
 */
#if defined(DEBUG)

__lwptoregs_msg:
        .string "syscall_asm.s:%d lwptoregs(%p) [%p] != rp [%p]"

#define ASSERT_LWPTOREGS(t, rp)                         \
        movl    T_LWP(t), %esi;                         \
        movl    LWP_REGS(%esi), %edi;                   \
        cmpl    rp, %edi;                               \
        je      7f;                                     \
        pushl   rp;                                     \
        pushl   %edi;                                   \
        pushl   %esi;                                   \
        pushl   $__LINE__;                              \
        pushl   $__lwptoregs_msg;                       \
        call    panic;                                  \
7:
#else
#define ASSERT_LWPTOREGS(t, rp)
#endif

#endif  /* __lint */

/*
 * This is an assembler version of this fragment:
 *
 * lwp->lwp_state = LWP_SYS;
 * lwp->lwp_ru.sysc++;
 * lwp->lwp_eosys = NORMALRETURN;
 * lwp->lwp_ap = argp;
 *
 * Preconditions:
 *      -none-
 * Postconditions:
 *      -none-
 */
#define SET_LWP(lwp, argp)                              \
        movb    $LWP_SYS, LWP_STATE(lwp);               \
        addl    $1, LWP_RU_SYSC(lwp);                   \
        adcl    $0, LWP_RU_SYSC+4(lwp);                 \
        movb    $NORMALRETURN, LWP_EOSYS(lwp);          \
        movl    argp, LWP_AP(lwp)

/*
 * Set up the thread and lwp, find the handler, and copy
 * in the arguments from userland to the kernel stack.
 *
 * Preconditions:
 * -    %eax contains the syscall number
 * Postconditions:
 * -    %eax contains a pointer to the sysent structure
 * -    %ecx is zeroed
 * -    %esi, %edi are smashed
 * -    %esp is SYS_DROPped ready for the syscall
 */
#define SIMPLE_SYSCALL_PRESYS(t, faultlabel)            \
        movl    T_LWP(t), %esi;                         \
        movw    %ax, T_SYSNUM(t);                       \
        subl    $SYS_DROP, %esp;                        \
        shll    $SYSENT_SIZE_SHIFT, %eax;               \
        SET_LWP(%esi, %esp);                            \
        leal    sysent(%eax), %eax;                     \
        movzbl  SY_NARG(%eax), %ecx;                    \
        testl   %ecx, %ecx;                             \
        jz      4f;                                     \
        movl    %esp, %edi;                             \
        movl    SYS_DROP + REGOFF_UESP(%esp), %esi;     \
        movl    $faultlabel, T_LOFAULT(t);              \
        addl    $4, %esi;                               \
        rep;                                            \
          smovl;                                        \
        movl    %ecx, T_LOFAULT(t);                     \
4:
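
/*
 * The argument copy above is, in effect, the following sketch (rp being
 * the saved user registers, argp the args area we just made room for on
 * the kernel stack).  t_lofault is set so that a fault on the user stack
 * vectors to faultlabel instead of panicking; the final store clears it
 * again, since %ecx is zero after the rep smovl:
 *
 *      t->t_lofault = faultlabel;
 *      bcopy((caddr_t)rp->r_uesp + 4, argp, nargs * 4);
 *      t->t_lofault = 0;
 */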

/*
 * Check to see if a simple return is possible, i.e.
 *
 *      if ((t->t_post_sys_ast | syscalltrace) != 0)
 *              do full version;
 *
 * Preconditions:
 * -    t is curthread
 * Postconditions:
 * -    condition code NE is set if post-sys is too complex
 * -    rtmp is zeroed if it isn't (we rely on this!)
 */
#define CHECK_POSTSYS_NE(t, rtmp)                       \
        xorl    rtmp, rtmp;                             \
        ORL_SYSCALLTRACE(rtmp);                         \
        orl     T_POST_SYS_AST(t), rtmp;                \
        cmpl    $0, rtmp

/*
 * Fix up the lwp, thread, and eflags for a successful return
 *
 * Preconditions:
 * -    zwreg contains zero
 * Postconditions:
 * -    %esp has been unSYS_DROPped
 * -    %esi is smashed (points to lwp)
 */
#define SIMPLE_SYSCALL_POSTSYS(t, zwreg)                \
        movl    T_LWP(t), %esi;                         \
        addl    $SYS_DROP, %esp;                        \
        movw    zwreg, T_SYSNUM(t);                     \
        movb    $LWP_USER, LWP_STATE(%esi);             \
        andb    $_CONST(0xffff - PS_C), REGOFF_EFL(%esp)
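
/*
 * That is, roughly (rp being the saved user registers):
 *
 *      t->t_sysnum = 0;
 *      lwp->lwp_state = LWP_USER;
 *      rp->r_efl &= ~PS_C;     / carry clear tells userland "no error"
 */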

/*
 * System call handler.  This is the destination of both the call
 * gate (lcall 0x27) _and_ the interrupt gate (int 0x91). For our purposes,
 * there are two significant differences between an interrupt gate and a call
 * gate:
 *
 * 1) An interrupt gate runs the handler with interrupts disabled, whereas a
 * call gate runs the handler with whatever EFLAGS settings were in effect at
 * the time of the call.
 *
 * 2) An interrupt gate pushes the contents of the EFLAGS register at the time
 * of the interrupt onto the stack, whereas a call gate does not.
 *
 * Because we use the following code sequence to handle system calls made from
 * _both_ a call gate _and_ an interrupt gate, these two differences must be
 * respected. With regard to 1) above, the handler must ensure that a sane
 * EFLAGS snapshot is stored on the stack so that when the kernel returns
 * to the user via iret (which returns to user with the EFLAGS value saved on
 * the stack), interrupts are re-enabled.
 *
 * With regard to 2) above, the handler must always put a current snapshot
 * of EFLAGS onto the stack in the appropriate place. If we came in via an
 * interrupt gate, we will be clobbering the EFLAGS value that was pushed by
 * the interrupt gate. This is OK, as the only bit that was changed by the
 * hardware was the IF (interrupt enable) bit, which for an interrupt gate is
 * now off. If we were to do nothing, the stack would contain an EFLAGS with
 * IF off, and we would eventually return to the user with interrupts
 * disabled. The solution is to turn on the IF bit in the EFLAGS value saved
 * on the stack.
 *
 * Another subtlety which deserves mention is the difference between the two
 * descriptors. The call gate descriptor is set to instruct the hardware to copy
 * one parameter from the user stack to the kernel stack, whereas the interrupt
 * gate descriptor doesn't use the parameter passing mechanism at all. The
 * kernel doesn't actually use the parameter that is copied by the hardware; the
 * only reason it does this is so that there is a space on the stack large
 * enough to hold an EFLAGS register value, which happens to be in the correct
 * place for use by iret when we go back to userland. How convenient.
 *
 * Stack frame description in syscall() and callees.
 *
 * |------------|
 * | regs       | +(8*4)+4      registers
 * |------------|
 * | 8 args     | <- %esp    MAXSYSARGS (currently 8) arguments
 * |------------|
 *
 */
#define SYS_DROP        _CONST(_MUL(MAXSYSARGS, 4))
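
/*
 * For illustration, a userland stub entering the kernel through either
 * gate looks roughly like the sketch below (cf. the sysenter example
 * later in this file); the arguments and return address are already on
 * the user stack, exactly as for a normal C function call:
 *
 *      <args on the stack>
 *      mov     $SYS_callnum, %eax
 *      int     $0x91           / or, via the call gate: lcall $0x27, $0
 */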

#if defined(__lint)

/*ARGSUSED*/
void
sys_call()
{}

void
_allsyscalls()
{}

size_t _allsyscalls_size;

#else   /* __lint */

        ENTRY_NP2(brand_sys_call, _allsyscalls)
        BRAND_CALLBACK(BRAND_CB_SYSCALL)

        ALTENTRY(sys_call)
        / on entry      eax = system call number

        / set up the stack to look as in reg.h
        subl    $8, %esp        / pad the stack with ERRCODE and TRAPNO

        SYSCALL_PUSH

#ifdef TRAPTRACE
        TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSCALL) / Uses labels "8" and "9"
        TRACE_REGS(%edi, %esp, %ebx, %ecx)      / Uses label "9"
        pushl   %eax
        TRACE_STAMP(%edi)               / Clobbers %eax, %edx, uses "9"
        popl    %eax
        movl    %eax, TTR_SYSNUM(%edi)
#endif

_watch_do_syscall:
        movl    %esp, %ebp

        / Interrupts may be enabled here, so we must make sure this thread
        / doesn't migrate off the CPU while it updates the CPU stats.
        /
        / XXX This is only true if we got here via call gate thru the LDT for
        / old style syscalls. Perhaps this preempt++-- will go away soon?
        movl    %gs:CPU_THREAD, %ebx
        addb    $1, T_PREEMPT(%ebx)
        CPU_STATS_SYS_SYSCALL_INC
        subb    $1, T_PREEMPT(%ebx)

        ENABLE_INTR_FLAGS

        pushl   %eax                            / preserve across mstate call
        MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
        popl    %eax

        movl    %gs:CPU_THREAD, %ebx

        ASSERT_LWPTOREGS(%ebx, %esp)

        CHECK_PRESYS_NE(%ebx, %eax)
        jne     _full_syscall_presys
        SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)

_syslcall_call:
        call    *SY_CALLC(%eax)

_syslcall_done:
        CHECK_POSTSYS_NE(%ebx, %ecx)
        jne     _full_syscall_postsys
        SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
        movl    %eax, REGOFF_EAX(%esp)  / return values go back to userland
        movl    %edx, REGOFF_EDX(%esp)  / via the saved registers

        MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

        /
        / get back via iret
        /
        CLI(%edx)
        jmp     sys_rtt_syscall

_full_syscall_presys:
        movl    T_LWP(%ebx), %esi
        subl    $SYS_DROP, %esp         / make room for the syscall args
        movb    $LWP_SYS, LWP_STATE(%esi)
        pushl   %esp
        pushl   %ebx
        call    syscall_entry           / syscall_entry(t, argp), in C
        addl    $8, %esp
        jmp     _syslcall_call

_full_syscall_postsys:
        addl    $SYS_DROP, %esp
        pushl   %edx
        pushl   %eax
        pushl   %ebx
        call    syscall_exit            / syscall_exit(t, rval1, rval2), in C
        addl    $12, %esp
        MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
        jmp     _sys_rtt

_syscall_fault:
        push    $0xe                    / EFAULT
        call    set_errno
        addl    $4, %esp
        xorl    %eax, %eax              / fake syscall_err()
        xorl    %edx, %edx
        jmp     _syslcall_done
        SET_SIZE(sys_call)
        SET_SIZE(brand_sys_call)

#endif  /* __lint */

/*
 * System call handler via the sysenter instruction
 *
 * Here's how syscall entry usually works (see sys_call for details).
 *
 * There, the caller (lcall or int) in userland has arranged that:
 *
 * -    %eax contains the syscall number
 * -    the user stack contains the args to the syscall
 *
 * Normally the lcall instruction into the call gate causes the processor
 * to push %ss, %esp, <top-of-stack>, %cs, %eip onto the kernel stack.
 * The sys_call handler then leaves space for r_trapno and r_err, and
 * pusha's {%eax, %ecx, %edx, %ebx, %esp, %ebp, %esi, %edi}, followed
 * by %ds, %es, %fs and %gs to capture a 'struct regs' on the stack.
 * Then the kernel sets %ds, %es and %gs to kernel selectors, and finally
 * extracts %efl and puts it into r_efl (which happens to live at the offset
 * that <top-of-stack> was copied into). Note that the value in r_efl has
 * the IF (interrupt enable) flag turned on. (The int instruction into the
 * interrupt gate does essentially the same thing, only instead of
 * <top-of-stack> we get eflags - see comment above.)
 *
 * In the sysenter case, things are a lot more primitive.
 *
 * The caller in userland has arranged that:
 *
 * -    %eax contains the syscall number
 * -    %ecx contains the user %esp
 * -    %edx contains the return %eip
 * -    the user stack contains the args to the syscall
 *
 * e.g.
 *      <args on the stack>
 *      mov     $SYS_callnum, %eax
 *      mov     $1f, %edx       / return %eip
 *      mov     %esp, %ecx      / return %esp
 *      sysenter
 * 1:
 *
 * Hardware and (privileged) initialization code have arranged that by
 * the time the sysenter instruction completes:
 *
 * - %eip is pointing to sys_sysenter (below).
 * - %cs and %ss are set to kernel text and stack (data) selectors.
 * - %esp is pointing at the lwp's stack.
 * - Interrupts have been disabled.
 *
 * The task for the sysenter handler is:
 *
 * -    recreate the same regs structure on the stack and the same
 *      kernel state as if we'd come in on an lcall
 * -    do the normal work of a syscall
 * -    execute the system call epilogue, use sysexit to return to userland.
 *
 * Note that we are unable to return both "rvals" to userland with this
 * call, as %edx is used by the sysexit instruction.
 *
 * One final complication in this routine is its interaction with
 * single-stepping in a debugger.  For most of the system call mechanisms,
 * the CPU automatically clears the single-step flag before we enter the
 * kernel.  The sysenter mechanism does not clear the flag, so a user
 * single-stepping through a libc routine may suddenly find him/herself
 * single-stepping through the kernel.  To detect this, kmdb compares the
 * trap %pc to the [brand_]sys_sysenter addresses on each single-step trap.
 * If it finds that we have single-stepped to a sysenter entry point, it
 * explicitly clears the flag and executes the sys_sysenter routine.
 *
 * One final complication in this final complication is the fact that we
 * have two different entry points for sysenter: brand_sys_sysenter and
 * sys_sysenter.  If we enter at brand_sys_sysenter and start single-stepping
 * through the kernel with kmdb, we will eventually hit the instruction at
 * sys_sysenter.  kmdb cannot distinguish between that valid single-step
 * and the undesirable one mentioned above.  To avoid this situation, we
 * simply add a jump over the instruction at sys_sysenter to make it
 * impossible to single-step to it.
 */
#if defined(__lint)

void
sys_sysenter()
{}

#else   /* __lint */

        ENTRY_NP(brand_sys_sysenter)
        pushl   %edx
        BRAND_CALLBACK(BRAND_CB_SYSENTER)
        popl    %edx
        /*
         * Jump over sys_sysenter to allow single-stepping as described
         * above.
         */
        jmp     1f

        ALTENTRY(sys_sysenter)
        nop
1:
        /
        / do what the call gate would've done to the stack ..
        /
        pushl   $UDS_SEL        / (really %ss, but it's the same ..)
        pushl   %ecx            / userland makes this a copy of %esp
        pushfl
        orl     $PS_IE, (%esp)  / turn interrupts on when we return to user
        pushl   $UCS_SEL
        pushl   %edx            / userland makes this a copy of %eip
        /
        / done.  finish building the stack frame
        /
        subl    $8, %esp        / leave space for ERR and TRAPNO

        SYSENTER_PUSH

#ifdef TRAPTRACE
        TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSENTER) / uses labels 8 and 9
        TRACE_REGS(%edi, %esp, %ebx, %ecx)              / uses label 9
        pushl   %eax
        TRACE_STAMP(%edi)               / clobbers %eax, %edx, uses label 9
        popl    %eax
        movl    %eax, TTR_SYSNUM(%edi)
#endif
        movl    %esp, %ebp

        CPU_STATS_SYS_SYSCALL_INC

        ENABLE_INTR_FLAGS

        pushl   %eax                            / preserve across mstate call
        MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
        popl    %eax

        movl    %gs:CPU_THREAD, %ebx

        ASSERT_LWPTOREGS(%ebx, %esp)

        CHECK_PRESYS_NE(%ebx, %eax)
        jne     _full_syscall_presys
        SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)

_sysenter_call:
        call    *SY_CALLC(%eax)

_sysenter_done:
        CHECK_POSTSYS_NE(%ebx, %ecx)
        jne     _full_syscall_postsys
        SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
        /
        / sysexit uses %edx to restore %eip, so we can't use it
        / to return a value, sigh.
        /
        movl    %eax, REGOFF_EAX(%esp)
        / movl  %edx, REGOFF_EDX(%esp)

        / Interrupts will be turned on by the 'sti' executed just before
        / sysexit. The following ensures that restoring the user's EFLAGS
        / doesn't enable interrupts too soon.
        andl    $_BITNOT(PS_IE), REGOFF_EFL(%esp)

        MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

        cli

        SYSCALL_POP

        popl    %edx                    / sysexit: %edx -> %eip
        addl    $4, %esp                / get CS off the stack
        popfl                           / EFL
        popl    %ecx                    / sysexit: %ecx -> %esp
        sti
        sysexit
        SET_SIZE(sys_sysenter)
        SET_SIZE(brand_sys_sysenter)

/*
 * Declare a uintptr_t which covers the entire pc range of syscall
 * handlers for the stack walkers that need this.
 */
        .align  CPTRSIZE
        .globl  _allsyscalls_size
        .type   _allsyscalls_size, @object
_allsyscalls_size:
        .NWORD  . - _allsyscalls
        SET_SIZE(_allsyscalls_size)

#endif  /* __lint */

/*
 * These are the thread context handlers for lwps using sysenter/sysexit.
 */

#if defined(__lint)

/*ARGSUSED*/
void
sep_save(void *ksp)
{}

/*ARGSUSED*/
void
sep_restore(void *ksp)
{}

#else   /* __lint */

        /*
         * setting this value to zero as we switch away causes the
         * stack-pointer-on-sysenter to be NULL, ensuring that we
         * don't silently corrupt another (preempted) thread stack
         * when running an lwp that (somehow) didn't get sep_restore'd
         */
        ENTRY_NP(sep_save)
        xorl    %edx, %edx
        xorl    %eax, %eax
        movl    $MSR_INTC_SEP_ESP, %ecx
        wrmsr
        ret
        SET_SIZE(sep_save)

        /*
         * Update the kernel stack pointer as we resume onto this cpu.
         */
        ENTRY_NP(sep_restore)
        movl    4(%esp), %eax                   /* per-lwp kernel sp */
        xorl    %edx, %edx
        movl    $MSR_INTC_SEP_ESP, %ecx
        wrmsr
        ret
        SET_SIZE(sep_restore)
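
/*
 * wrmsr writes %edx:%eax into the MSR selected by %ecx, so in C terms
 * the two routines above are roughly the following sketch (wrmsr here
 * standing for the usual MSR-write primitive):
 *
 *      sep_save:       wrmsr(MSR_INTC_SEP_ESP, 0);
 *      sep_restore:    wrmsr(MSR_INTC_SEP_ESP, (uint64_t)(uintptr_t)ksp);
 */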

#endif  /* __lint */

/*
 * Call syscall().  Called from trap() on watchpoint at lcall 0,7
 */

#if defined(__lint)

void
watch_syscall(void)
{}

#else   /* __lint */

        ENTRY_NP(watch_syscall)
        CLI(%eax)
        movl    %gs:CPU_THREAD, %ebx
        movl    T_STACK(%ebx), %esp             / switch to the thread stack
        movl    REGOFF_EAX(%esp), %eax          / recover original syscall#
        jmp     _watch_do_syscall
        SET_SIZE(watch_syscall)

#endif  /* __lint */